pearmut 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/app.py +73 -0
- pearmut/assignment.py +70 -17
- pearmut/cli.py +209 -136
- pearmut/constants.py +93 -0
- pearmut/static/basic.bundle.js +1 -1
- pearmut/static/basic.html +39 -3
- pearmut/static/dashboard.bundle.js +1 -1
- pearmut/static/dashboard.html +1 -1
- pearmut/static/index.html +1 -1
- pearmut/utils.py +1 -13
- {pearmut-1.0.0.dist-info → pearmut-1.0.1.dist-info}/METADATA +101 -11
- pearmut-1.0.1.dist-info/RECORD +20 -0
- pearmut-1.0.0.dist-info/RECORD +0 -19
- {pearmut-1.0.0.dist-info → pearmut-1.0.1.dist-info}/WHEEL +0 -0
- {pearmut-1.0.0.dist-info → pearmut-1.0.1.dist-info}/entry_points.txt +0 -0
- {pearmut-1.0.0.dist-info → pearmut-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {pearmut-1.0.0.dist-info → pearmut-1.0.1.dist-info}/top_level.txt +0 -0
pearmut/cli.py
CHANGED
|
@@ -23,31 +23,39 @@ def _run(args_unknown):
|
|
|
23
23
|
|
|
24
24
|
args = argparse.ArgumentParser()
|
|
25
25
|
args.add_argument(
|
|
26
|
-
"--port", type=int, default=8001,
|
|
27
|
-
help="Port to run the server on"
|
|
26
|
+
"--port", type=int, default=8001, help="Port to run the server on"
|
|
28
27
|
)
|
|
29
28
|
args.add_argument(
|
|
30
|
-
"--server",
|
|
31
|
-
|
|
29
|
+
"--server",
|
|
30
|
+
default="http://localhost:8001",
|
|
31
|
+
help="Prefix server URL for protocol links",
|
|
32
32
|
)
|
|
33
33
|
args = args.parse_args(args_unknown)
|
|
34
34
|
|
|
35
35
|
# print access dashboard URL for all campaigns
|
|
36
36
|
if tasks_data:
|
|
37
|
-
dashboard_url =
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
37
|
+
dashboard_url = (
|
|
38
|
+
args.server
|
|
39
|
+
+ "/dashboard.html?"
|
|
40
|
+
+ "&".join(
|
|
41
|
+
[
|
|
42
|
+
f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
|
|
43
|
+
for campaign_id, campaign_data in tasks_data.items()
|
|
44
|
+
]
|
|
45
|
+
)
|
|
46
|
+
)
|
|
47
|
+
print(
|
|
48
|
+
"\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
|
|
49
|
+
)
|
|
50
|
+
print("🍐", dashboard_url + "\n", flush=True)
|
|
51
|
+
|
|
44
52
|
# disable startup message
|
|
45
53
|
uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
|
|
46
54
|
# set time logging
|
|
47
55
|
uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
|
|
48
|
-
uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
|
|
49
|
-
|
|
50
|
-
)
|
|
56
|
+
uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
|
|
57
|
+
"fmt"
|
|
58
|
+
] = "%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s"
|
|
51
59
|
uvicorn.run(
|
|
52
60
|
app,
|
|
53
61
|
host="0.0.0.0",
|
|
@@ -59,9 +67,9 @@ def _run(args_unknown):
|
|
|
59
67
|
def _validate_item_structure(items):
|
|
60
68
|
"""
|
|
61
69
|
Validate that items have the correct structure.
|
|
62
|
-
Items should be lists of dictionaries with 'src' and '
|
|
70
|
+
Items should be lists of dictionaries with 'tgt' and optionally 'src' and/or 'ref' keys.
|
|
63
71
|
The 'tgt' field should be a dictionary mapping model names to translations.
|
|
64
|
-
|
|
72
|
+
|
|
65
73
|
Args:
|
|
66
74
|
items: List of item dictionaries to validate
|
|
67
75
|
"""
|
|
@@ -70,71 +78,91 @@ def _validate_item_structure(items):
|
|
|
70
78
|
|
|
71
79
|
for item in items:
|
|
72
80
|
if not isinstance(item, dict):
|
|
73
|
-
raise ValueError("Each item must be a dictionary with '
|
|
74
|
-
if
|
|
75
|
-
raise ValueError("Each item must contain '
|
|
76
|
-
|
|
77
|
-
# Validate src is
|
|
78
|
-
if not isinstance(item[
|
|
81
|
+
raise ValueError("Each item must be a dictionary with 'tgt' key")
|
|
82
|
+
if "tgt" not in item:
|
|
83
|
+
raise ValueError("Each item must contain 'tgt' key")
|
|
84
|
+
|
|
85
|
+
# Validate src is a string if present
|
|
86
|
+
if "src" in item and not isinstance(item["src"], str):
|
|
79
87
|
raise ValueError("Item 'src' must be a string")
|
|
80
|
-
|
|
88
|
+
|
|
89
|
+
# Validate ref is a string if present
|
|
90
|
+
if "ref" in item and not isinstance(item["ref"], str):
|
|
91
|
+
raise ValueError("Item 'ref' must be a string")
|
|
92
|
+
|
|
81
93
|
# Validate tgt is a dictionary (basic template with model names)
|
|
82
|
-
if isinstance(item[
|
|
94
|
+
if isinstance(item["tgt"], str):
|
|
83
95
|
# String not allowed - suggest using dictionary (don't include user input to prevent injection)
|
|
84
|
-
raise ValueError(
|
|
85
|
-
|
|
96
|
+
raise ValueError(
|
|
97
|
+
'Item \'tgt\' must be a dictionary mapping model names to translations. For single translation, use {"default": "your_translation"}'
|
|
98
|
+
)
|
|
99
|
+
elif isinstance(item["tgt"], dict):
|
|
86
100
|
# Dictionary mapping model names to translations
|
|
87
101
|
# Validate that model names don't contain only numbers (JavaScript ordering issue)
|
|
88
|
-
for model_name, translation in item[
|
|
102
|
+
for model_name, translation in item["tgt"].items():
|
|
89
103
|
if not isinstance(model_name, str):
|
|
90
104
|
raise ValueError("Model names in 'tgt' dictionary must be strings")
|
|
91
105
|
if model_name.isdigit():
|
|
92
|
-
raise ValueError(
|
|
106
|
+
raise ValueError(
|
|
107
|
+
f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)"
|
|
108
|
+
)
|
|
93
109
|
if not isinstance(translation, str):
|
|
94
|
-
raise ValueError(
|
|
110
|
+
raise ValueError(
|
|
111
|
+
f"Translation for model '{model_name}' must be a string"
|
|
112
|
+
)
|
|
95
113
|
else:
|
|
96
|
-
raise ValueError(
|
|
97
|
-
|
|
114
|
+
raise ValueError(
|
|
115
|
+
"Item 'tgt' must be a dictionary mapping model names to translations"
|
|
116
|
+
)
|
|
117
|
+
|
|
98
118
|
# Validate error_spans structure if present
|
|
99
|
-
if
|
|
100
|
-
if not isinstance(item[
|
|
101
|
-
raise ValueError(
|
|
102
|
-
|
|
119
|
+
if "error_spans" in item:
|
|
120
|
+
if not isinstance(item["error_spans"], dict):
|
|
121
|
+
raise ValueError(
|
|
122
|
+
"'error_spans' must be a dictionary mapping model names to error span lists"
|
|
123
|
+
)
|
|
124
|
+
for model_name, spans in item["error_spans"].items():
|
|
103
125
|
if not isinstance(spans, list):
|
|
104
|
-
raise ValueError(
|
|
105
|
-
|
|
126
|
+
raise ValueError(
|
|
127
|
+
f"Error spans for model '{model_name}' must be a list"
|
|
128
|
+
)
|
|
129
|
+
|
|
106
130
|
# Validate validation structure if present
|
|
107
|
-
if
|
|
108
|
-
if not isinstance(item[
|
|
109
|
-
raise ValueError(
|
|
110
|
-
|
|
131
|
+
if "validation" in item:
|
|
132
|
+
if not isinstance(item["validation"], dict):
|
|
133
|
+
raise ValueError(
|
|
134
|
+
"'validation' must be a dictionary mapping model names to validation rules"
|
|
135
|
+
)
|
|
136
|
+
for model_name, val_rule in item["validation"].items():
|
|
111
137
|
if not isinstance(val_rule, dict):
|
|
112
|
-
raise ValueError(
|
|
138
|
+
raise ValueError(
|
|
139
|
+
f"Validation rule for model '{model_name}' must be a dictionary"
|
|
140
|
+
)
|
|
113
141
|
|
|
114
142
|
|
|
115
143
|
def _validate_document_models(doc):
|
|
116
144
|
"""
|
|
117
145
|
Validate that all items in a document have the same model outputs.
|
|
118
|
-
|
|
146
|
+
|
|
119
147
|
Args:
|
|
120
148
|
doc: List of items in a document
|
|
121
|
-
|
|
149
|
+
|
|
122
150
|
Returns:
|
|
123
151
|
None if valid
|
|
124
|
-
|
|
152
|
+
|
|
125
153
|
Raises:
|
|
126
154
|
ValueError: If items have different model outputs
|
|
127
155
|
"""
|
|
128
156
|
# Get model names from the first item
|
|
129
157
|
first_item = doc[0]
|
|
130
|
-
first_models = set(first_item[
|
|
131
|
-
|
|
158
|
+
first_models = set(first_item["tgt"].keys())
|
|
159
|
+
|
|
132
160
|
# Check all other items have the same model names
|
|
133
161
|
for i, item in enumerate(doc[1:], start=1):
|
|
134
|
-
if
|
|
162
|
+
if "tgt" not in item or not isinstance(item["tgt"], dict):
|
|
135
163
|
continue
|
|
136
|
-
|
|
137
|
-
item_models = set(item[
|
|
164
|
+
|
|
165
|
+
item_models = set(item["tgt"].keys())
|
|
138
166
|
if item_models != first_models:
|
|
139
167
|
raise ValueError(
|
|
140
168
|
f"Document contains items with different model outputs. "
|
|
@@ -147,33 +175,31 @@ def _validate_document_models(doc):
|
|
|
147
175
|
def _shuffle_campaign_data(campaign_data, rng):
|
|
148
176
|
"""
|
|
149
177
|
Shuffle campaign data at the document level in-place
|
|
150
|
-
|
|
178
|
+
|
|
151
179
|
For each document, randomly shuffles the order of models in the tgt dictionary.
|
|
152
|
-
|
|
180
|
+
|
|
153
181
|
Args:
|
|
154
182
|
campaign_data: The campaign data dictionary
|
|
155
183
|
rng: Random number generator with campaign-specific seed
|
|
156
184
|
"""
|
|
185
|
+
|
|
157
186
|
def shuffle_document(doc):
|
|
158
187
|
"""Shuffle a single document (list of items) by reordering models in tgt dict."""
|
|
159
188
|
# Validate that all items have the same models
|
|
160
189
|
_validate_document_models(doc)
|
|
161
|
-
|
|
190
|
+
|
|
162
191
|
# Get all model names from the first item's tgt dict
|
|
163
192
|
first_item = doc[0]
|
|
164
|
-
model_names = list(first_item[
|
|
193
|
+
model_names = list(first_item["tgt"].keys())
|
|
165
194
|
rng.shuffle(model_names)
|
|
166
|
-
|
|
195
|
+
|
|
167
196
|
# Reorder tgt dict for all items in the document
|
|
168
197
|
for item in doc:
|
|
169
|
-
if
|
|
170
|
-
item["tgt"] = {
|
|
171
|
-
|
|
172
|
-
for model in model_names
|
|
173
|
-
}
|
|
174
|
-
|
|
198
|
+
if "tgt" in item and isinstance(item["tgt"], dict):
|
|
199
|
+
item["tgt"] = {model: item["tgt"][model] for model in model_names}
|
|
200
|
+
|
|
175
201
|
assignment = campaign_data["info"]["assignment"]
|
|
176
|
-
|
|
202
|
+
|
|
177
203
|
if assignment == "task-based":
|
|
178
204
|
# After transformation, data is a dict mapping user_id -> tasks
|
|
179
205
|
for user_id, task in campaign_data["data"].items():
|
|
@@ -185,33 +211,33 @@ def _shuffle_campaign_data(campaign_data, rng):
|
|
|
185
211
|
shuffle_document(doc)
|
|
186
212
|
|
|
187
213
|
|
|
188
|
-
def _add_single_campaign(
|
|
214
|
+
def _add_single_campaign(campaign_data, overwrite, server):
|
|
189
215
|
"""
|
|
190
|
-
Add a single campaign from
|
|
216
|
+
Add a single campaign from campaign data dictionary.
|
|
191
217
|
"""
|
|
192
218
|
import random
|
|
193
219
|
|
|
194
220
|
import wonderwords
|
|
195
221
|
|
|
196
|
-
|
|
197
|
-
|
|
222
|
+
if "campaign_id" not in campaign_data:
|
|
223
|
+
raise ValueError("Campaign data must contain 'campaign_id' field.")
|
|
224
|
+
if "info" not in campaign_data:
|
|
225
|
+
raise ValueError("Campaign data must contain 'info' field.")
|
|
226
|
+
if "data" not in campaign_data:
|
|
227
|
+
raise ValueError("Campaign data must contain 'data' field.")
|
|
198
228
|
|
|
199
229
|
with open(f"{ROOT}/data/progress.json", "r") as f:
|
|
200
230
|
progress_data = json.load(f)
|
|
201
231
|
|
|
202
|
-
if campaign_data[
|
|
232
|
+
if campaign_data["campaign_id"] in progress_data and not overwrite:
|
|
203
233
|
raise ValueError(
|
|
204
234
|
f"Campaign {campaign_data['campaign_id']} already exists.\n"
|
|
205
235
|
"Use -o to overwrite."
|
|
206
236
|
)
|
|
207
237
|
|
|
208
|
-
if "info" not in campaign_data:
|
|
209
|
-
raise ValueError("Campaign data must contain 'info' field.")
|
|
210
|
-
if "data" not in campaign_data:
|
|
211
|
-
raise ValueError("Campaign data must contain 'data' field.")
|
|
212
238
|
if "assignment" not in campaign_data["info"]:
|
|
213
239
|
raise ValueError("Campaign 'info' must contain 'assignment' field.")
|
|
214
|
-
|
|
240
|
+
|
|
215
241
|
# Template defaults to "basic" if not specified
|
|
216
242
|
assignment = campaign_data["info"]["assignment"]
|
|
217
243
|
# use random words for identifying users
|
|
@@ -225,11 +251,11 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
225
251
|
if assignment == "task-based":
|
|
226
252
|
tasks = campaign_data["data"]
|
|
227
253
|
if not isinstance(tasks, list):
|
|
228
|
-
raise ValueError(
|
|
229
|
-
"Task-based campaign 'data' must be a list of tasks.")
|
|
254
|
+
raise ValueError("Task-based campaign 'data' must be a list of tasks.")
|
|
230
255
|
if not all(isinstance(task, list) for task in tasks):
|
|
231
256
|
raise ValueError(
|
|
232
|
-
"Each task in task-based campaign 'data' must be a list of items."
|
|
257
|
+
"Each task in task-based campaign 'data' must be a list of items."
|
|
258
|
+
)
|
|
233
259
|
# Validate item structure for each task
|
|
234
260
|
for task_i, task in enumerate(tasks):
|
|
235
261
|
for doc_i, doc in enumerate(task):
|
|
@@ -241,11 +267,9 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
241
267
|
elif assignment == "single-stream":
|
|
242
268
|
tasks = campaign_data["data"]
|
|
243
269
|
if users_spec is None:
|
|
244
|
-
raise ValueError(
|
|
245
|
-
"Single-stream campaigns must specify 'users' in info.")
|
|
270
|
+
raise ValueError("Single-stream campaigns must specify 'users' in info.")
|
|
246
271
|
if not isinstance(campaign_data["data"], list):
|
|
247
|
-
raise ValueError(
|
|
248
|
-
"Single-stream campaign 'data' must be a list of items.")
|
|
272
|
+
raise ValueError("Single-stream campaign 'data' must be a list of items.")
|
|
249
273
|
# Validate item structure for single-stream
|
|
250
274
|
for doc_i, doc in enumerate(tasks):
|
|
251
275
|
try:
|
|
@@ -261,11 +285,9 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
261
285
|
elif assignment == "dynamic":
|
|
262
286
|
tasks = campaign_data["data"]
|
|
263
287
|
if users_spec is None:
|
|
264
|
-
raise ValueError(
|
|
265
|
-
"Dynamic campaigns must specify 'users' in info.")
|
|
288
|
+
raise ValueError("Dynamic campaigns must specify 'users' in info.")
|
|
266
289
|
if not isinstance(campaign_data["data"], list):
|
|
267
|
-
raise ValueError(
|
|
268
|
-
"Dynamic campaign 'data' must be a list of items.")
|
|
290
|
+
raise ValueError("Dynamic campaign 'data' must be a list of items.")
|
|
269
291
|
# Validate item structure for dynamic
|
|
270
292
|
for doc_i, doc in enumerate(tasks):
|
|
271
293
|
try:
|
|
@@ -286,10 +308,14 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
286
308
|
if "dynamic_contrastive_models" not in campaign_data["info"]:
|
|
287
309
|
campaign_data["info"]["dynamic_contrastive_models"] = 1
|
|
288
310
|
# Validate that dynamic_first is at least 1
|
|
289
|
-
assert
|
|
311
|
+
assert (
|
|
312
|
+
campaign_data["info"]["dynamic_first"] >= 1
|
|
313
|
+
), "dynamic_first must be at least 1"
|
|
290
314
|
# Validate that dynamic_contrastive_models is at most dynamic_top
|
|
291
|
-
assert
|
|
292
|
-
"dynamic_contrastive_models
|
|
315
|
+
assert (
|
|
316
|
+
campaign_data["info"]["dynamic_contrastive_models"]
|
|
317
|
+
<= campaign_data["info"]["dynamic_top"]
|
|
318
|
+
), "dynamic_contrastive_models must be at most dynamic_top"
|
|
293
319
|
# Validate that all items have the same models
|
|
294
320
|
all_models = set()
|
|
295
321
|
for item in campaign_data["data"]:
|
|
@@ -298,7 +324,9 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
298
324
|
for item in campaign_data["data"]:
|
|
299
325
|
if item and len(item) > 0:
|
|
300
326
|
item_models = set(item[0]["tgt"].keys())
|
|
301
|
-
assert
|
|
327
|
+
assert (
|
|
328
|
+
item_models == all_models
|
|
329
|
+
), "All items must have the same model outputs"
|
|
302
330
|
else:
|
|
303
331
|
raise ValueError(f"Unknown campaign assignment type: {assignment}")
|
|
304
332
|
|
|
@@ -310,14 +338,12 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
310
338
|
new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
|
|
311
339
|
if new_id not in user_ids:
|
|
312
340
|
user_ids.append(new_id)
|
|
313
|
-
user_ids = [
|
|
314
|
-
f"{user_id}-{rng.randint(0, 999):03d}"
|
|
315
|
-
for user_id in user_ids
|
|
316
|
-
]
|
|
341
|
+
user_ids = [f"{user_id}-{rng.randint(0, 999):03d}" for user_id in user_ids]
|
|
317
342
|
elif isinstance(users_spec, list):
|
|
318
343
|
if len(users_spec) != num_users:
|
|
319
344
|
raise ValueError(
|
|
320
|
-
f"Number of users ({len(users_spec)}) must match expected count ({num_users})."
|
|
345
|
+
f"Number of users ({len(users_spec)}) must match expected count ({num_users})."
|
|
346
|
+
)
|
|
321
347
|
if all(isinstance(u, str) for u in users_spec):
|
|
322
348
|
# List of string IDs
|
|
323
349
|
user_ids = users_spec
|
|
@@ -336,13 +362,31 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
336
362
|
raise ValueError("'users' list must contain all strings or all dicts.")
|
|
337
363
|
else:
|
|
338
364
|
raise ValueError("'users' must be an integer or a list.")
|
|
339
|
-
|
|
365
|
+
|
|
340
366
|
if "protocol" not in campaign_data["info"]:
|
|
341
367
|
campaign_data["info"]["protocol"] = "ESA"
|
|
342
|
-
print(
|
|
368
|
+
print(
|
|
369
|
+
"Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'."
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
# Validate sliders structure if present
|
|
373
|
+
if "sliders" in campaign_data["info"]:
|
|
374
|
+
if not all(
|
|
375
|
+
isinstance(s, dict)
|
|
376
|
+
and all(k in s for k in ("name", "min", "max", "step"))
|
|
377
|
+
and isinstance(s.get("min"), (int, float))
|
|
378
|
+
and isinstance(s.get("max"), (int, float))
|
|
379
|
+
and isinstance(s.get("step"), (int, float))
|
|
380
|
+
and s["min"] <= s["max"]
|
|
381
|
+
and s["step"] > 0
|
|
382
|
+
for s in campaign_data["info"]["sliders"]
|
|
383
|
+
):
|
|
384
|
+
raise ValueError(
|
|
385
|
+
"Each slider must be a dict with 'name', 'min', 'max', and 'step' keys, where min/max/step are numeric, min <= max, and step > 0"
|
|
386
|
+
)
|
|
343
387
|
|
|
344
388
|
# Remove output file when overwriting (after all validations pass)
|
|
345
|
-
if overwrite and campaign_data[
|
|
389
|
+
if overwrite and campaign_data["campaign_id"] in progress_data:
|
|
346
390
|
output_file = f"{ROOT}/data/outputs/{campaign_data['campaign_id']}.jsonl"
|
|
347
391
|
if os.path.exists(output_file):
|
|
348
392
|
os.remove(output_file)
|
|
@@ -351,17 +395,14 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
351
395
|
# For single-stream and dynamic, data is a flat list (shared among all users)
|
|
352
396
|
if assignment == "task-based":
|
|
353
397
|
campaign_data["data"] = {
|
|
354
|
-
user_id: task
|
|
355
|
-
for user_id, task in zip(user_ids, tasks)
|
|
398
|
+
user_id: task for user_id, task in zip(user_ids, tasks)
|
|
356
399
|
}
|
|
357
400
|
elif assignment in ["single-stream", "dynamic"]:
|
|
358
401
|
campaign_data["data"] = tasks
|
|
359
402
|
|
|
360
403
|
# generate a token for dashboard access if not present
|
|
361
404
|
if "token" not in campaign_data:
|
|
362
|
-
campaign_data["token"] = (
|
|
363
|
-
hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
|
|
364
|
-
)
|
|
405
|
+
campaign_data["token"] = hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
|
|
365
406
|
|
|
366
407
|
def get_token(user_id, token_type):
|
|
367
408
|
"""Get user token or generate a random one."""
|
|
@@ -374,10 +415,17 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
374
415
|
user_id: {
|
|
375
416
|
# TODO: progress tracking could be based on the assignment type
|
|
376
417
|
"progress": (
|
|
377
|
-
[False]*len(campaign_data["data"][user_id])
|
|
378
|
-
|
|
379
|
-
else
|
|
380
|
-
|
|
418
|
+
[False] * len(campaign_data["data"][user_id])
|
|
419
|
+
if assignment == "task-based"
|
|
420
|
+
else (
|
|
421
|
+
[False] * len(campaign_data["data"])
|
|
422
|
+
if assignment == "single-stream"
|
|
423
|
+
else (
|
|
424
|
+
[list() for _ in range(len(campaign_data["data"]))]
|
|
425
|
+
if assignment == "dynamic"
|
|
426
|
+
else []
|
|
427
|
+
)
|
|
428
|
+
)
|
|
381
429
|
),
|
|
382
430
|
"time_start": None,
|
|
383
431
|
"time_end": None,
|
|
@@ -396,26 +444,34 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
396
444
|
# Handle assets symlink if specified
|
|
397
445
|
if "assets" in campaign_data["info"]:
|
|
398
446
|
assets_config = campaign_data["info"]["assets"]
|
|
399
|
-
|
|
447
|
+
|
|
400
448
|
# assets must be a dictionary with source and destination keys
|
|
401
449
|
if not isinstance(assets_config, dict):
|
|
402
|
-
raise ValueError(
|
|
450
|
+
raise ValueError(
|
|
451
|
+
"Assets must be a dictionary with 'source' and 'destination' keys."
|
|
452
|
+
)
|
|
403
453
|
if "source" not in assets_config or "destination" not in assets_config:
|
|
404
|
-
raise ValueError(
|
|
405
|
-
|
|
454
|
+
raise ValueError(
|
|
455
|
+
"Assets config must contain 'source' and 'destination' keys."
|
|
456
|
+
)
|
|
457
|
+
|
|
406
458
|
assets_source = assets_config["source"]
|
|
407
459
|
assets_destination = assets_config["destination"]
|
|
408
|
-
|
|
460
|
+
|
|
409
461
|
# Validate destination starts with 'assets/'
|
|
410
462
|
if not assets_destination.startswith("assets/"):
|
|
411
|
-
raise ValueError(
|
|
412
|
-
|
|
463
|
+
raise ValueError(
|
|
464
|
+
f"Assets destination '{assets_destination}' must start with 'assets/'."
|
|
465
|
+
)
|
|
466
|
+
|
|
413
467
|
# Resolve relative paths from the caller's current working directory
|
|
414
468
|
assets_real_path = os.path.abspath(assets_source)
|
|
415
469
|
|
|
416
470
|
if not os.path.isdir(assets_real_path):
|
|
417
|
-
raise ValueError(
|
|
418
|
-
|
|
471
|
+
raise ValueError(
|
|
472
|
+
f"Assets source path '{assets_real_path}' must be an existing directory."
|
|
473
|
+
)
|
|
474
|
+
|
|
419
475
|
# Symlink path is based on the destination, stripping the 'assets/' prefix
|
|
420
476
|
# User assets are now stored under data/assets/ instead of static/assets/
|
|
421
477
|
symlink_path = f"{ROOT}/data/{assets_destination}".rstrip("/")
|
|
@@ -423,7 +479,7 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
423
479
|
# Remove existing symlink if present and we are overriding the same campaign
|
|
424
480
|
if os.path.lexists(symlink_path):
|
|
425
481
|
# Check if any other campaign is using this destination
|
|
426
|
-
current_campaign_id = campaign_data[
|
|
482
|
+
current_campaign_id = campaign_data["campaign_id"]
|
|
427
483
|
|
|
428
484
|
for other_campaign_id in progress_data.keys():
|
|
429
485
|
if other_campaign_id == current_campaign_id:
|
|
@@ -440,8 +496,10 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
440
496
|
if overwrite:
|
|
441
497
|
os.remove(symlink_path)
|
|
442
498
|
else:
|
|
443
|
-
raise ValueError(
|
|
444
|
-
|
|
499
|
+
raise ValueError(
|
|
500
|
+
f"Assets destination '{assets_destination}' is already taken."
|
|
501
|
+
)
|
|
502
|
+
|
|
445
503
|
# Ensure the assets directory exists
|
|
446
504
|
# get parent of symlink_path dir
|
|
447
505
|
os.makedirs(os.path.dirname(symlink_path), exist_ok=True)
|
|
@@ -449,7 +507,6 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
449
507
|
os.symlink(assets_real_path, symlink_path, target_is_directory=True)
|
|
450
508
|
print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
|
|
451
509
|
|
|
452
|
-
|
|
453
510
|
# Shuffle data if shuffle parameter is true (defaults to true)
|
|
454
511
|
should_shuffle = campaign_data["info"].get("shuffle", True)
|
|
455
512
|
if should_shuffle:
|
|
@@ -459,15 +516,14 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
459
516
|
with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
|
|
460
517
|
json.dump(campaign_data, f, indent=2, ensure_ascii=False)
|
|
461
518
|
|
|
462
|
-
progress_data[campaign_data[
|
|
519
|
+
progress_data[campaign_data["campaign_id"]] = user_progress
|
|
463
520
|
save_progress_data(progress_data)
|
|
464
521
|
|
|
465
|
-
|
|
466
522
|
print(
|
|
467
523
|
"🎛️ ",
|
|
468
524
|
f"{server}/dashboard.html"
|
|
469
525
|
f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
|
|
470
|
-
f"&token={campaign_data['token']}"
|
|
526
|
+
f"&token={campaign_data['token']}",
|
|
471
527
|
)
|
|
472
528
|
for user_id, user_val in user_progress.items():
|
|
473
529
|
# point to the protocol URL
|
|
@@ -481,22 +537,28 @@ def _add_campaign(args_unknown):
|
|
|
481
537
|
"""
|
|
482
538
|
args = argparse.ArgumentParser()
|
|
483
539
|
args.add_argument(
|
|
484
|
-
|
|
485
|
-
|
|
540
|
+
"data_files",
|
|
541
|
+
type=str,
|
|
542
|
+
nargs="+",
|
|
543
|
+
help="One or more paths to campaign data files",
|
|
486
544
|
)
|
|
487
545
|
args.add_argument(
|
|
488
|
-
"-o",
|
|
489
|
-
|
|
546
|
+
"-o",
|
|
547
|
+
"--overwrite",
|
|
548
|
+
action="store_true",
|
|
549
|
+
help="Overwrite existing campaign if it exists",
|
|
490
550
|
)
|
|
491
551
|
args.add_argument(
|
|
492
|
-
"--server",
|
|
493
|
-
|
|
552
|
+
"--server",
|
|
553
|
+
default="http://localhost:8001",
|
|
554
|
+
help="Prefix server URL for protocol links",
|
|
494
555
|
)
|
|
495
556
|
args = args.parse_args(args_unknown)
|
|
496
557
|
|
|
497
558
|
for data_file in args.data_files:
|
|
498
559
|
try:
|
|
499
|
-
|
|
560
|
+
with open(data_file, "r") as f:
|
|
561
|
+
_add_single_campaign(json.load(f), args.overwrite, args.server)
|
|
500
562
|
except Exception as e:
|
|
501
563
|
print(f"Error processing {data_file}: {e}")
|
|
502
564
|
exit(1)
|
|
@@ -507,7 +569,13 @@ def main():
|
|
|
507
569
|
Main entry point for the CLI.
|
|
508
570
|
"""
|
|
509
571
|
args = argparse.ArgumentParser()
|
|
510
|
-
args.add_argument(
|
|
572
|
+
args.add_argument(
|
|
573
|
+
"command",
|
|
574
|
+
type=str,
|
|
575
|
+
choices=["run", "add", "purge"],
|
|
576
|
+
default="run",
|
|
577
|
+
nargs="?",
|
|
578
|
+
)
|
|
511
579
|
args, args_unknown = args.parse_known_args()
|
|
512
580
|
|
|
513
581
|
# enforce that only one pearmut process is running
|
|
@@ -517,11 +585,11 @@ def main():
|
|
|
517
585
|
print(p)
|
|
518
586
|
exit(1)
|
|
519
587
|
|
|
520
|
-
if args.command ==
|
|
588
|
+
if args.command == "run":
|
|
521
589
|
_run(args_unknown)
|
|
522
|
-
elif args.command ==
|
|
590
|
+
elif args.command == "add":
|
|
523
591
|
_add_campaign(args_unknown)
|
|
524
|
-
elif args.command ==
|
|
592
|
+
elif args.command == "purge":
|
|
525
593
|
import shutil
|
|
526
594
|
|
|
527
595
|
def _unlink_assets(campaign_id):
|
|
@@ -531,7 +599,9 @@ def main():
|
|
|
531
599
|
return
|
|
532
600
|
with open(task_file, "r") as f:
|
|
533
601
|
campaign_data = json.load(f)
|
|
534
|
-
destination =
|
|
602
|
+
destination = (
|
|
603
|
+
campaign_data.get("info", {}).get("assets", {}).get("destination")
|
|
604
|
+
)
|
|
535
605
|
if destination:
|
|
536
606
|
symlink_path = f"{ROOT}/data/{destination}".rstrip("/")
|
|
537
607
|
if os.path.islink(symlink_path):
|
|
@@ -541,8 +611,11 @@ def main():
|
|
|
541
611
|
# Parse optional campaign name
|
|
542
612
|
purge_args = argparse.ArgumentParser()
|
|
543
613
|
purge_args.add_argument(
|
|
544
|
-
|
|
545
|
-
|
|
614
|
+
"campaign",
|
|
615
|
+
type=str,
|
|
616
|
+
nargs="?",
|
|
617
|
+
default=None,
|
|
618
|
+
help="Optional campaign name to purge (purges all if not specified)",
|
|
546
619
|
)
|
|
547
620
|
purge_args = purge_args.parse_args(args_unknown)
|
|
548
621
|
progress_data = load_progress_data()
|
|
@@ -556,7 +629,7 @@ def main():
|
|
|
556
629
|
confirm = input(
|
|
557
630
|
f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
|
|
558
631
|
)
|
|
559
|
-
if confirm.lower() ==
|
|
632
|
+
if confirm.lower() == "y":
|
|
560
633
|
# Unlink assets before removing task file
|
|
561
634
|
_unlink_assets(campaign_id)
|
|
562
635
|
# Remove task file
|
|
@@ -580,7 +653,7 @@ def main():
|
|
|
580
653
|
confirm = input(
|
|
581
654
|
"Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
|
|
582
655
|
)
|
|
583
|
-
if confirm.lower() ==
|
|
656
|
+
if confirm.lower() == "y":
|
|
584
657
|
# Unlink all assets first
|
|
585
658
|
for campaign_id in progress_data.keys():
|
|
586
659
|
_unlink_assets(campaign_id)
|