pearmut 0.3.3__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/app.py +119 -27
- pearmut/assignment.py +318 -55
- pearmut/cli.py +245 -135
- pearmut/constants.py +93 -0
- pearmut/results_export.py +210 -0
- pearmut/static/basic.bundle.js +1 -1
- pearmut/static/basic.html +39 -3
- pearmut/static/dashboard.bundle.js +1 -1
- pearmut/static/dashboard.html +27 -12
- pearmut/static/index.bundle.js +1 -1
- pearmut/static/index.html +1 -1
- pearmut/utils.py +3 -1
- {pearmut-0.3.3.dist-info → pearmut-1.0.1.dist-info}/METADATA +152 -34
- pearmut-1.0.1.dist-info/RECORD +20 -0
- pearmut-0.3.3.dist-info/RECORD +0 -18
- {pearmut-0.3.3.dist-info → pearmut-1.0.1.dist-info}/WHEEL +0 -0
- {pearmut-0.3.3.dist-info → pearmut-1.0.1.dist-info}/entry_points.txt +0 -0
- {pearmut-0.3.3.dist-info → pearmut-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {pearmut-0.3.3.dist-info → pearmut-1.0.1.dist-info}/top_level.txt +0 -0
pearmut/cli.py
CHANGED
|
@@ -23,31 +23,39 @@ def _run(args_unknown):
|
|
|
23
23
|
|
|
24
24
|
args = argparse.ArgumentParser()
|
|
25
25
|
args.add_argument(
|
|
26
|
-
"--port", type=int, default=8001,
|
|
27
|
-
help="Port to run the server on"
|
|
26
|
+
"--port", type=int, default=8001, help="Port to run the server on"
|
|
28
27
|
)
|
|
29
28
|
args.add_argument(
|
|
30
|
-
"--server",
|
|
31
|
-
|
|
29
|
+
"--server",
|
|
30
|
+
default="http://localhost:8001",
|
|
31
|
+
help="Prefix server URL for protocol links",
|
|
32
32
|
)
|
|
33
33
|
args = args.parse_args(args_unknown)
|
|
34
34
|
|
|
35
35
|
# print access dashboard URL for all campaigns
|
|
36
36
|
if tasks_data:
|
|
37
|
-
dashboard_url =
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
37
|
+
dashboard_url = (
|
|
38
|
+
args.server
|
|
39
|
+
+ "/dashboard.html?"
|
|
40
|
+
+ "&".join(
|
|
41
|
+
[
|
|
42
|
+
f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
|
|
43
|
+
for campaign_id, campaign_data in tasks_data.items()
|
|
44
|
+
]
|
|
45
|
+
)
|
|
46
|
+
)
|
|
47
|
+
print(
|
|
48
|
+
"\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
|
|
49
|
+
)
|
|
50
|
+
print("🍐", dashboard_url + "\n", flush=True)
|
|
51
|
+
|
|
44
52
|
# disable startup message
|
|
45
53
|
uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
|
|
46
54
|
# set time logging
|
|
47
55
|
uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
|
|
48
|
-
uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
|
|
49
|
-
|
|
50
|
-
)
|
|
56
|
+
uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
|
|
57
|
+
"fmt"
|
|
58
|
+
] = "%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s"
|
|
51
59
|
uvicorn.run(
|
|
52
60
|
app,
|
|
53
61
|
host="0.0.0.0",
|
|
@@ -59,9 +67,9 @@ def _run(args_unknown):
|
|
|
59
67
|
def _validate_item_structure(items):
|
|
60
68
|
"""
|
|
61
69
|
Validate that items have the correct structure.
|
|
62
|
-
Items should be lists of dictionaries with 'src' and '
|
|
70
|
+
Items should be lists of dictionaries with 'tgt' and optionally 'src' and/or 'ref' keys.
|
|
63
71
|
The 'tgt' field should be a dictionary mapping model names to translations.
|
|
64
|
-
|
|
72
|
+
|
|
65
73
|
Args:
|
|
66
74
|
items: List of item dictionaries to validate
|
|
67
75
|
"""
|
|
@@ -70,71 +78,91 @@ def _validate_item_structure(items):
|
|
|
70
78
|
|
|
71
79
|
for item in items:
|
|
72
80
|
if not isinstance(item, dict):
|
|
73
|
-
raise ValueError("Each item must be a dictionary with '
|
|
74
|
-
if
|
|
75
|
-
raise ValueError("Each item must contain '
|
|
76
|
-
|
|
77
|
-
# Validate src is
|
|
78
|
-
if not isinstance(item[
|
|
81
|
+
raise ValueError("Each item must be a dictionary with 'tgt' key")
|
|
82
|
+
if "tgt" not in item:
|
|
83
|
+
raise ValueError("Each item must contain 'tgt' key")
|
|
84
|
+
|
|
85
|
+
# Validate src is a string if present
|
|
86
|
+
if "src" in item and not isinstance(item["src"], str):
|
|
79
87
|
raise ValueError("Item 'src' must be a string")
|
|
80
|
-
|
|
88
|
+
|
|
89
|
+
# Validate ref is a string if present
|
|
90
|
+
if "ref" in item and not isinstance(item["ref"], str):
|
|
91
|
+
raise ValueError("Item 'ref' must be a string")
|
|
92
|
+
|
|
81
93
|
# Validate tgt is a dictionary (basic template with model names)
|
|
82
|
-
if isinstance(item[
|
|
94
|
+
if isinstance(item["tgt"], str):
|
|
83
95
|
# String not allowed - suggest using dictionary (don't include user input to prevent injection)
|
|
84
|
-
raise ValueError(
|
|
85
|
-
|
|
96
|
+
raise ValueError(
|
|
97
|
+
'Item \'tgt\' must be a dictionary mapping model names to translations. For single translation, use {"default": "your_translation"}'
|
|
98
|
+
)
|
|
99
|
+
elif isinstance(item["tgt"], dict):
|
|
86
100
|
# Dictionary mapping model names to translations
|
|
87
101
|
# Validate that model names don't contain only numbers (JavaScript ordering issue)
|
|
88
|
-
for model_name, translation in item[
|
|
102
|
+
for model_name, translation in item["tgt"].items():
|
|
89
103
|
if not isinstance(model_name, str):
|
|
90
104
|
raise ValueError("Model names in 'tgt' dictionary must be strings")
|
|
91
105
|
if model_name.isdigit():
|
|
92
|
-
raise ValueError(
|
|
106
|
+
raise ValueError(
|
|
107
|
+
f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)"
|
|
108
|
+
)
|
|
93
109
|
if not isinstance(translation, str):
|
|
94
|
-
raise ValueError(
|
|
110
|
+
raise ValueError(
|
|
111
|
+
f"Translation for model '{model_name}' must be a string"
|
|
112
|
+
)
|
|
95
113
|
else:
|
|
96
|
-
raise ValueError(
|
|
97
|
-
|
|
114
|
+
raise ValueError(
|
|
115
|
+
"Item 'tgt' must be a dictionary mapping model names to translations"
|
|
116
|
+
)
|
|
117
|
+
|
|
98
118
|
# Validate error_spans structure if present
|
|
99
|
-
if
|
|
100
|
-
if not isinstance(item[
|
|
101
|
-
raise ValueError(
|
|
102
|
-
|
|
119
|
+
if "error_spans" in item:
|
|
120
|
+
if not isinstance(item["error_spans"], dict):
|
|
121
|
+
raise ValueError(
|
|
122
|
+
"'error_spans' must be a dictionary mapping model names to error span lists"
|
|
123
|
+
)
|
|
124
|
+
for model_name, spans in item["error_spans"].items():
|
|
103
125
|
if not isinstance(spans, list):
|
|
104
|
-
raise ValueError(
|
|
105
|
-
|
|
126
|
+
raise ValueError(
|
|
127
|
+
f"Error spans for model '{model_name}' must be a list"
|
|
128
|
+
)
|
|
129
|
+
|
|
106
130
|
# Validate validation structure if present
|
|
107
|
-
if
|
|
108
|
-
if not isinstance(item[
|
|
109
|
-
raise ValueError(
|
|
110
|
-
|
|
131
|
+
if "validation" in item:
|
|
132
|
+
if not isinstance(item["validation"], dict):
|
|
133
|
+
raise ValueError(
|
|
134
|
+
"'validation' must be a dictionary mapping model names to validation rules"
|
|
135
|
+
)
|
|
136
|
+
for model_name, val_rule in item["validation"].items():
|
|
111
137
|
if not isinstance(val_rule, dict):
|
|
112
|
-
raise ValueError(
|
|
138
|
+
raise ValueError(
|
|
139
|
+
f"Validation rule for model '{model_name}' must be a dictionary"
|
|
140
|
+
)
|
|
113
141
|
|
|
114
142
|
|
|
115
143
|
def _validate_document_models(doc):
|
|
116
144
|
"""
|
|
117
145
|
Validate that all items in a document have the same model outputs.
|
|
118
|
-
|
|
146
|
+
|
|
119
147
|
Args:
|
|
120
148
|
doc: List of items in a document
|
|
121
|
-
|
|
149
|
+
|
|
122
150
|
Returns:
|
|
123
151
|
None if valid
|
|
124
|
-
|
|
152
|
+
|
|
125
153
|
Raises:
|
|
126
154
|
ValueError: If items have different model outputs
|
|
127
155
|
"""
|
|
128
156
|
# Get model names from the first item
|
|
129
157
|
first_item = doc[0]
|
|
130
|
-
first_models = set(first_item[
|
|
131
|
-
|
|
158
|
+
first_models = set(first_item["tgt"].keys())
|
|
159
|
+
|
|
132
160
|
# Check all other items have the same model names
|
|
133
161
|
for i, item in enumerate(doc[1:], start=1):
|
|
134
|
-
if
|
|
162
|
+
if "tgt" not in item or not isinstance(item["tgt"], dict):
|
|
135
163
|
continue
|
|
136
|
-
|
|
137
|
-
item_models = set(item[
|
|
164
|
+
|
|
165
|
+
item_models = set(item["tgt"].keys())
|
|
138
166
|
if item_models != first_models:
|
|
139
167
|
raise ValueError(
|
|
140
168
|
f"Document contains items with different model outputs. "
|
|
@@ -147,71 +175,69 @@ def _validate_document_models(doc):
|
|
|
147
175
|
def _shuffle_campaign_data(campaign_data, rng):
|
|
148
176
|
"""
|
|
149
177
|
Shuffle campaign data at the document level in-place
|
|
150
|
-
|
|
178
|
+
|
|
151
179
|
For each document, randomly shuffles the order of models in the tgt dictionary.
|
|
152
|
-
|
|
180
|
+
|
|
153
181
|
Args:
|
|
154
182
|
campaign_data: The campaign data dictionary
|
|
155
183
|
rng: Random number generator with campaign-specific seed
|
|
156
184
|
"""
|
|
185
|
+
|
|
157
186
|
def shuffle_document(doc):
|
|
158
187
|
"""Shuffle a single document (list of items) by reordering models in tgt dict."""
|
|
159
188
|
# Validate that all items have the same models
|
|
160
189
|
_validate_document_models(doc)
|
|
161
|
-
|
|
190
|
+
|
|
162
191
|
# Get all model names from the first item's tgt dict
|
|
163
192
|
first_item = doc[0]
|
|
164
|
-
model_names = list(first_item[
|
|
193
|
+
model_names = list(first_item["tgt"].keys())
|
|
165
194
|
rng.shuffle(model_names)
|
|
166
|
-
|
|
195
|
+
|
|
167
196
|
# Reorder tgt dict for all items in the document
|
|
168
197
|
for item in doc:
|
|
169
|
-
if
|
|
170
|
-
item["tgt"] = {
|
|
171
|
-
|
|
172
|
-
for model in model_names
|
|
173
|
-
}
|
|
174
|
-
|
|
198
|
+
if "tgt" in item and isinstance(item["tgt"], dict):
|
|
199
|
+
item["tgt"] = {model: item["tgt"][model] for model in model_names}
|
|
200
|
+
|
|
175
201
|
assignment = campaign_data["info"]["assignment"]
|
|
176
|
-
|
|
202
|
+
|
|
177
203
|
if assignment == "task-based":
|
|
178
204
|
# After transformation, data is a dict mapping user_id -> tasks
|
|
179
205
|
for user_id, task in campaign_data["data"].items():
|
|
180
206
|
for doc in task:
|
|
181
207
|
shuffle_document(doc)
|
|
182
|
-
elif assignment
|
|
208
|
+
elif assignment in ["single-stream", "dynamic"]:
|
|
183
209
|
# Shuffle each document in the shared pool
|
|
184
210
|
for doc in campaign_data["data"]:
|
|
185
211
|
shuffle_document(doc)
|
|
186
212
|
|
|
187
213
|
|
|
188
|
-
def _add_single_campaign(
|
|
214
|
+
def _add_single_campaign(campaign_data, overwrite, server):
|
|
189
215
|
"""
|
|
190
|
-
Add a single campaign from
|
|
216
|
+
Add a single campaign from campaign data dictionary.
|
|
191
217
|
"""
|
|
192
218
|
import random
|
|
193
219
|
|
|
194
220
|
import wonderwords
|
|
195
221
|
|
|
196
|
-
|
|
197
|
-
|
|
222
|
+
if "campaign_id" not in campaign_data:
|
|
223
|
+
raise ValueError("Campaign data must contain 'campaign_id' field.")
|
|
224
|
+
if "info" not in campaign_data:
|
|
225
|
+
raise ValueError("Campaign data must contain 'info' field.")
|
|
226
|
+
if "data" not in campaign_data:
|
|
227
|
+
raise ValueError("Campaign data must contain 'data' field.")
|
|
198
228
|
|
|
199
229
|
with open(f"{ROOT}/data/progress.json", "r") as f:
|
|
200
230
|
progress_data = json.load(f)
|
|
201
231
|
|
|
202
|
-
if campaign_data[
|
|
232
|
+
if campaign_data["campaign_id"] in progress_data and not overwrite:
|
|
203
233
|
raise ValueError(
|
|
204
234
|
f"Campaign {campaign_data['campaign_id']} already exists.\n"
|
|
205
235
|
"Use -o to overwrite."
|
|
206
236
|
)
|
|
207
237
|
|
|
208
|
-
if "info" not in campaign_data:
|
|
209
|
-
raise ValueError("Campaign data must contain 'info' field.")
|
|
210
|
-
if "data" not in campaign_data:
|
|
211
|
-
raise ValueError("Campaign data must contain 'data' field.")
|
|
212
238
|
if "assignment" not in campaign_data["info"]:
|
|
213
239
|
raise ValueError("Campaign 'info' must contain 'assignment' field.")
|
|
214
|
-
|
|
240
|
+
|
|
215
241
|
# Template defaults to "basic" if not specified
|
|
216
242
|
assignment = campaign_data["info"]["assignment"]
|
|
217
243
|
# use random words for identifying users
|
|
@@ -225,11 +251,11 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
225
251
|
if assignment == "task-based":
|
|
226
252
|
tasks = campaign_data["data"]
|
|
227
253
|
if not isinstance(tasks, list):
|
|
228
|
-
raise ValueError(
|
|
229
|
-
"Task-based campaign 'data' must be a list of tasks.")
|
|
254
|
+
raise ValueError("Task-based campaign 'data' must be a list of tasks.")
|
|
230
255
|
if not all(isinstance(task, list) for task in tasks):
|
|
231
256
|
raise ValueError(
|
|
232
|
-
"Each task in task-based campaign 'data' must be a list of items."
|
|
257
|
+
"Each task in task-based campaign 'data' must be a list of items."
|
|
258
|
+
)
|
|
233
259
|
# Validate item structure for each task
|
|
234
260
|
for task_i, task in enumerate(tasks):
|
|
235
261
|
for doc_i, doc in enumerate(task):
|
|
@@ -241,11 +267,9 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
241
267
|
elif assignment == "single-stream":
|
|
242
268
|
tasks = campaign_data["data"]
|
|
243
269
|
if users_spec is None:
|
|
244
|
-
raise ValueError(
|
|
245
|
-
"Single-stream campaigns must specify 'users' in info.")
|
|
270
|
+
raise ValueError("Single-stream campaigns must specify 'users' in info.")
|
|
246
271
|
if not isinstance(campaign_data["data"], list):
|
|
247
|
-
raise ValueError(
|
|
248
|
-
"Single-stream campaign 'data' must be a list of items.")
|
|
272
|
+
raise ValueError("Single-stream campaign 'data' must be a list of items.")
|
|
249
273
|
# Validate item structure for single-stream
|
|
250
274
|
for doc_i, doc in enumerate(tasks):
|
|
251
275
|
try:
|
|
@@ -259,8 +283,50 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
259
283
|
else:
|
|
260
284
|
raise ValueError("'users' must be an integer or a list.")
|
|
261
285
|
elif assignment == "dynamic":
|
|
262
|
-
|
|
263
|
-
|
|
286
|
+
tasks = campaign_data["data"]
|
|
287
|
+
if users_spec is None:
|
|
288
|
+
raise ValueError("Dynamic campaigns must specify 'users' in info.")
|
|
289
|
+
if not isinstance(campaign_data["data"], list):
|
|
290
|
+
raise ValueError("Dynamic campaign 'data' must be a list of items.")
|
|
291
|
+
# Validate item structure for dynamic
|
|
292
|
+
for doc_i, doc in enumerate(tasks):
|
|
293
|
+
try:
|
|
294
|
+
_validate_item_structure(doc)
|
|
295
|
+
except ValueError as e:
|
|
296
|
+
raise ValueError(f"Document {doc_i}: {e}")
|
|
297
|
+
if isinstance(users_spec, int):
|
|
298
|
+
num_users = users_spec
|
|
299
|
+
elif isinstance(users_spec, list):
|
|
300
|
+
num_users = len(users_spec)
|
|
301
|
+
else:
|
|
302
|
+
raise ValueError("'users' must be an integer or a list.")
|
|
303
|
+
# Validate dynamic-specific parameters
|
|
304
|
+
if "dynamic_top" not in campaign_data["info"]:
|
|
305
|
+
campaign_data["info"]["dynamic_top"] = 2
|
|
306
|
+
if "dynamic_first" not in campaign_data["info"]:
|
|
307
|
+
campaign_data["info"]["dynamic_first"] = 5
|
|
308
|
+
if "dynamic_contrastive_models" not in campaign_data["info"]:
|
|
309
|
+
campaign_data["info"]["dynamic_contrastive_models"] = 1
|
|
310
|
+
# Validate that dynamic_first is at least 1
|
|
311
|
+
assert (
|
|
312
|
+
campaign_data["info"]["dynamic_first"] >= 1
|
|
313
|
+
), "dynamic_first must be at least 1"
|
|
314
|
+
# Validate that dynamic_contrastive_models is at most dynamic_top
|
|
315
|
+
assert (
|
|
316
|
+
campaign_data["info"]["dynamic_contrastive_models"]
|
|
317
|
+
<= campaign_data["info"]["dynamic_top"]
|
|
318
|
+
), "dynamic_contrastive_models must be at most dynamic_top"
|
|
319
|
+
# Validate that all items have the same models
|
|
320
|
+
all_models = set()
|
|
321
|
+
for item in campaign_data["data"]:
|
|
322
|
+
if item and len(item) > 0:
|
|
323
|
+
all_models.update(item[0]["tgt"].keys())
|
|
324
|
+
for item in campaign_data["data"]:
|
|
325
|
+
if item and len(item) > 0:
|
|
326
|
+
item_models = set(item[0]["tgt"].keys())
|
|
327
|
+
assert (
|
|
328
|
+
item_models == all_models
|
|
329
|
+
), "All items must have the same model outputs"
|
|
264
330
|
else:
|
|
265
331
|
raise ValueError(f"Unknown campaign assignment type: {assignment}")
|
|
266
332
|
|
|
@@ -272,14 +338,12 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
272
338
|
new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
|
|
273
339
|
if new_id not in user_ids:
|
|
274
340
|
user_ids.append(new_id)
|
|
275
|
-
user_ids = [
|
|
276
|
-
f"{user_id}-{rng.randint(0, 999):03d}"
|
|
277
|
-
for user_id in user_ids
|
|
278
|
-
]
|
|
341
|
+
user_ids = [f"{user_id}-{rng.randint(0, 999):03d}" for user_id in user_ids]
|
|
279
342
|
elif isinstance(users_spec, list):
|
|
280
343
|
if len(users_spec) != num_users:
|
|
281
344
|
raise ValueError(
|
|
282
|
-
f"Number of users ({len(users_spec)}) must match expected count ({num_users})."
|
|
345
|
+
f"Number of users ({len(users_spec)}) must match expected count ({num_users})."
|
|
346
|
+
)
|
|
283
347
|
if all(isinstance(u, str) for u in users_spec):
|
|
284
348
|
# List of string IDs
|
|
285
349
|
user_ids = users_spec
|
|
@@ -298,32 +362,47 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
298
362
|
raise ValueError("'users' list must contain all strings or all dicts.")
|
|
299
363
|
else:
|
|
300
364
|
raise ValueError("'users' must be an integer or a list.")
|
|
301
|
-
|
|
365
|
+
|
|
302
366
|
if "protocol" not in campaign_data["info"]:
|
|
303
367
|
campaign_data["info"]["protocol"] = "ESA"
|
|
304
|
-
print(
|
|
368
|
+
print(
|
|
369
|
+
"Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'."
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
# Validate sliders structure if present
|
|
373
|
+
if "sliders" in campaign_data["info"]:
|
|
374
|
+
if not all(
|
|
375
|
+
isinstance(s, dict)
|
|
376
|
+
and all(k in s for k in ("name", "min", "max", "step"))
|
|
377
|
+
and isinstance(s.get("min"), (int, float))
|
|
378
|
+
and isinstance(s.get("max"), (int, float))
|
|
379
|
+
and isinstance(s.get("step"), (int, float))
|
|
380
|
+
and s["min"] <= s["max"]
|
|
381
|
+
and s["step"] > 0
|
|
382
|
+
for s in campaign_data["info"]["sliders"]
|
|
383
|
+
):
|
|
384
|
+
raise ValueError(
|
|
385
|
+
"Each slider must be a dict with 'name', 'min', 'max', and 'step' keys, where min/max/step are numeric, min <= max, and step > 0"
|
|
386
|
+
)
|
|
305
387
|
|
|
306
388
|
# Remove output file when overwriting (after all validations pass)
|
|
307
|
-
if overwrite and campaign_data[
|
|
389
|
+
if overwrite and campaign_data["campaign_id"] in progress_data:
|
|
308
390
|
output_file = f"{ROOT}/data/outputs/{campaign_data['campaign_id']}.jsonl"
|
|
309
391
|
if os.path.exists(output_file):
|
|
310
392
|
os.remove(output_file)
|
|
311
393
|
|
|
312
394
|
# For task-based, data is a dict mapping user_id -> tasks
|
|
313
|
-
# For single-stream, data is a flat list (shared among all users)
|
|
395
|
+
# For single-stream and dynamic, data is a flat list (shared among all users)
|
|
314
396
|
if assignment == "task-based":
|
|
315
397
|
campaign_data["data"] = {
|
|
316
|
-
user_id: task
|
|
317
|
-
for user_id, task in zip(user_ids, tasks)
|
|
398
|
+
user_id: task for user_id, task in zip(user_ids, tasks)
|
|
318
399
|
}
|
|
319
|
-
elif assignment
|
|
400
|
+
elif assignment in ["single-stream", "dynamic"]:
|
|
320
401
|
campaign_data["data"] = tasks
|
|
321
402
|
|
|
322
403
|
# generate a token for dashboard access if not present
|
|
323
404
|
if "token" not in campaign_data:
|
|
324
|
-
campaign_data["token"] = (
|
|
325
|
-
hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
|
|
326
|
-
)
|
|
405
|
+
campaign_data["token"] = hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
|
|
327
406
|
|
|
328
407
|
def get_token(user_id, token_type):
|
|
329
408
|
"""Get user token or generate a random one."""
|
|
@@ -336,9 +415,17 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
336
415
|
user_id: {
|
|
337
416
|
# TODO: progress tracking could be based on the assignment type
|
|
338
417
|
"progress": (
|
|
339
|
-
[False]*len(campaign_data["data"][user_id])
|
|
340
|
-
|
|
341
|
-
else
|
|
418
|
+
[False] * len(campaign_data["data"][user_id])
|
|
419
|
+
if assignment == "task-based"
|
|
420
|
+
else (
|
|
421
|
+
[False] * len(campaign_data["data"])
|
|
422
|
+
if assignment == "single-stream"
|
|
423
|
+
else (
|
|
424
|
+
[list() for _ in range(len(campaign_data["data"]))]
|
|
425
|
+
if assignment == "dynamic"
|
|
426
|
+
else []
|
|
427
|
+
)
|
|
428
|
+
)
|
|
342
429
|
),
|
|
343
430
|
"time_start": None,
|
|
344
431
|
"time_end": None,
|
|
@@ -357,26 +444,34 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
357
444
|
# Handle assets symlink if specified
|
|
358
445
|
if "assets" in campaign_data["info"]:
|
|
359
446
|
assets_config = campaign_data["info"]["assets"]
|
|
360
|
-
|
|
447
|
+
|
|
361
448
|
# assets must be a dictionary with source and destination keys
|
|
362
449
|
if not isinstance(assets_config, dict):
|
|
363
|
-
raise ValueError(
|
|
450
|
+
raise ValueError(
|
|
451
|
+
"Assets must be a dictionary with 'source' and 'destination' keys."
|
|
452
|
+
)
|
|
364
453
|
if "source" not in assets_config or "destination" not in assets_config:
|
|
365
|
-
raise ValueError(
|
|
366
|
-
|
|
454
|
+
raise ValueError(
|
|
455
|
+
"Assets config must contain 'source' and 'destination' keys."
|
|
456
|
+
)
|
|
457
|
+
|
|
367
458
|
assets_source = assets_config["source"]
|
|
368
459
|
assets_destination = assets_config["destination"]
|
|
369
|
-
|
|
460
|
+
|
|
370
461
|
# Validate destination starts with 'assets/'
|
|
371
462
|
if not assets_destination.startswith("assets/"):
|
|
372
|
-
raise ValueError(
|
|
373
|
-
|
|
463
|
+
raise ValueError(
|
|
464
|
+
f"Assets destination '{assets_destination}' must start with 'assets/'."
|
|
465
|
+
)
|
|
466
|
+
|
|
374
467
|
# Resolve relative paths from the caller's current working directory
|
|
375
468
|
assets_real_path = os.path.abspath(assets_source)
|
|
376
469
|
|
|
377
470
|
if not os.path.isdir(assets_real_path):
|
|
378
|
-
raise ValueError(
|
|
379
|
-
|
|
471
|
+
raise ValueError(
|
|
472
|
+
f"Assets source path '{assets_real_path}' must be an existing directory."
|
|
473
|
+
)
|
|
474
|
+
|
|
380
475
|
# Symlink path is based on the destination, stripping the 'assets/' prefix
|
|
381
476
|
# User assets are now stored under data/assets/ instead of static/assets/
|
|
382
477
|
symlink_path = f"{ROOT}/data/{assets_destination}".rstrip("/")
|
|
@@ -384,7 +479,7 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
384
479
|
# Remove existing symlink if present and we are overriding the same campaign
|
|
385
480
|
if os.path.lexists(symlink_path):
|
|
386
481
|
# Check if any other campaign is using this destination
|
|
387
|
-
current_campaign_id = campaign_data[
|
|
482
|
+
current_campaign_id = campaign_data["campaign_id"]
|
|
388
483
|
|
|
389
484
|
for other_campaign_id in progress_data.keys():
|
|
390
485
|
if other_campaign_id == current_campaign_id:
|
|
@@ -401,8 +496,10 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
401
496
|
if overwrite:
|
|
402
497
|
os.remove(symlink_path)
|
|
403
498
|
else:
|
|
404
|
-
raise ValueError(
|
|
405
|
-
|
|
499
|
+
raise ValueError(
|
|
500
|
+
f"Assets destination '{assets_destination}' is already taken."
|
|
501
|
+
)
|
|
502
|
+
|
|
406
503
|
# Ensure the assets directory exists
|
|
407
504
|
# get parent of symlink_path dir
|
|
408
505
|
os.makedirs(os.path.dirname(symlink_path), exist_ok=True)
|
|
@@ -410,7 +507,6 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
410
507
|
os.symlink(assets_real_path, symlink_path, target_is_directory=True)
|
|
411
508
|
print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
|
|
412
509
|
|
|
413
|
-
|
|
414
510
|
# Shuffle data if shuffle parameter is true (defaults to true)
|
|
415
511
|
should_shuffle = campaign_data["info"].get("shuffle", True)
|
|
416
512
|
if should_shuffle:
|
|
@@ -420,17 +516,14 @@ def _add_single_campaign(data_file, overwrite, server):
|
|
|
420
516
|
with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
|
|
421
517
|
json.dump(campaign_data, f, indent=2, ensure_ascii=False)
|
|
422
518
|
|
|
423
|
-
progress_data[campaign_data[
|
|
424
|
-
|
|
425
|
-
with open(f"{ROOT}/data/progress.json", "w") as f:
|
|
426
|
-
json.dump(progress_data, f, indent=2, ensure_ascii=False)
|
|
427
|
-
|
|
519
|
+
progress_data[campaign_data["campaign_id"]] = user_progress
|
|
520
|
+
save_progress_data(progress_data)
|
|
428
521
|
|
|
429
522
|
print(
|
|
430
523
|
"🎛️ ",
|
|
431
524
|
f"{server}/dashboard.html"
|
|
432
525
|
f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
|
|
433
|
-
f"&token={campaign_data['token']}"
|
|
526
|
+
f"&token={campaign_data['token']}",
|
|
434
527
|
)
|
|
435
528
|
for user_id, user_val in user_progress.items():
|
|
436
529
|
# point to the protocol URL
|
|
@@ -444,22 +537,28 @@ def _add_campaign(args_unknown):
|
|
|
444
537
|
"""
|
|
445
538
|
args = argparse.ArgumentParser()
|
|
446
539
|
args.add_argument(
|
|
447
|
-
|
|
448
|
-
|
|
540
|
+
"data_files",
|
|
541
|
+
type=str,
|
|
542
|
+
nargs="+",
|
|
543
|
+
help="One or more paths to campaign data files",
|
|
449
544
|
)
|
|
450
545
|
args.add_argument(
|
|
451
|
-
"-o",
|
|
452
|
-
|
|
546
|
+
"-o",
|
|
547
|
+
"--overwrite",
|
|
548
|
+
action="store_true",
|
|
549
|
+
help="Overwrite existing campaign if it exists",
|
|
453
550
|
)
|
|
454
551
|
args.add_argument(
|
|
455
|
-
"--server",
|
|
456
|
-
|
|
552
|
+
"--server",
|
|
553
|
+
default="http://localhost:8001",
|
|
554
|
+
help="Prefix server URL for protocol links",
|
|
457
555
|
)
|
|
458
556
|
args = args.parse_args(args_unknown)
|
|
459
557
|
|
|
460
558
|
for data_file in args.data_files:
|
|
461
559
|
try:
|
|
462
|
-
|
|
560
|
+
with open(data_file, "r") as f:
|
|
561
|
+
_add_single_campaign(json.load(f), args.overwrite, args.server)
|
|
463
562
|
except Exception as e:
|
|
464
563
|
print(f"Error processing {data_file}: {e}")
|
|
465
564
|
exit(1)
|
|
@@ -470,7 +569,13 @@ def main():
|
|
|
470
569
|
Main entry point for the CLI.
|
|
471
570
|
"""
|
|
472
571
|
args = argparse.ArgumentParser()
|
|
473
|
-
args.add_argument(
|
|
572
|
+
args.add_argument(
|
|
573
|
+
"command",
|
|
574
|
+
type=str,
|
|
575
|
+
choices=["run", "add", "purge"],
|
|
576
|
+
default="run",
|
|
577
|
+
nargs="?",
|
|
578
|
+
)
|
|
474
579
|
args, args_unknown = args.parse_known_args()
|
|
475
580
|
|
|
476
581
|
# enforce that only one pearmut process is running
|
|
@@ -480,11 +585,11 @@ def main():
|
|
|
480
585
|
print(p)
|
|
481
586
|
exit(1)
|
|
482
587
|
|
|
483
|
-
if args.command ==
|
|
588
|
+
if args.command == "run":
|
|
484
589
|
_run(args_unknown)
|
|
485
|
-
elif args.command ==
|
|
590
|
+
elif args.command == "add":
|
|
486
591
|
_add_campaign(args_unknown)
|
|
487
|
-
elif args.command ==
|
|
592
|
+
elif args.command == "purge":
|
|
488
593
|
import shutil
|
|
489
594
|
|
|
490
595
|
def _unlink_assets(campaign_id):
|
|
@@ -494,7 +599,9 @@ def main():
|
|
|
494
599
|
return
|
|
495
600
|
with open(task_file, "r") as f:
|
|
496
601
|
campaign_data = json.load(f)
|
|
497
|
-
destination =
|
|
602
|
+
destination = (
|
|
603
|
+
campaign_data.get("info", {}).get("assets", {}).get("destination")
|
|
604
|
+
)
|
|
498
605
|
if destination:
|
|
499
606
|
symlink_path = f"{ROOT}/data/{destination}".rstrip("/")
|
|
500
607
|
if os.path.islink(symlink_path):
|
|
@@ -504,8 +611,11 @@ def main():
|
|
|
504
611
|
# Parse optional campaign name
|
|
505
612
|
purge_args = argparse.ArgumentParser()
|
|
506
613
|
purge_args.add_argument(
|
|
507
|
-
|
|
508
|
-
|
|
614
|
+
"campaign",
|
|
615
|
+
type=str,
|
|
616
|
+
nargs="?",
|
|
617
|
+
default=None,
|
|
618
|
+
help="Optional campaign name to purge (purges all if not specified)",
|
|
509
619
|
)
|
|
510
620
|
purge_args = purge_args.parse_args(args_unknown)
|
|
511
621
|
progress_data = load_progress_data()
|
|
@@ -519,7 +629,7 @@ def main():
|
|
|
519
629
|
confirm = input(
|
|
520
630
|
f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
|
|
521
631
|
)
|
|
522
|
-
if confirm.lower() ==
|
|
632
|
+
if confirm.lower() == "y":
|
|
523
633
|
# Unlink assets before removing task file
|
|
524
634
|
_unlink_assets(campaign_id)
|
|
525
635
|
# Remove task file
|
|
@@ -543,7 +653,7 @@ def main():
|
|
|
543
653
|
confirm = input(
|
|
544
654
|
"Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
|
|
545
655
|
)
|
|
546
|
-
if confirm.lower() ==
|
|
656
|
+
if confirm.lower() == "y":
|
|
547
657
|
# Unlink all assets first
|
|
548
658
|
for campaign_id in progress_data.keys():
|
|
549
659
|
_unlink_assets(campaign_id)
|