pearmut 1.0.0-py3-none-any.whl → 1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/app.py +103 -2
- pearmut/assignment.py +59 -25
- pearmut/cli.py +241 -150
- pearmut/constants.py +93 -0
- pearmut/results_export.py +1 -1
- pearmut/static/annotate.bundle.js +1 -0
- pearmut/static/annotate.html +160 -0
- pearmut/static/dashboard.bundle.js +1 -1
- pearmut/static/dashboard.html +6 -1
- pearmut/static/index.html +1 -1
- pearmut/static/style.css +8 -0
- pearmut/utils.py +4 -14
- {pearmut-1.0.0.dist-info → pearmut-1.0.2.dist-info}/METADATA +87 -16
- pearmut-1.0.2.dist-info/RECORD +20 -0
- pearmut/static/basic.bundle.js +0 -1
- pearmut/static/basic.html +0 -97
- pearmut-1.0.0.dist-info/RECORD +0 -19
- {pearmut-1.0.0.dist-info → pearmut-1.0.2.dist-info}/WHEEL +0 -0
- {pearmut-1.0.0.dist-info → pearmut-1.0.2.dist-info}/entry_points.txt +0 -0
- {pearmut-1.0.0.dist-info → pearmut-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {pearmut-1.0.0.dist-info → pearmut-1.0.2.dist-info}/top_level.txt +0 -0
pearmut/cli.py
CHANGED
@@ -3,51 +3,84 @@ Command-line interface for managing and running the Pearmut server.
 """
 
 import argparse
+import atexit
+import fcntl
 import hashlib
 import json
 import os
 import urllib.parse
 
-import psutil
-
-from .utils import ROOT, load_progress_data, save_progress_data
+from .utils import ROOT, TOKEN_MAIN, load_progress_data, save_progress_data
 
 os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
 load_progress_data(warn=None)
 
 
+
 def _run(args_unknown):
+    # Acquire lock before starting server
+    lock_file = f"{ROOT}/data/.lock"
+    try:
+        lock_fd = open(lock_file, "a+")
+        fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        lock_fd.seek(0)
+        lock_fd.truncate()
+        lock_fd.write(str(os.getpid()))
+        lock_fd.flush()
+    except BlockingIOError:
+        try:
+            with open(lock_file, "r") as f:
+                pid = f.read().strip()
+            print("You can't run multiple instances of Pearmut in the same directory.")
+            if pid:
+                print(f"Another instance (PID {pid}) is holding the lock.")
+        except (FileNotFoundError, PermissionError, OSError):
+            print("You can't run multiple instances of Pearmut in the same directory.")
+        exit(1)
+
+    # Register cleanup to remove lock file on exit
+    atexit.register(lambda: os.path.exists(lock_file) and os.remove(lock_file))
+
     import uvicorn
 
     from .app import app, tasks_data
 
     args = argparse.ArgumentParser()
     args.add_argument(
-        "--port", type=int, default=8001,
-        help="Port to run the server on"
+        "--port", type=int, default=8001, help="Port to run the server on"
    )
     args.add_argument(
-        "--server",
-
+        "--server",
+        default="http://localhost:8001",
+        help="Prefix server URL for protocol links",
     )
     args = args.parse_args(args_unknown)
 
     # print access dashboard URL for all campaigns
-
-
-
-
-
-
-
-
+    dashboard_url = (
+        args.server
+        + "/dashboard?"
+        + f"token_main={TOKEN_MAIN}"
+        + "".join(
+            [
+                f"&campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
+                for campaign_id, campaign_data in tasks_data.items()
+            ]
+        )
+    )
+    print(
+        "\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
+    )
+    print("🍐", dashboard_url + "\n", flush=True)
+
+
     # disable startup message
     uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
     # set time logging
     uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
-    uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
-
-    )
+    uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
+        "fmt"
+    ] = "%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s"
     uvicorn.run(
         app,
         host="0.0.0.0",
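The new single-instance guard above relies on an advisory `flock` that lives and dies with the process (POSIX-only, since it uses `fcntl`). A minimal standalone sketch of the same pattern; the lock path and messages here are illustrative, not Pearmut's:

```python
import atexit
import fcntl
import os
import sys

LOCK_FILE = ".lock"  # Pearmut uses f"{ROOT}/data/.lock"


def acquire_single_instance_lock():
    # Open (or create) the lock file, then try to take an exclusive,
    # non-blocking advisory lock; a second process gets BlockingIOError.
    fd = open(LOCK_FILE, "a+")
    try:
        fcntl.flock(fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError:
        print("Another instance is already running.")
        sys.exit(1)
    # Record the holder's PID so contenders can report who owns the lock.
    fd.seek(0)
    fd.truncate()
    fd.write(str(os.getpid()))
    fd.flush()
    # Best-effort removal of the lock file on normal interpreter exit.
    atexit.register(lambda: os.path.exists(LOCK_FILE) and os.remove(LOCK_FILE))
    return fd  # keep the handle alive; closing it releases the lock


lock = acquire_single_instance_lock()
```

Because the lock is tied to the open file description, the kernel releases it even if the process is killed, so a crash cannot leave the directory permanently locked; at worst a stale PID file remains.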
@@ -59,9 +92,9 @@ def _run(args_unknown):
 def _validate_item_structure(items):
     """
     Validate that items have the correct structure.
-    Items should be lists of dictionaries with 'src' and 'tgt' keys.
+    Items should be lists of dictionaries with 'tgt' and optionally 'src' and/or 'ref' keys.
     The 'tgt' field should be a dictionary mapping model names to translations.
-
+
     Args:
         items: List of item dictionaries to validate
     """
@@ -70,71 +103,91 @@ def _validate_item_structure(items):
 
     for item in items:
         if not isinstance(item, dict):
-            raise ValueError("Each item must be a dictionary with 'src' and 'tgt' keys")
-        if 'src' not in item or 'tgt' not in item:
-            raise ValueError("Each item must contain 'src' and 'tgt' keys")
-
-        # Validate src is a string
-        if not isinstance(item['src'], str):
+            raise ValueError("Each item must be a dictionary with 'tgt' key")
+        if "tgt" not in item:
+            raise ValueError("Each item must contain 'tgt' key")
+
+        # Validate src is a string if present
+        if "src" in item and not isinstance(item["src"], str):
             raise ValueError("Item 'src' must be a string")
-
-        # Validate
-        if isinstance(item['tgt'], str):
+
+        # Validate ref is a string if present
+        if "ref" in item and not isinstance(item["ref"], str):
+            raise ValueError("Item 'ref' must be a string")
+
+        # Validate tgt is a dictionary (annotate template with model names)
+        if isinstance(item["tgt"], str):
             # String not allowed - suggest using dictionary (don't include user input to prevent injection)
-            raise ValueError(
-
+            raise ValueError(
+                'Item \'tgt\' must be a dictionary mapping model names to translations. For single translation, use {"default": "your_translation"}'
+            )
+        elif isinstance(item["tgt"], dict):
             # Dictionary mapping model names to translations
             # Validate that model names don't contain only numbers (JavaScript ordering issue)
-            for model_name, translation in item['tgt'].items():
+            for model_name, translation in item["tgt"].items():
                 if not isinstance(model_name, str):
                     raise ValueError("Model names in 'tgt' dictionary must be strings")
                 if model_name.isdigit():
-                    raise ValueError(f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)")
+                    raise ValueError(
+                        f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)"
+                    )
                 if not isinstance(translation, str):
-                    raise ValueError(f"Translation for model '{model_name}' must be a string")
+                    raise ValueError(
+                        f"Translation for model '{model_name}' must be a string"
+                    )
         else:
-            raise ValueError(
-                "Item 'tgt' must be a dictionary mapping model names to translations")
+            raise ValueError(
+                "Item 'tgt' must be a dictionary mapping model names to translations"
+            )
+
         # Validate error_spans structure if present
-        if 'error_spans' in item:
-            if not isinstance(item['error_spans'], dict):
-                raise ValueError(
-                    "'error_spans' must be a dictionary mapping model names to error span lists")
+        if "error_spans" in item:
+            if not isinstance(item["error_spans"], dict):
+                raise ValueError(
+                    "'error_spans' must be a dictionary mapping model names to error span lists"
+                )
+            for model_name, spans in item["error_spans"].items():
                 if not isinstance(spans, list):
-                    raise ValueError(
-                        f"Error spans for model '{model_name}' must be a list")
+                    raise ValueError(
+                        f"Error spans for model '{model_name}' must be a list"
+                    )
+
         # Validate validation structure if present
-        if 'validation' in item:
-            if not isinstance(item['validation'], dict):
-                raise ValueError(
-                    "'validation' must be a dictionary mapping model names to validation rules")
+        if "validation" in item:
+            if not isinstance(item["validation"], dict):
+                raise ValueError(
+                    "'validation' must be a dictionary mapping model names to validation rules"
+                )
+            for model_name, val_rule in item["validation"].items():
                 if not isinstance(val_rule, dict):
-                    raise ValueError(f"Validation rule for model '{model_name}' must be a dictionary")
+                    raise ValueError(
+                        f"Validation rule for model '{model_name}' must be a dictionary"
+                    )
 
 
 def _validate_document_models(doc):
     """
     Validate that all items in a document have the same model outputs.
-
+
     Args:
         doc: List of items in a document
-
+
     Returns:
         None if valid
-
+
     Raises:
         ValueError: If items have different model outputs
     """
     # Get model names from the first item
     first_item = doc[0]
-    first_models = set(first_item['tgt'].keys())
-
+    first_models = set(first_item["tgt"].keys())
+
     # Check all other items have the same model names
     for i, item in enumerate(doc[1:], start=1):
-        if 'tgt' not in item or not isinstance(item['tgt'], dict):
+        if "tgt" not in item or not isinstance(item["tgt"], dict):
             continue
-
-        item_models = set(item['tgt'].keys())
+
+        item_models = set(item["tgt"].keys())
         if item_models != first_models:
             raise ValueError(
                 f"Document contains items with different model outputs. "
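Putting the rules above together, an item that passes `_validate_item_structure` looks like the following; all values are invented for illustration:

```python
item = {
    # optional: source and reference must be strings when present
    "src": "Der Apfel fällt nicht weit vom Stamm.",
    "ref": "The apple does not fall far from the tree.",
    # required: a dict mapping model names to translations
    "tgt": {
        "model-a": "The apple never falls far from the trunk.",
        "model-b": "Apples don't fall far from the tree.",
    },
    # optional: per-model lists of error spans
    "error_spans": {"model-a": [], "model-b": []},
    # optional: per-model validation rule dicts
    "validation": {"model-a": {}},
}
```

A plain string `tgt` is rejected with the hint to wrap a single output as `{"default": "your_translation"}`, and purely numeric model names such as `"123"` are refused because of JavaScript object-key ordering.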
@@ -147,33 +200,31 @@ def _validate_document_models(doc):
 def _shuffle_campaign_data(campaign_data, rng):
     """
     Shuffle campaign data at the document level in-place
-
+
     For each document, randomly shuffles the order of models in the tgt dictionary.
-
+
     Args:
         campaign_data: The campaign data dictionary
         rng: Random number generator with campaign-specific seed
     """
+
     def shuffle_document(doc):
         """Shuffle a single document (list of items) by reordering models in tgt dict."""
         # Validate that all items have the same models
         _validate_document_models(doc)
-
+
         # Get all model names from the first item's tgt dict
         first_item = doc[0]
-        model_names = list(first_item['tgt'].keys())
+        model_names = list(first_item["tgt"].keys())
         rng.shuffle(model_names)
-
+
         # Reorder tgt dict for all items in the document
         for item in doc:
-            if 'tgt' in item and isinstance(item['tgt'], dict):
-                item["tgt"] = {
-                    model: item['tgt'][model]
-                    for model in model_names
-                }
-
+            if "tgt" in item and isinstance(item["tgt"], dict):
+                item["tgt"] = {model: item["tgt"][model] for model in model_names}
+
     assignment = campaign_data["info"]["assignment"]
-
+
     if assignment == "task-based":
         # After transformation, data is a dict mapping user_id -> tasks
         for user_id, task in campaign_data["data"].items():
@@ -185,34 +236,34 @@ def _shuffle_campaign_data(campaign_data, rng):
                 shuffle_document(doc)
 
 
-def _add_single_campaign(data_file, overwrite, server):
+def _add_single_campaign(campaign_data, overwrite, server):
     """
-    Add a single campaign from a data file.
+    Add a single campaign from campaign data dictionary.
     """
     import random
 
     import wonderwords
 
-    with open(data_file, "r") as f:
-        campaign_data = json.load(f)
+    if "campaign_id" not in campaign_data:
+        raise ValueError("Campaign data must contain 'campaign_id' field.")
+    if "info" not in campaign_data:
+        raise ValueError("Campaign data must contain 'info' field.")
+    if "data" not in campaign_data:
+        raise ValueError("Campaign data must contain 'data' field.")
 
     with open(f"{ROOT}/data/progress.json", "r") as f:
         progress_data = json.load(f)
 
-    if campaign_data['campaign_id'] in progress_data and not overwrite:
+    if campaign_data["campaign_id"] in progress_data and not overwrite:
         raise ValueError(
             f"Campaign {campaign_data['campaign_id']} already exists.\n"
             "Use -o to overwrite."
         )
 
-    if "info" not in campaign_data:
-        raise ValueError("Campaign data must contain 'info' field.")
-    if "data" not in campaign_data:
-        raise ValueError("Campaign data must contain 'data' field.")
     if "assignment" not in campaign_data["info"]:
         raise ValueError("Campaign 'info' must contain 'assignment' field.")
-
-    # Template defaults to "basic" if not specified
+
+    # Template defaults to "annotate" if not specified
     assignment = campaign_data["info"]["assignment"]
     # use random words for identifying users
     rng = random.Random()
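The net effect of this hunk is an API change: `_add_single_campaign` no longer opens the file itself; callers parse the JSON and pass the resulting dict (see the `_add_campaign` hunk below), and the three required top-level keys are now checked up front. A minimal dict that clears these first checks, with placeholder values:

```python
campaign = {
    "campaign_id": "demo-campaign",        # required
    "info": {"assignment": "task-based"},  # required; 'assignment' is checked next
    "data": [],                            # required; shape depends on the assignment
}
```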
@@ -225,11 +276,11 @@ def _add_single_campaign(data_file, overwrite, server):
     if assignment == "task-based":
         tasks = campaign_data["data"]
         if not isinstance(tasks, list):
-            raise ValueError(
-                "Task-based campaign 'data' must be a list of tasks.")
+            raise ValueError("Task-based campaign 'data' must be a list of tasks.")
         if not all(isinstance(task, list) for task in tasks):
             raise ValueError(
-                "Each task in task-based campaign 'data' must be a list of items.")
+                "Each task in task-based campaign 'data' must be a list of items."
+            )
         # Validate item structure for each task
         for task_i, task in enumerate(tasks):
             for doc_i, doc in enumerate(task):
@@ -241,11 +292,9 @@ def _add_single_campaign(data_file, overwrite, server):
     elif assignment == "single-stream":
         tasks = campaign_data["data"]
         if users_spec is None:
-            raise ValueError(
-                "Single-stream campaigns must specify 'users' in info.")
+            raise ValueError("Single-stream campaigns must specify 'users' in info.")
         if not isinstance(campaign_data["data"], list):
-            raise ValueError(
-                "Single-stream campaign 'data' must be a list of items.")
+            raise ValueError("Single-stream campaign 'data' must be a list of items.")
         # Validate item structure for single-stream
         for doc_i, doc in enumerate(tasks):
             try:
@@ -261,11 +310,9 @@ def _add_single_campaign(data_file, overwrite, server):
     elif assignment == "dynamic":
         tasks = campaign_data["data"]
         if users_spec is None:
-            raise ValueError(
-                "Dynamic campaigns must specify 'users' in info.")
+            raise ValueError("Dynamic campaigns must specify 'users' in info.")
         if not isinstance(campaign_data["data"], list):
-            raise ValueError(
-                "Dynamic campaign 'data' must be a list of items.")
+            raise ValueError("Dynamic campaign 'data' must be a list of items.")
         # Validate item structure for dynamic
         for doc_i, doc in enumerate(tasks):
             try:
@@ -286,10 +333,14 @@ def _add_single_campaign(data_file, overwrite, server):
         if "dynamic_contrastive_models" not in campaign_data["info"]:
             campaign_data["info"]["dynamic_contrastive_models"] = 1
         # Validate that dynamic_first is at least 1
-        assert campaign_data["info"]["dynamic_first"] >= 1, "dynamic_first must be at least 1"
+        assert (
+            campaign_data["info"]["dynamic_first"] >= 1
+        ), "dynamic_first must be at least 1"
         # Validate that dynamic_contrastive_models is at most dynamic_top
-        assert campaign_data["info"]["dynamic_contrastive_models"] <= campaign_data["info"]["dynamic_top"], \
-            "dynamic_contrastive_models must be at most dynamic_top"
+        assert (
+            campaign_data["info"]["dynamic_contrastive_models"]
+            <= campaign_data["info"]["dynamic_top"]
+        ), "dynamic_contrastive_models must be at most dynamic_top"
         # Validate that all items have the same models
         all_models = set()
         for item in campaign_data["data"]:
@@ -298,7 +349,9 @@ def _add_single_campaign(data_file, overwrite, server):
         for item in campaign_data["data"]:
             if item and len(item) > 0:
                 item_models = set(item[0]["tgt"].keys())
-                assert item_models == all_models, "All items must have the same model outputs"
+                assert (
+                    item_models == all_models
+                ), "All items must have the same model outputs"
     else:
         raise ValueError(f"Unknown campaign assignment type: {assignment}")
 
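For dynamic campaigns, the assertions above pin down how the three tuning knobs relate. A hypothetical `info` block that satisfies them; field names come from the diff, values are illustrative:

```python
info = {
    "assignment": "dynamic",
    "users": 5,                       # required for dynamic campaigns
    "dynamic_first": 2,               # must be >= 1
    "dynamic_top": 3,
    "dynamic_contrastive_models": 2,  # defaults to 1; must be <= dynamic_top
}
```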
@@ -310,14 +363,12 @@ def _add_single_campaign(data_file, overwrite, server):
             new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
             if new_id not in user_ids:
                 user_ids.append(new_id)
-        user_ids = [
-            f"{user_id}-{rng.randint(0, 999):03d}"
-            for user_id in user_ids
-        ]
+        user_ids = [f"{user_id}-{rng.randint(0, 999):03d}" for user_id in user_ids]
     elif isinstance(users_spec, list):
         if len(users_spec) != num_users:
             raise ValueError(
-                f"Number of users ({len(users_spec)}) must match expected count ({num_users}).")
+                f"Number of users ({len(users_spec)}) must match expected count ({num_users})."
+            )
         if all(isinstance(u, str) for u in users_spec):
             # List of string IDs
             user_ids = users_spec
@@ -336,13 +387,31 @@ def _add_single_campaign(data_file, overwrite, server):
             raise ValueError("'users' list must contain all strings or all dicts.")
     else:
         raise ValueError("'users' must be an integer or a list.")
-
+
     if "protocol" not in campaign_data["info"]:
         campaign_data["info"]["protocol"] = "ESA"
-        print("Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'.")
+        print(
+            "Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'."
+        )
+
+    # Validate sliders structure if present
+    if "sliders" in campaign_data["info"]:
+        if not all(
+            isinstance(s, dict)
+            and all(k in s for k in ("name", "min", "max", "step"))
+            and isinstance(s.get("min"), (int, float))
+            and isinstance(s.get("max"), (int, float))
+            and isinstance(s.get("step"), (int, float))
+            and s["min"] <= s["max"]
+            and s["step"] > 0
+            for s in campaign_data["info"]["sliders"]
+        ):
+            raise ValueError(
+                "Each slider must be a dict with 'name', 'min', 'max', and 'step' keys, where min/max/step are numeric, min <= max, and step > 0"
+            )
 
     # Remove output file when overwriting (after all validations pass)
-    if overwrite and campaign_data['campaign_id'] in progress_data:
+    if overwrite and campaign_data["campaign_id"] in progress_data:
         output_file = f"{ROOT}/data/outputs/{campaign_data['campaign_id']}.jsonl"
         if os.path.exists(output_file):
             os.remove(output_file)
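The new `sliders` check accepts entries like the following (slider names invented): each entry must carry numeric `min`/`max`/`step` with `min <= max` and `step > 0`.

```python
info = {"assignment": "single-stream", "users": 3}
info["sliders"] = [
    {"name": "fluency", "min": 0, "max": 100, "step": 1},
    {"name": "adequacy", "min": 0.0, "max": 1.0, "step": 0.05},
]
```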
@@ -351,17 +420,14 @@ def _add_single_campaign(data_file, overwrite, server):
     # For single-stream and dynamic, data is a flat list (shared among all users)
     if assignment == "task-based":
         campaign_data["data"] = {
-            user_id: task
-            for user_id, task in zip(user_ids, tasks)
+            user_id: task for user_id, task in zip(user_ids, tasks)
         }
     elif assignment in ["single-stream", "dynamic"]:
         campaign_data["data"] = tasks
 
     # generate a token for dashboard access if not present
     if "token" not in campaign_data:
-        campaign_data["token"] = (
-            hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
-        )
+        campaign_data["token"] = hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
 
     def get_token(user_id, token_type):
         """Get user token or generate a random one."""
@@ -374,16 +440,23 @@ def _add_single_campaign(data_file, overwrite, server):
         user_id: {
             # TODO: progress tracking could be based on the assignment type
             "progress": (
-                [False]*len(campaign_data["data"][user_id])
-
-                else
-
+                [False] * len(campaign_data["data"][user_id])
+                if assignment == "task-based"
+                else (
+                    [False] * len(campaign_data["data"])
+                    if assignment == "single-stream"
+                    else (
+                        [list() for _ in range(len(campaign_data["data"]))]
+                        if assignment == "dynamic"
+                        else []
+                    )
+                )
             ),
             "time_start": None,
             "time_end": None,
             "time": 0,
             "url": (
-                f"{campaign_data['info'].get('template', 'basic')}"
+                f"{campaign_data['info'].get('template', 'annotate')}"
                 f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
                 f"&user_id={user_id}"
             ),
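The nested conditional above selects a different initial `progress` shape per assignment type. Rewritten as a plain function for readability; this is equivalent logic, not a helper that exists in Pearmut:

```python
def initial_progress(assignment, data, user_id=None):
    # task-based:    one done-flag per task assigned to this user
    if assignment == "task-based":
        return [False] * len(data[user_id])
    # single-stream: one done-flag per shared item
    if assignment == "single-stream":
        return [False] * len(data)
    # dynamic:       one (initially empty) annotation list per shared item
    if assignment == "dynamic":
        return [list() for _ in range(len(data))]
    return []
```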
@@ -396,26 +469,34 @@ def _add_single_campaign(data_file, overwrite, server):
     # Handle assets symlink if specified
     if "assets" in campaign_data["info"]:
         assets_config = campaign_data["info"]["assets"]
-
+
         # assets must be a dictionary with source and destination keys
         if not isinstance(assets_config, dict):
-            raise ValueError("Assets must be a dictionary with 'source' and 'destination' keys.")
+            raise ValueError(
+                "Assets must be a dictionary with 'source' and 'destination' keys."
+            )
         if "source" not in assets_config or "destination" not in assets_config:
-            raise ValueError(
-                "Assets config must contain 'source' and 'destination' keys.")
+            raise ValueError(
+                "Assets config must contain 'source' and 'destination' keys."
+            )
+
         assets_source = assets_config["source"]
         assets_destination = assets_config["destination"]
-
+
         # Validate destination starts with 'assets/'
         if not assets_destination.startswith("assets/"):
-            raise ValueError(
-                f"Assets destination '{assets_destination}' must start with 'assets/'.")
+            raise ValueError(
+                f"Assets destination '{assets_destination}' must start with 'assets/'."
+            )
+
         # Resolve relative paths from the caller's current working directory
         assets_real_path = os.path.abspath(assets_source)
 
         if not os.path.isdir(assets_real_path):
-            raise ValueError(
-                f"Assets source path '{assets_real_path}' must be an existing directory.")
+            raise ValueError(
+                f"Assets source path '{assets_real_path}' must be an existing directory."
+            )
+
         # Symlink path is based on the destination, stripping the 'assets/' prefix
         # User assets are now stored under data/assets/ instead of static/assets/
         symlink_path = f"{ROOT}/data/{assets_destination}".rstrip("/")
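An `assets` config that passes the checks above (paths illustrative): the source must be an existing directory and the destination must live under `assets/`; the result is a symlink at `{ROOT}/data/<destination>` pointing at the absolute source path.

```python
info["assets"] = {
    "source": "./my_images",       # resolved via os.path.abspath()
    "destination": "assets/demo",  # must start with "assets/"
}
```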
@@ -423,7 +504,7 @@ def _add_single_campaign(data_file, overwrite, server):
         # Remove existing symlink if present and we are overriding the same campaign
         if os.path.lexists(symlink_path):
             # Check if any other campaign is using this destination
-            current_campaign_id = campaign_data['campaign_id']
+            current_campaign_id = campaign_data["campaign_id"]
 
             for other_campaign_id in progress_data.keys():
                 if other_campaign_id == current_campaign_id:
@@ -440,8 +521,10 @@ def _add_single_campaign(data_file, overwrite, server):
                 if overwrite:
                     os.remove(symlink_path)
                 else:
-                    raise ValueError(
-                        f"Assets destination '{assets_destination}' is already taken.")
+                    raise ValueError(
+                        f"Assets destination '{assets_destination}' is already taken."
+                    )
+
         # Ensure the assets directory exists
         # get parent of symlink_path dir
         os.makedirs(os.path.dirname(symlink_path), exist_ok=True)
@@ -449,7 +532,6 @@ def _add_single_campaign(data_file, overwrite, server):
         os.symlink(assets_real_path, symlink_path, target_is_directory=True)
         print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
 
-
     # Shuffle data if shuffle parameter is true (defaults to true)
     should_shuffle = campaign_data["info"].get("shuffle", True)
     if should_shuffle:
@@ -459,15 +541,14 @@ def _add_single_campaign(data_file, overwrite, server):
     with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
         json.dump(campaign_data, f, indent=2, ensure_ascii=False)
 
-    progress_data[campaign_data['campaign_id']] = user_progress
+    progress_data[campaign_data["campaign_id"]] = user_progress
     save_progress_data(progress_data)
 
-
     print(
         "🎛️ ",
         f"{server}/dashboard.html"
         f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
-        f"&token={campaign_data['token']}"
+        f"&token={campaign_data['token']}",
     )
     for user_id, user_val in user_progress.items():
         # point to the protocol URL
@@ -481,22 +562,28 @@ def _add_campaign(args_unknown):
     """
     args = argparse.ArgumentParser()
     args.add_argument(
-
-
+        "data_files",
+        type=str,
+        nargs="+",
+        help="One or more paths to campaign data files",
     )
     args.add_argument(
-        "-o",
-
+        "-o",
+        "--overwrite",
+        action="store_true",
+        help="Overwrite existing campaign if it exists",
     )
     args.add_argument(
-        "--server",
-
+        "--server",
+        default="http://localhost:8001",
+        help="Prefix server URL for protocol links",
     )
     args = args.parse_args(args_unknown)
 
     for data_file in args.data_files:
         try:
-            _add_single_campaign(data_file, args.overwrite, args.server)
+            with open(data_file, "r") as f:
+                _add_single_campaign(json.load(f), args.overwrite, args.server)
         except Exception as e:
             print(f"Error processing {data_file}: {e}")
             exit(1)
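`_add_campaign` accepts several data files per invocation and now parses each file itself before handing the dict to `_add_single_campaign`. A sketch of preparing one such file end to end; the campaign content is invented, and the exact `data` shape per assignment type is inferred from the validation rules earlier in this file:

```python
import json

campaign = {
    "campaign_id": "demo-campaign",
    "info": {"assignment": "single-stream", "users": 2, "protocol": "ESA"},
    "data": [
        [  # one document: a list of items
            {"src": "Hallo Welt.", "tgt": {"default": "Hello world."}},
        ],
    ],
}
with open("demo-campaign.json", "w") as f:
    json.dump(campaign, f, ensure_ascii=False, indent=2)

# then: pearmut add demo-campaign.json   (use -o to overwrite an existing campaign)
```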
@@ -507,21 +594,20 @@ def main():
     Main entry point for the CLI.
     """
     args = argparse.ArgumentParser()
-    args.add_argument("command", type=str, choices=["run", "add", "purge"], default="run", nargs="?")
+    args.add_argument(
+        "command",
+        type=str,
+        choices=["run", "add", "purge"],
+        default="run",
+        nargs="?",
+    )
     args, args_unknown = args.parse_known_args()
 
-
-    for p in psutil.process_iter():
-        if "pearmut" == p.name() and p.pid != os.getpid():
-            print("Exit all running pearmut processes before running more commands.")
-            print(p)
-            exit(1)
-
-    if args.command == 'run':
+    if args.command == "run":
         _run(args_unknown)
-    elif args.command == 'add':
+    elif args.command == "add":
         _add_campaign(args_unknown)
-    elif args.command == 'purge':
+    elif args.command == "purge":
         import shutil
 
         def _unlink_assets(campaign_id):
@@ -531,7 +617,9 @@ def main():
                 return
             with open(task_file, "r") as f:
                 campaign_data = json.load(f)
-            destination = campaign_data.get("info", {}).get("assets", {}).get("destination")
+            destination = (
+                campaign_data.get("info", {}).get("assets", {}).get("destination")
+            )
             if destination:
                 symlink_path = f"{ROOT}/data/{destination}".rstrip("/")
                 if os.path.islink(symlink_path):
@@ -541,8 +629,11 @@ def main():
         # Parse optional campaign name
         purge_args = argparse.ArgumentParser()
         purge_args.add_argument(
-
-
+            "campaign",
+            type=str,
+            nargs="?",
+            default=None,
+            help="Optional campaign name to purge (purges all if not specified)",
         )
         purge_args = purge_args.parse_args(args_unknown)
         progress_data = load_progress_data()
@@ -556,7 +647,7 @@ def main():
             confirm = input(
                 f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
             )
-            if confirm.lower() == 'y':
+            if confirm.lower() == "y":
                 # Unlink assets before removing task file
                 _unlink_assets(campaign_id)
                 # Remove task file
@@ -580,7 +671,7 @@ def main():
             confirm = input(
                 "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
             )
-            if confirm.lower() == 'y':
+            if confirm.lower() == "y":
                 # Unlink all assets first
                 for campaign_id in progress_data.keys():
                     _unlink_assets(campaign_id)