pearmut 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/app.py +36 -29
- pearmut/cli.py +119 -13
- pearmut/static/dashboard.bundle.js +1 -1
- pearmut/static/listwise.bundle.js +1 -1
- pearmut/static/pointwise.bundle.js +1 -1
- pearmut-0.2.7.dist-info/METADATA +330 -0
- pearmut-0.2.7.dist-info/RECORD +19 -0
- pearmut-0.2.7.dist-info/licenses/LICENSE +21 -0
- pearmut-0.2.5.dist-info/METADATA +0 -345
- pearmut-0.2.5.dist-info/RECORD +0 -19
- pearmut-0.2.5.dist-info/licenses/LICENSE +0 -201
- {pearmut-0.2.5.dist-info → pearmut-0.2.7.dist-info}/WHEEL +0 -0
- {pearmut-0.2.5.dist-info → pearmut-0.2.7.dist-info}/entry_points.txt +0 -0
- {pearmut-0.2.5.dist-info → pearmut-0.2.7.dist-info}/top_level.txt +0 -0
pearmut/app.py
CHANGED
@@ -30,7 +30,8 @@ app.add_middleware(

 tasks_data = {}
 progress_data = load_progress_data(
-    warn="No progress.json found. Running, but no campaign will be available."
+    warn="No progress.json found. Running, but no campaign will be available."
+)

 # load all tasks into data_all
 for campaign_id in progress_data.keys():
@@ -60,30 +61,31 @@ async def _log_response(request: LogResponseRequest):

     # append response to the output log
     save_db_payload(
-        campaign_id, request.payload | {"user_id": user_id, "item_i": item_i}
+        campaign_id, request.payload | {"user_id": user_id, "item_i": item_i}
+    )

     # if actions were submitted, we can log time data
     if "actions" in request.payload:
-        times = [
-            x["time"] for x in request.payload["actions"]
-        ]
+        times = [x["time"] for x in request.payload["actions"]]
         if progress_data[campaign_id][user_id]["time_start"] is None:
             progress_data[campaign_id][user_id]["time_start"] = min(times)
         progress_data[campaign_id][user_id]["time_end"] = max(times)
-        progress_data[campaign_id][user_id]["time"] += sum(
-            min(b - a, 60)
-
-        ])
+        progress_data[campaign_id][user_id]["time"] += sum(
+            [min(b - a, 60) for a, b in zip(times, times[1:])]
+        )

     # Initialize validation_checks if it doesn't exist
     if "validations" in request.payload:
         if "validations" not in progress_data[campaign_id][user_id]:
             progress_data[campaign_id][user_id]["validations"] = {}

-        progress_data[campaign_id][user_id]["validations"][request.item_i] =
+        progress_data[campaign_id][user_id]["validations"][request.item_i] = (
+            request.payload["validations"]
+        )

-    update_progress(
-
+    update_progress(
+        campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload
+    )
     save_progress_data(progress_data)

     return JSONResponse(content="ok", status_code=200)
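For reference, the reformatted time accumulator above sums the gaps between consecutive action timestamps and caps each gap at 60 seconds, so idle periods do not inflate the recorded annotation time. A minimal sketch of the same expression with made-up timestamps (in seconds):

times = [100, 110, 112, 300]  # hypothetical action timestamps
gaps = [min(b - a, 60) for a, b in zip(times, times[1:])]
# gaps == [10, 2, 60]; the 188-second idle gap is capped at 60
total = sum(gaps)  # 72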
@@ -149,13 +151,15 @@ async def _dashboard_data(request: DashboardDataRequest):

     if campaign_id not in progress_data:
         return JSONResponse(content="Unknown campaign ID", status_code=400)
-
-    is_privileged =
+
+    is_privileged = request.token == tasks_data[campaign_id]["token"]

     progress_new = {}
     assignment = tasks_data[campaign_id]["info"]["assignment"]
     if assignment not in ["task-based", "single-stream"]:
-        return JSONResponse(
+        return JSONResponse(
+            content="Unsupported campaign assignment type", status_code=400
+        )

     # Get threshold info for the campaign
     validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
@@ -164,10 +168,9 @@ async def _dashboard_data(request: DashboardDataRequest):
         # shallow copy
         entry = dict(user_val)
         entry["validations"] = [
-            all(v)
-            for v in list(entry.get("validations", {}).values())
+            all(v) for v in list(entry.get("validations", {}).values())
         ]
-
+
         # Add threshold pass/fail status (only when user is complete)
         if all(entry["progress"]):
             entry["threshold_passed"] = check_validation_threshold(
@@ -183,11 +186,8 @@ async def _dashboard_data(request: DashboardDataRequest):
         progress_new[user_id] = entry

     return JSONResponse(
-        content={
-
-            "validation_threshold": validation_threshold
-        },
-        status_code=200
+        content={"data": progress_new, "validation_threshold": validation_threshold},
+        status_code=200,
     )

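The dashboard payload now nests the per-user progress under a "data" key next to the campaign-level "validation_threshold". A rough, hypothetical sketch of what a client might receive; the user ID and values are invented, only the field names come from the handler above:

response = {
    "data": {
        "gentle-falcon": {                 # hypothetical annotator ID
            "progress": [True, True],
            "validations": [True, False],  # one all()-reduced check per item
            "threshold_passed": True,      # only set once progress is complete
        },
    },
    "validation_threshold": 0.8,           # illustrative value
}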
@@ -227,7 +227,9 @@ async def _download_annotations(
     for campaign_id in campaign_id:
         output_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
         if campaign_id not in progress_data:
-            return JSONResponse(
+            return JSONResponse(
+                content=f"Unknown campaign ID {campaign_id}", status_code=400
+            )
         if not os.path.exists(output_path):
             output[campaign_id] = []
         else:
@@ -239,28 +241,33 @@ async def _download_annotations(

 @app.get("/download-progress")
 async def _download_progress(
-    campaign_id: list[str] = Query(),
-    token: list[str] = Query()
+    campaign_id: list[str] = Query(), token: list[str] = Query()
 ):

     if len(campaign_id) != len(token):
-        return JSONResponse(
+        return JSONResponse(
+            content="Mismatched campaign_id and token count", status_code=400
+        )

     output = {}
     for i, cid in enumerate(campaign_id):
         if cid not in progress_data:
             return JSONResponse(content=f"Unknown campaign ID {cid}", status_code=400)
         if token[i] != tasks_data[cid]["token"]:
-            return JSONResponse(
+            return JSONResponse(
+                content=f"Invalid token for campaign ID {cid}", status_code=400
+            )

         output[cid] = progress_data[cid]

     return JSONResponse(content=output, status_code=200)

+
 static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
 if not os.path.exists(static_dir + "index.html"):
     raise FileNotFoundError(
-        "Static directory not found. Please build the frontend first."
+        "Static directory not found. Please build the frontend first."
+    )

 app.mount(
     "/",
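The /download-progress endpoint takes parallel campaign_id and token query parameters, matched by position, and returns the progress records for each campaign whose token checks out. A hedged usage sketch; the host, port, campaign ID, and token are placeholders, not values from the package:

from urllib.parse import urlencode
from urllib.request import urlopen

params = urlencode([
    ("campaign_id", "demo-campaign"),  # placeholder campaign ID
    ("token", "s3cret-token"),         # placeholder dashboard token
])
# assumes a pearmut server running locally on port 8000
with urlopen(f"http://localhost:8000/download-progress?{params}") as resp:
    print(resp.read().decode())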
pearmut/cli.py
CHANGED
@@ -12,6 +12,9 @@ import psutil

 from .utils import ROOT, load_progress_data, save_progress_data

+# Static directory path (constant for consistency)
+STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
+
 os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
 load_progress_data(warn=None)

@@ -50,6 +53,40 @@ def _run(args_unknown):
     )


+def _validate_item_structure(items, template):
+    """
+    Validate that items have the correct structure.
+    Items should be lists of dictionaries with 'src' and 'tgt' keys.
+
+    Args:
+        items: List of item dictionaries to validate
+        template: Template type ('pointwise' or 'listwise') for type validation
+    """
+    if not isinstance(items, list):
+        raise ValueError("Items must be a list")
+
+    for item in items:
+        if not isinstance(item, dict):
+            raise ValueError("Each item must be a dictionary with 'src' and 'tgt' keys")
+        if 'src' not in item or 'tgt' not in item:
+            raise ValueError("Each item must contain 'src' and 'tgt' keys")
+
+        # Validate src is always a string
+        if not isinstance(item['src'], str):
+            raise ValueError("Item 'src' must be a string")
+
+        # Validate tgt type based on template
+        if template == 'listwise':
+            if not isinstance(item['tgt'], list):
+                raise ValueError("Item 'tgt' must be a list for listwise template")
+            # Check that all elements in tgt list are strings
+            if not all(isinstance(t, str) for t in item['tgt']):
+                raise ValueError("All elements in 'tgt' list must be strings for listwise template")
+        elif template == 'pointwise':
+            if not isinstance(item['tgt'], str):
+                raise ValueError("Item 'tgt' must be a string for pointwise template")
+
+
 def _add_single_campaign(data_file, overwrite, server):
     """
     Add a single campaign from a JSON data file.
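A quick sketch of what the new validator accepts and rejects; the example items below are invented, only the 'src'/'tgt' schema and the template names come from the function above:

pointwise_items = [{"src": "source text", "tgt": "a single output"}]
listwise_items = [{"src": "source text", "tgt": ["candidate A", "candidate B"]}]

_validate_item_structure(pointwise_items, "pointwise")  # passes: 'tgt' is a string
_validate_item_structure(listwise_items, "listwise")    # passes: 'tgt' is a list of strings
_validate_item_structure(listwise_items, "pointwise")   # raises ValueError: 'tgt' must be a string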
@@ -80,6 +117,7 @@ def _add_single_campaign(data_file, overwrite, server):
         raise ValueError("Campaign 'info' must contain 'template' field.")

     assignment = campaign_data["info"]["assignment"]
+    template = campaign_data["info"]["template"]
     # use random words for identifying users
     rng = random.Random(campaign_data["campaign_id"])
     rword = wonderwords.RandomWord(rng=rng)
@@ -96,6 +134,13 @@ def _add_single_campaign(data_file, overwrite, server):
         if not all(isinstance(task, list) for task in tasks):
             raise ValueError(
                 "Each task in task-based campaign 'data' must be a list of items.")
+        # Validate item structure for each task
+        for task_i, task in enumerate(tasks):
+            for doc_i, doc in enumerate(task):
+                try:
+                    _validate_item_structure(doc, template)
+                except ValueError as e:
+                    raise ValueError(f"Task {task_i}, document {doc_i}: {e}")
         num_users = len(tasks)
     elif assignment == "single-stream":
         tasks = campaign_data["data"]
@@ -105,6 +150,12 @@ def _add_single_campaign(data_file, overwrite, server):
         if not isinstance(campaign_data["data"], list):
             raise ValueError(
                 "Single-stream campaign 'data' must be a list of items.")
+        # Validate item structure for single-stream
+        for doc_i, doc in enumerate(tasks):
+            try:
+                _validate_item_structure(doc, template)
+            except ValueError as e:
+                raise ValueError(f"Document {doc_i}: {e}")
         if isinstance(users_spec, int):
             num_users = users_spec
         elif isinstance(users_spec, list):
@@ -199,30 +250,62 @@ def _add_single_campaign(data_file, overwrite, server):

     # Handle assets symlink if specified
     if "assets" in campaign_data["info"]:
-
+        assets_config = campaign_data["info"]["assets"]
+
+        # assets must be a dictionary with source and destination keys
+        if not isinstance(assets_config, dict):
+            raise ValueError("Assets must be a dictionary with 'source' and 'destination' keys.")
+        if "source" not in assets_config or "destination" not in assets_config:
+            raise ValueError("Assets config must contain 'source' and 'destination' keys.")
+
+        assets_source = assets_config["source"]
+        assets_destination = assets_config["destination"]
+
+        # Validate destination starts with 'assets/'
+        if not assets_destination.startswith("assets/"):
+            raise ValueError(f"Assets destination '{assets_destination}' must start with 'assets/'.")

         # Resolve relative paths from the caller's current working directory
-        assets_real_path = os.path.abspath(
+        assets_real_path = os.path.abspath(assets_source)

         if not os.path.isdir(assets_real_path):
-            raise ValueError(f"Assets path '{assets_real_path}' must be an existing directory.")
+            raise ValueError(f"Assets source path '{assets_real_path}' must be an existing directory.")

-
-        dir_name = assets_real_path.split(os.sep)[-1]
-
-        if not os.path.isdir(static_dir):
+        if not os.path.isdir(STATIC_DIR):
             raise ValueError(
-                f"Static directory '{
+                f"Static directory '{STATIC_DIR}' does not exist. "
                 "Please build the frontend first."
             )
-
-
-
-
+
+        # Symlink path is based on the destination, stripping the 'assets/' prefix
+        symlink_path = f"{STATIC_DIR}/{assets_destination}"
+
+        # Remove existing symlink if present and we are overriding the same campaign
+        if os.path.lexists(symlink_path):
+            # Check if any other campaign is using this destination
+            current_campaign_id = campaign_data['campaign_id']
+            tasks_dir = f"{ROOT}/data/tasks"
+            if os.path.exists(tasks_dir):
+                for task_file in os.listdir(tasks_dir):
+                    if task_file.endswith('.json'):
+                        other_campaign_id = task_file[:-5]
+                        if other_campaign_id != current_campaign_id:
+                            with open(f"{tasks_dir}/{task_file}", "r") as f:
+                                other_campaign = json.load(f)
+                            other_assets = other_campaign.get("info", {}).get("assets")
+                            if other_assets and isinstance(other_assets, dict):
+                                if other_assets.get("destination") == assets_destination:
+                                    raise ValueError(
+                                        f"Assets destination '{assets_destination}' is already used by campaign '{other_campaign_id}'."
+                                    )
+            # Only allow overwrite if it's the same campaign
             if overwrite:
                 os.remove(symlink_path)
             else:
-                raise ValueError(f"Assets
+                raise ValueError(f"Assets destination '{assets_destination}' is already taken.")
+
+        # Ensure the assets directory exists
+        os.makedirs(f"{STATIC_DIR}/assets", exist_ok=True)

         os.symlink(assets_real_path, symlink_path, target_is_directory=True)
         print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
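The reworked assets handling expects info.assets to be a mapping with 'source' and 'destination', where the destination must start with 'assets/' and must not collide with another campaign's destination. A hypothetical campaign snippet illustrating only the keys referenced in this diff; values are placeholders and any other required fields are omitted:

campaign = {
    "campaign_id": "demo-campaign",        # placeholder
    "info": {
        "assignment": "single-stream",
        "template": "pointwise",
        "assets": {
            "source": "./my_assets",       # resolved relative to the caller's cwd
            "destination": "assets/demo",  # must start with 'assets/'
        },
    },
    "data": [[{"src": "source text", "tgt": "a single output"}]],
}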
@@ -299,6 +382,20 @@ def main():
     elif args.command == 'purge':
         import shutil

+        def _unlink_assets(campaign_id):
+            """Unlink assets symlink for a campaign if it exists."""
+            task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
+            if not os.path.exists(task_file):
+                return
+            with open(task_file, "r") as f:
+                campaign_data = json.load(f)
+            destination = campaign_data.get("info", {}).get("assets", {}).get("destination")
+            if destination:
+                symlink_path = f"{STATIC_DIR}/{destination}"
+                if os.path.islink(symlink_path):
+                    os.remove(symlink_path)
+                    print(f"Assets symlink removed: {symlink_path}")
+
         # Parse optional campaign name
         purge_args = argparse.ArgumentParser()
         purge_args.add_argument(
@@ -314,6 +411,8 @@ def main():
                 f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
             )
             if confirm.lower() == 'y':
+                # Unlink assets before removing task file
+                _unlink_assets(campaign_id)
                 # Remove task file
                 task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
                 if os.path.exists(task_file):
@@ -336,6 +435,13 @@ def main():
                 "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
             )
             if confirm.lower() == 'y':
+                # Unlink all assets first
+                tasks_dir = f"{ROOT}/data/tasks"
+                if os.path.exists(tasks_dir):
+                    for task_file in os.listdir(tasks_dir):
+                        if task_file.endswith('.json'):
+                            campaign_id = task_file[:-5]
+                            _unlink_assets(campaign_id)
                 shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
                 shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
                 if os.path.exists(f"{ROOT}/data/progress.json"):