pearmut 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- pearmut/app.py +48 -20
- pearmut/assignment.py +12 -31
- pearmut/cli.py +45 -27
- pearmut/results_export.py +1 -1
- pearmut/static/annotate.bundle.js +1 -0
- pearmut/static/{basic.html → annotate.html} +30 -3
- pearmut/static/dashboard.bundle.js +1 -1
- pearmut/static/dashboard.html +6 -1
- pearmut/static/index.html +1 -1
- pearmut/static/style.css +8 -0
- pearmut/utils.py +3 -1
- {pearmut-1.0.1.dist-info → pearmut-1.0.2.dist-info}/METADATA +46 -65
- pearmut-1.0.2.dist-info/RECORD +20 -0
- pearmut/static/basic.bundle.js +0 -1
- pearmut-1.0.1.dist-info/RECORD +0 -20
- {pearmut-1.0.1.dist-info → pearmut-1.0.2.dist-info}/WHEEL +0 -0
- {pearmut-1.0.1.dist-info → pearmut-1.0.2.dist-info}/entry_points.txt +0 -0
- {pearmut-1.0.1.dist-info → pearmut-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {pearmut-1.0.1.dist-info → pearmut-1.0.2.dist-info}/top_level.txt +0 -0
pearmut/app.py
CHANGED
|
@@ -4,7 +4,7 @@ from typing import Any
|
|
|
4
4
|
|
|
5
5
|
from fastapi import FastAPI, Query
|
|
6
6
|
from fastapi.middleware.cors import CORSMiddleware
|
|
7
|
-
from fastapi.responses import JSONResponse, Response
|
|
7
|
+
from fastapi.responses import FileResponse, JSONResponse, Response
|
|
8
8
|
from fastapi.staticfiles import StaticFiles
|
|
9
9
|
from pydantic import BaseModel
|
|
10
10
|
|
|
@@ -17,6 +17,7 @@ from .results_export import (
|
|
|
17
17
|
)
|
|
18
18
|
from .utils import (
|
|
19
19
|
ROOT,
|
|
20
|
+
TOKEN_MAIN,
|
|
20
21
|
check_validation_threshold,
|
|
21
22
|
load_progress_data,
|
|
22
23
|
save_db_payload,
|
|
@@ -192,7 +193,11 @@ async def _dashboard_data(request: DashboardDataRequest):
|
|
|
192
193
|
progress_new[user_id] = entry
|
|
193
194
|
|
|
194
195
|
return JSONResponse(
|
|
195
|
-
content={
|
|
196
|
+
content={
|
|
197
|
+
"data": progress_new,
|
|
198
|
+
"validation_threshold": validation_threshold,
|
|
199
|
+
"assignment": assignment,
|
|
200
|
+
},
|
|
196
201
|
status_code=200,
|
|
197
202
|
)
|
|
198
203
|
|
|
@@ -288,7 +293,7 @@ class PurgeCampaignRequest(BaseModel):
|
|
|
288
293
|
@app.post("/purge-campaign")
|
|
289
294
|
async def _purge_campaign(request: PurgeCampaignRequest):
|
|
290
295
|
global progress_data, tasks_data
|
|
291
|
-
|
|
296
|
+
|
|
292
297
|
campaign_id = request.campaign_id
|
|
293
298
|
token = request.token
|
|
294
299
|
|
|
@@ -298,57 +303,69 @@ async def _purge_campaign(request: PurgeCampaignRequest):
|
|
|
298
303
|
return JSONResponse(content="Invalid token", status_code=400)
|
|
299
304
|
|
|
300
305
|
# Unlink assets if they exist
|
|
301
|
-
destination =
|
|
306
|
+
destination = (
|
|
307
|
+
tasks_data[campaign_id].get("info", {}).get("assets", {}).get("destination")
|
|
308
|
+
)
|
|
302
309
|
if destination:
|
|
303
310
|
symlink_path = f"{ROOT}/data/{destination}".rstrip("/")
|
|
304
311
|
if os.path.islink(symlink_path):
|
|
305
312
|
os.remove(symlink_path)
|
|
306
|
-
|
|
313
|
+
|
|
307
314
|
# Remove task file
|
|
308
315
|
task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
|
|
309
316
|
if os.path.exists(task_file):
|
|
310
317
|
os.remove(task_file)
|
|
311
|
-
|
|
318
|
+
|
|
312
319
|
# Remove output file
|
|
313
320
|
output_file = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
|
|
314
321
|
if os.path.exists(output_file):
|
|
315
322
|
os.remove(output_file)
|
|
316
|
-
|
|
323
|
+
|
|
317
324
|
# Remove from in-memory data structures
|
|
318
325
|
del tasks_data[campaign_id]
|
|
319
326
|
del progress_data[campaign_id]
|
|
320
|
-
|
|
327
|
+
|
|
321
328
|
# Save updated progress data
|
|
322
329
|
save_progress_data(progress_data)
|
|
323
|
-
|
|
330
|
+
|
|
324
331
|
return JSONResponse(content="ok", status_code=200)
|
|
325
332
|
|
|
326
333
|
|
|
327
334
|
class AddCampaignRequest(BaseModel):
|
|
328
335
|
campaign_data: dict[str, Any]
|
|
336
|
+
token_main: str
|
|
329
337
|
|
|
330
338
|
|
|
331
339
|
@app.post("/add-campaign")
|
|
332
340
|
async def _add_campaign(request: AddCampaignRequest):
|
|
333
341
|
global progress_data, tasks_data
|
|
334
|
-
|
|
342
|
+
|
|
335
343
|
from .cli import _add_single_campaign
|
|
336
|
-
|
|
344
|
+
|
|
345
|
+
if request.token_main != TOKEN_MAIN:
|
|
346
|
+
return JSONResponse(
|
|
347
|
+
content={"error": "Invalid main token. Use the latest one."},
|
|
348
|
+
status_code=400,
|
|
349
|
+
)
|
|
350
|
+
|
|
337
351
|
try:
|
|
338
352
|
server = f"{os.environ.get('PEARMUT_SERVER_URL', 'http://localhost:8001')}"
|
|
339
353
|
_add_single_campaign(request.campaign_data, overwrite=False, server=server)
|
|
340
|
-
|
|
341
|
-
campaign_id = request.campaign_data[
|
|
354
|
+
|
|
355
|
+
campaign_id = request.campaign_data["campaign_id"]
|
|
342
356
|
with open(f"{ROOT}/data/tasks/{campaign_id}.json", "r") as f:
|
|
343
357
|
tasks_data[campaign_id] = json.load(f)
|
|
344
|
-
|
|
358
|
+
|
|
345
359
|
progress_data = load_progress_data(warn=None)
|
|
346
|
-
|
|
347
|
-
return JSONResponse(
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
360
|
+
|
|
361
|
+
return JSONResponse(
|
|
362
|
+
content={
|
|
363
|
+
"status": "ok",
|
|
364
|
+
"campaign_id": campaign_id,
|
|
365
|
+
"token": tasks_data[campaign_id]["token"],
|
|
366
|
+
},
|
|
367
|
+
status_code=200,
|
|
368
|
+
)
|
|
352
369
|
except Exception as e:
|
|
353
370
|
return JSONResponse(content={"error": str(e)}, status_code=400)
|
|
354
371
|
|
|
@@ -418,6 +435,17 @@ if not os.path.exists(static_dir + "index.html"):
|
|
|
418
435
|
"Static directory not found. Please build the frontend first."
|
|
419
436
|
)
|
|
420
437
|
|
|
438
|
+
# Serve HTML files directly without redirect
|
|
439
|
+
@app.get("/annotate")
|
|
440
|
+
async def serve_annotate():
|
|
441
|
+
return FileResponse(static_dir + "annotate.html")
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
@app.get("/dashboard")
|
|
445
|
+
async def serve_dashboard():
|
|
446
|
+
return FileResponse(static_dir + "dashboard.html")
|
|
447
|
+
|
|
448
|
+
|
|
421
449
|
# Mount user assets from data/assets/
|
|
422
450
|
assets_dir = f"{ROOT}/data/assets"
|
|
423
451
|
os.makedirs(assets_dir, exist_ok=True)
|
pearmut/assignment.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import collections
|
|
2
|
-
import copy
|
|
3
2
|
import random
|
|
4
3
|
import statistics
|
|
5
4
|
from typing import Any
|
|
@@ -148,7 +147,7 @@ def get_i_item_taskbased(
|
|
|
148
147
|
| {
|
|
149
148
|
k: v
|
|
150
149
|
for k, v in data_all[campaign_id]["info"].items()
|
|
151
|
-
if k in {"protocol", "sliders"}
|
|
150
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
152
151
|
},
|
|
153
152
|
"payload": data_all[campaign_id]["data"][user_id][item_i],
|
|
154
153
|
}
|
|
@@ -195,7 +194,7 @@ def get_i_item_singlestream(
|
|
|
195
194
|
| {
|
|
196
195
|
k: v
|
|
197
196
|
for k, v in data_all[campaign_id]["info"].items()
|
|
198
|
-
if k in {"protocol", "sliders"}
|
|
197
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
199
198
|
},
|
|
200
199
|
"payload": data_all[campaign_id]["data"][item_i],
|
|
201
200
|
}
|
|
@@ -242,7 +241,7 @@ def get_next_item_taskbased(
|
|
|
242
241
|
| {
|
|
243
242
|
k: v
|
|
244
243
|
for k, v in data_all[campaign_id]["info"].items()
|
|
245
|
-
if k in {"protocol", "sliders"}
|
|
244
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
246
245
|
},
|
|
247
246
|
"payload": data_all[campaign_id]["data"][user_id][item_i],
|
|
248
247
|
}
|
|
@@ -298,7 +297,7 @@ def get_next_item_singlestream(
|
|
|
298
297
|
| {
|
|
299
298
|
k: v
|
|
300
299
|
for k, v in data_all[campaign_id]["info"].items()
|
|
301
|
-
if k in {"protocol", "sliders"}
|
|
300
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
302
301
|
},
|
|
303
302
|
"payload": data_all[campaign_id]["data"][item_i],
|
|
304
303
|
}
|
|
@@ -459,7 +458,7 @@ def get_next_item_dynamic(
|
|
|
459
458
|
| {
|
|
460
459
|
k: v
|
|
461
460
|
for k, v in campaign_data["info"].items()
|
|
462
|
-
if k in {"protocol", "sliders"}
|
|
461
|
+
if k in {"protocol", "sliders", "textfield", "show_model_names"}
|
|
463
462
|
},
|
|
464
463
|
"payload": pruned_item,
|
|
465
464
|
},
|
|
@@ -504,9 +503,15 @@ def reset_task(
|
|
|
504
503
|
"""
|
|
505
504
|
Reset the task progress for the user in the specified campaign.
|
|
506
505
|
Saves a reset marker to mask existing annotations.
|
|
506
|
+
|
|
507
|
+
Note: Dynamic assignment does not support user-level deletion.
|
|
507
508
|
"""
|
|
508
509
|
assignment = tasks_data[campaign_id]["info"]["assignment"]
|
|
509
|
-
if assignment == "
|
|
510
|
+
if assignment == "dynamic":
|
|
511
|
+
return JSONResponse(
|
|
512
|
+
content="User-level deletion is not supported for dynamic assignments", status_code=400
|
|
513
|
+
)
|
|
514
|
+
elif assignment == "task-based":
|
|
510
515
|
# Save reset marker for this user to mask existing annotations
|
|
511
516
|
num_items = len(tasks_data[campaign_id]["data"][user_id])
|
|
512
517
|
for item_i in range(num_items):
|
|
@@ -533,30 +538,6 @@ def reset_task(
|
|
|
533
538
|
for item_i in user_items:
|
|
534
539
|
progress_data[campaign_id][uid]["progress"][item_i] = False
|
|
535
540
|
|
|
536
|
-
# Reset only the specified user's time
|
|
537
|
-
_reset_user_time(progress_data, campaign_id, user_id)
|
|
538
|
-
return JSONResponse(content="ok", status_code=200)
|
|
539
|
-
elif assignment == "dynamic":
|
|
540
|
-
# Find all items that this user has annotated
|
|
541
|
-
user_items = _get_user_annotated_items(campaign_id, user_id)
|
|
542
|
-
|
|
543
|
-
# Save reset markers only for items this user has touched
|
|
544
|
-
for item_i in user_items:
|
|
545
|
-
save_db_payload(
|
|
546
|
-
campaign_id,
|
|
547
|
-
{"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
|
|
548
|
-
)
|
|
549
|
-
|
|
550
|
-
progress_data_user = copy.deepcopy(progress_data[campaign_id][user_id]["progress"])
|
|
551
|
-
|
|
552
|
-
# Reset only the touched items in all users' progress (shared pool, use lists to track models)
|
|
553
|
-
for uid in progress_data[campaign_id]:
|
|
554
|
-
for item_i in user_items:
|
|
555
|
-
progress_data[campaign_id][uid]["progress"][item_i] = [
|
|
556
|
-
x for x in progress_data[campaign_id][uid]["progress"][item_i]
|
|
557
|
-
if x not in progress_data_user[item_i]
|
|
558
|
-
]
|
|
559
|
-
|
|
560
541
|
# Reset only the specified user's time
|
|
561
542
|
_reset_user_time(progress_data, campaign_id, user_id)
|
|
562
543
|
return JSONResponse(content="ok", status_code=200)
|
pearmut/cli.py
CHANGED
|
@@ -3,20 +3,44 @@ Command-line interface for managing and running the Pearmut server.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import argparse
|
|
6
|
+
import atexit
|
|
7
|
+
import fcntl
|
|
6
8
|
import hashlib
|
|
7
9
|
import json
|
|
8
10
|
import os
|
|
9
11
|
import urllib.parse
|
|
10
12
|
|
|
11
|
-
import
|
|
12
|
-
|
|
13
|
-
from .utils import ROOT, load_progress_data, save_progress_data
|
|
13
|
+
from .utils import ROOT, TOKEN_MAIN, load_progress_data, save_progress_data
|
|
14
14
|
|
|
15
15
|
os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
|
|
16
16
|
load_progress_data(warn=None)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
+
|
|
19
20
|
def _run(args_unknown):
|
|
21
|
+
# Acquire lock before starting server
|
|
22
|
+
lock_file = f"{ROOT}/data/.lock"
|
|
23
|
+
try:
|
|
24
|
+
lock_fd = open(lock_file, "a+")
|
|
25
|
+
fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
26
|
+
lock_fd.seek(0)
|
|
27
|
+
lock_fd.truncate()
|
|
28
|
+
lock_fd.write(str(os.getpid()))
|
|
29
|
+
lock_fd.flush()
|
|
30
|
+
except BlockingIOError:
|
|
31
|
+
try:
|
|
32
|
+
with open(lock_file, "r") as f:
|
|
33
|
+
pid = f.read().strip()
|
|
34
|
+
print("You can't run multiple instances of Pearmut in the same directory.")
|
|
35
|
+
if pid:
|
|
36
|
+
print(f"Another instance (PID {pid}) is holding the lock.")
|
|
37
|
+
except (FileNotFoundError, PermissionError, OSError):
|
|
38
|
+
print("You can't run multiple instances of Pearmut in the same directory.")
|
|
39
|
+
exit(1)
|
|
40
|
+
|
|
41
|
+
# Register cleanup to remove lock file on exit
|
|
42
|
+
atexit.register(lambda: os.path.exists(lock_file) and os.remove(lock_file))
|
|
43
|
+
|
|
20
44
|
import uvicorn
|
|
21
45
|
|
|
22
46
|
from .app import app, tasks_data
|
|
@@ -33,21 +57,22 @@ def _run(args_unknown):
|
|
|
33
57
|
args = args.parse_args(args_unknown)
|
|
34
58
|
|
|
35
59
|
# print access dashboard URL for all campaigns
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
)
|
|
46
|
-
)
|
|
47
|
-
print(
|
|
48
|
-
"\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
|
|
60
|
+
dashboard_url = (
|
|
61
|
+
args.server
|
|
62
|
+
+ "/dashboard?"
|
|
63
|
+
+ f"token_main={TOKEN_MAIN}"
|
|
64
|
+
+ "".join(
|
|
65
|
+
[
|
|
66
|
+
f"&campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
|
|
67
|
+
for campaign_id, campaign_data in tasks_data.items()
|
|
68
|
+
]
|
|
49
69
|
)
|
|
50
|
-
|
|
70
|
+
)
|
|
71
|
+
print(
|
|
72
|
+
"\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
|
|
73
|
+
)
|
|
74
|
+
print("🍐", dashboard_url + "\n", flush=True)
|
|
75
|
+
|
|
51
76
|
|
|
52
77
|
# disable startup message
|
|
53
78
|
uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
|
|
@@ -90,7 +115,7 @@ def _validate_item_structure(items):
|
|
|
90
115
|
if "ref" in item and not isinstance(item["ref"], str):
|
|
91
116
|
raise ValueError("Item 'ref' must be a string")
|
|
92
117
|
|
|
93
|
-
# Validate tgt is a dictionary (
|
|
118
|
+
# Validate tgt is a dictionary (annotate template with model names)
|
|
94
119
|
if isinstance(item["tgt"], str):
|
|
95
120
|
# String not allowed - suggest using dictionary (don't include user input to prevent injection)
|
|
96
121
|
raise ValueError(
|
|
@@ -238,7 +263,7 @@ def _add_single_campaign(campaign_data, overwrite, server):
|
|
|
238
263
|
if "assignment" not in campaign_data["info"]:
|
|
239
264
|
raise ValueError("Campaign 'info' must contain 'assignment' field.")
|
|
240
265
|
|
|
241
|
-
# Template defaults to "
|
|
266
|
+
# Template defaults to "annotate" if not specified
|
|
242
267
|
assignment = campaign_data["info"]["assignment"]
|
|
243
268
|
# use random words for identifying users
|
|
244
269
|
rng = random.Random()
|
|
@@ -431,7 +456,7 @@ def _add_single_campaign(campaign_data, overwrite, server):
|
|
|
431
456
|
"time_end": None,
|
|
432
457
|
"time": 0,
|
|
433
458
|
"url": (
|
|
434
|
-
f"{campaign_data['info'].get(
|
|
459
|
+
f"{campaign_data['info'].get('template', 'annotate')}"
|
|
435
460
|
f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
|
|
436
461
|
f"&user_id={user_id}"
|
|
437
462
|
),
|
|
@@ -578,13 +603,6 @@ def main():
|
|
|
578
603
|
)
|
|
579
604
|
args, args_unknown = args.parse_known_args()
|
|
580
605
|
|
|
581
|
-
# enforce that only one pearmut process is running
|
|
582
|
-
for p in psutil.process_iter():
|
|
583
|
-
if "pearmut" == p.name() and p.pid != os.getpid():
|
|
584
|
-
print("Exit all running pearmut processes before running more commands.")
|
|
585
|
-
print(p)
|
|
586
|
-
exit(1)
|
|
587
|
-
|
|
588
606
|
if args.command == "run":
|
|
589
607
|
_run(args_unknown)
|
|
590
608
|
elif args.command == "add":
|
pearmut/results_export.py
CHANGED
|
@@ -39,7 +39,7 @@ def compute_model_scores(campaign_id):
|
|
|
39
39
|
# Compute model scores from annotations
|
|
40
40
|
model_scores = collections.defaultdict(dict)
|
|
41
41
|
|
|
42
|
-
# Iterate through all tasks to find items with 'models' field (
|
|
42
|
+
# Iterate through all tasks to find items with 'models' field (annotate template)
|
|
43
43
|
log = get_db_log(campaign_id)
|
|
44
44
|
for entry in log:
|
|
45
45
|
if "item" not in entry or "annotation" not in entry:
|