pearmut 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/app.py CHANGED
@@ -4,7 +4,7 @@ from typing import Any
4
4
 
5
5
  from fastapi import FastAPI, Query
6
6
  from fastapi.middleware.cors import CORSMiddleware
7
- from fastapi.responses import JSONResponse, Response
7
+ from fastapi.responses import FileResponse, JSONResponse, Response
8
8
  from fastapi.staticfiles import StaticFiles
9
9
  from pydantic import BaseModel
10
10
 
@@ -17,6 +17,7 @@ from .results_export import (
17
17
  )
18
18
  from .utils import (
19
19
  ROOT,
20
+ TOKEN_MAIN,
20
21
  check_validation_threshold,
21
22
  load_progress_data,
22
23
  save_db_payload,
@@ -192,7 +193,11 @@ async def _dashboard_data(request: DashboardDataRequest):
192
193
  progress_new[user_id] = entry
193
194
 
194
195
  return JSONResponse(
195
- content={"data": progress_new, "validation_threshold": validation_threshold},
196
+ content={
197
+ "data": progress_new,
198
+ "validation_threshold": validation_threshold,
199
+ "assignment": assignment,
200
+ },
196
201
  status_code=200,
197
202
  )
198
203
 
@@ -288,7 +293,7 @@ class PurgeCampaignRequest(BaseModel):
288
293
  @app.post("/purge-campaign")
289
294
  async def _purge_campaign(request: PurgeCampaignRequest):
290
295
  global progress_data, tasks_data
291
-
296
+
292
297
  campaign_id = request.campaign_id
293
298
  token = request.token
294
299
 
@@ -298,57 +303,69 @@ async def _purge_campaign(request: PurgeCampaignRequest):
298
303
  return JSONResponse(content="Invalid token", status_code=400)
299
304
 
300
305
  # Unlink assets if they exist
301
- destination = tasks_data[campaign_id].get("info", {}).get("assets", {}).get("destination")
306
+ destination = (
307
+ tasks_data[campaign_id].get("info", {}).get("assets", {}).get("destination")
308
+ )
302
309
  if destination:
303
310
  symlink_path = f"{ROOT}/data/{destination}".rstrip("/")
304
311
  if os.path.islink(symlink_path):
305
312
  os.remove(symlink_path)
306
-
313
+
307
314
  # Remove task file
308
315
  task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
309
316
  if os.path.exists(task_file):
310
317
  os.remove(task_file)
311
-
318
+
312
319
  # Remove output file
313
320
  output_file = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
314
321
  if os.path.exists(output_file):
315
322
  os.remove(output_file)
316
-
323
+
317
324
  # Remove from in-memory data structures
318
325
  del tasks_data[campaign_id]
319
326
  del progress_data[campaign_id]
320
-
327
+
321
328
  # Save updated progress data
322
329
  save_progress_data(progress_data)
323
-
330
+
324
331
  return JSONResponse(content="ok", status_code=200)
325
332
 
326
333
 
327
334
  class AddCampaignRequest(BaseModel):
328
335
  campaign_data: dict[str, Any]
336
+ token_main: str
329
337
 
330
338
 
331
339
  @app.post("/add-campaign")
332
340
  async def _add_campaign(request: AddCampaignRequest):
333
341
  global progress_data, tasks_data
334
-
342
+
335
343
  from .cli import _add_single_campaign
336
-
344
+
345
+ if request.token_main != TOKEN_MAIN:
346
+ return JSONResponse(
347
+ content={"error": "Invalid main token. Use the latest one."},
348
+ status_code=400,
349
+ )
350
+
337
351
  try:
338
352
  server = f"{os.environ.get('PEARMUT_SERVER_URL', 'http://localhost:8001')}"
339
353
  _add_single_campaign(request.campaign_data, overwrite=False, server=server)
340
-
341
- campaign_id = request.campaign_data['campaign_id']
354
+
355
+ campaign_id = request.campaign_data["campaign_id"]
342
356
  with open(f"{ROOT}/data/tasks/{campaign_id}.json", "r") as f:
343
357
  tasks_data[campaign_id] = json.load(f)
344
-
358
+
345
359
  progress_data = load_progress_data(warn=None)
346
-
347
- return JSONResponse(content={
348
- "status": "ok",
349
- "campaign_id": campaign_id,
350
- "token": tasks_data[campaign_id]["token"]
351
- }, status_code=200)
360
+
361
+ return JSONResponse(
362
+ content={
363
+ "status": "ok",
364
+ "campaign_id": campaign_id,
365
+ "token": tasks_data[campaign_id]["token"],
366
+ },
367
+ status_code=200,
368
+ )
352
369
  except Exception as e:
353
370
  return JSONResponse(content={"error": str(e)}, status_code=400)
354
371
 
@@ -418,6 +435,17 @@ if not os.path.exists(static_dir + "index.html"):
418
435
  "Static directory not found. Please build the frontend first."
419
436
  )
420
437
 
438
+ # Serve HTML files directly without redirect
439
+ @app.get("/annotate")
440
+ async def serve_annotate():
441
+ return FileResponse(static_dir + "annotate.html")
442
+
443
+
444
+ @app.get("/dashboard")
445
+ async def serve_dashboard():
446
+ return FileResponse(static_dir + "dashboard.html")
447
+
448
+
421
449
  # Mount user assets from data/assets/
422
450
  assets_dir = f"{ROOT}/data/assets"
423
451
  os.makedirs(assets_dir, exist_ok=True)
pearmut/assignment.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import collections
2
- import copy
3
2
  import random
4
3
  import statistics
5
4
  from typing import Any
@@ -148,7 +147,7 @@ def get_i_item_taskbased(
148
147
  | {
149
148
  k: v
150
149
  for k, v in data_all[campaign_id]["info"].items()
151
- if k in {"protocol", "sliders"}
150
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
152
151
  },
153
152
  "payload": data_all[campaign_id]["data"][user_id][item_i],
154
153
  }
@@ -195,7 +194,7 @@ def get_i_item_singlestream(
195
194
  | {
196
195
  k: v
197
196
  for k, v in data_all[campaign_id]["info"].items()
198
- if k in {"protocol", "sliders"}
197
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
199
198
  },
200
199
  "payload": data_all[campaign_id]["data"][item_i],
201
200
  }
@@ -242,7 +241,7 @@ def get_next_item_taskbased(
242
241
  | {
243
242
  k: v
244
243
  for k, v in data_all[campaign_id]["info"].items()
245
- if k in {"protocol", "sliders"}
244
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
246
245
  },
247
246
  "payload": data_all[campaign_id]["data"][user_id][item_i],
248
247
  }
@@ -298,7 +297,7 @@ def get_next_item_singlestream(
298
297
  | {
299
298
  k: v
300
299
  for k, v in data_all[campaign_id]["info"].items()
301
- if k in {"protocol", "sliders"}
300
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
302
301
  },
303
302
  "payload": data_all[campaign_id]["data"][item_i],
304
303
  }
@@ -459,7 +458,7 @@ def get_next_item_dynamic(
459
458
  | {
460
459
  k: v
461
460
  for k, v in campaign_data["info"].items()
462
- if k in {"protocol", "sliders"}
461
+ if k in {"protocol", "sliders", "textfield", "show_model_names"}
463
462
  },
464
463
  "payload": pruned_item,
465
464
  },
@@ -504,9 +503,15 @@ def reset_task(
504
503
  """
505
504
  Reset the task progress for the user in the specified campaign.
506
505
  Saves a reset marker to mask existing annotations.
506
+
507
+ Note: Dynamic assignment does not support user-level deletion.
507
508
  """
508
509
  assignment = tasks_data[campaign_id]["info"]["assignment"]
509
- if assignment == "task-based":
510
+ if assignment == "dynamic":
511
+ return JSONResponse(
512
+ content="User-level deletion is not supported for dynamic assignments", status_code=400
513
+ )
514
+ elif assignment == "task-based":
510
515
  # Save reset marker for this user to mask existing annotations
511
516
  num_items = len(tasks_data[campaign_id]["data"][user_id])
512
517
  for item_i in range(num_items):
@@ -533,30 +538,6 @@ def reset_task(
533
538
  for item_i in user_items:
534
539
  progress_data[campaign_id][uid]["progress"][item_i] = False
535
540
 
536
- # Reset only the specified user's time
537
- _reset_user_time(progress_data, campaign_id, user_id)
538
- return JSONResponse(content="ok", status_code=200)
539
- elif assignment == "dynamic":
540
- # Find all items that this user has annotated
541
- user_items = _get_user_annotated_items(campaign_id, user_id)
542
-
543
- # Save reset markers only for items this user has touched
544
- for item_i in user_items:
545
- save_db_payload(
546
- campaign_id,
547
- {"user_id": user_id, "item_i": item_i, "annotation": RESET_MARKER},
548
- )
549
-
550
- progress_data_user = copy.deepcopy(progress_data[campaign_id][user_id]["progress"])
551
-
552
- # Reset only the touched items in all users' progress (shared pool, use lists to track models)
553
- for uid in progress_data[campaign_id]:
554
- for item_i in user_items:
555
- progress_data[campaign_id][uid]["progress"][item_i] = [
556
- x for x in progress_data[campaign_id][uid]["progress"][item_i]
557
- if x not in progress_data_user[item_i]
558
- ]
559
-
560
541
  # Reset only the specified user's time
561
542
  _reset_user_time(progress_data, campaign_id, user_id)
562
543
  return JSONResponse(content="ok", status_code=200)
pearmut/cli.py CHANGED
@@ -3,20 +3,44 @@ Command-line interface for managing and running the Pearmut server.
3
3
  """
4
4
 
5
5
  import argparse
6
+ import atexit
7
+ import fcntl
6
8
  import hashlib
7
9
  import json
8
10
  import os
9
11
  import urllib.parse
10
12
 
11
- import psutil
12
-
13
- from .utils import ROOT, load_progress_data, save_progress_data
13
+ from .utils import ROOT, TOKEN_MAIN, load_progress_data, save_progress_data
14
14
 
15
15
  os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
16
16
  load_progress_data(warn=None)
17
17
 
18
18
 
19
+
19
20
  def _run(args_unknown):
21
+ # Acquire lock before starting server
22
+ lock_file = f"{ROOT}/data/.lock"
23
+ try:
24
+ lock_fd = open(lock_file, "a+")
25
+ fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
26
+ lock_fd.seek(0)
27
+ lock_fd.truncate()
28
+ lock_fd.write(str(os.getpid()))
29
+ lock_fd.flush()
30
+ except BlockingIOError:
31
+ try:
32
+ with open(lock_file, "r") as f:
33
+ pid = f.read().strip()
34
+ print("You can't run multiple instances of Pearmut in the same directory.")
35
+ if pid:
36
+ print(f"Another instance (PID {pid}) is holding the lock.")
37
+ except (FileNotFoundError, PermissionError, OSError):
38
+ print("You can't run multiple instances of Pearmut in the same directory.")
39
+ exit(1)
40
+
41
+ # Register cleanup to remove lock file on exit
42
+ atexit.register(lambda: os.path.exists(lock_file) and os.remove(lock_file))
43
+
20
44
  import uvicorn
21
45
 
22
46
  from .app import app, tasks_data
@@ -33,21 +57,22 @@ def _run(args_unknown):
33
57
  args = args.parse_args(args_unknown)
34
58
 
35
59
  # print access dashboard URL for all campaigns
36
- if tasks_data:
37
- dashboard_url = (
38
- args.server
39
- + "/dashboard.html?"
40
- + "&".join(
41
- [
42
- f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
43
- for campaign_id, campaign_data in tasks_data.items()
44
- ]
45
- )
46
- )
47
- print(
48
- "\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
60
+ dashboard_url = (
61
+ args.server
62
+ + "/dashboard?"
63
+ + f"token_main={TOKEN_MAIN}"
64
+ + "".join(
65
+ [
66
+ f"&campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
67
+ for campaign_id, campaign_data in tasks_data.items()
68
+ ]
49
69
  )
50
- print("🍐", dashboard_url + "\n", flush=True)
70
+ )
71
+ print(
72
+ "\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
73
+ )
74
+ print("🍐", dashboard_url + "\n", flush=True)
75
+
51
76
 
52
77
  # disable startup message
53
78
  uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
@@ -90,7 +115,7 @@ def _validate_item_structure(items):
90
115
  if "ref" in item and not isinstance(item["ref"], str):
91
116
  raise ValueError("Item 'ref' must be a string")
92
117
 
93
- # Validate tgt is a dictionary (basic template with model names)
118
+ # Validate tgt is a dictionary (annotate template with model names)
94
119
  if isinstance(item["tgt"], str):
95
120
  # String not allowed - suggest using dictionary (don't include user input to prevent injection)
96
121
  raise ValueError(
@@ -238,7 +263,7 @@ def _add_single_campaign(campaign_data, overwrite, server):
238
263
  if "assignment" not in campaign_data["info"]:
239
264
  raise ValueError("Campaign 'info' must contain 'assignment' field.")
240
265
 
241
- # Template defaults to "basic" if not specified
266
+ # Template defaults to "annotate" if not specified
242
267
  assignment = campaign_data["info"]["assignment"]
243
268
  # use random words for identifying users
244
269
  rng = random.Random()
@@ -431,7 +456,7 @@ def _add_single_campaign(campaign_data, overwrite, server):
431
456
  "time_end": None,
432
457
  "time": 0,
433
458
  "url": (
434
- f"{campaign_data['info'].get("template", "basic")}.html"
459
+ f"{campaign_data['info'].get('template', 'annotate')}"
435
460
  f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
436
461
  f"&user_id={user_id}"
437
462
  ),
@@ -578,13 +603,6 @@ def main():
578
603
  )
579
604
  args, args_unknown = args.parse_known_args()
580
605
 
581
- # enforce that only one pearmut process is running
582
- for p in psutil.process_iter():
583
- if "pearmut" == p.name() and p.pid != os.getpid():
584
- print("Exit all running pearmut processes before running more commands.")
585
- print(p)
586
- exit(1)
587
-
588
606
  if args.command == "run":
589
607
  _run(args_unknown)
590
608
  elif args.command == "add":
pearmut/results_export.py CHANGED
@@ -39,7 +39,7 @@ def compute_model_scores(campaign_id):
39
39
  # Compute model scores from annotations
40
40
  model_scores = collections.defaultdict(dict)
41
41
 
42
- # Iterate through all tasks to find items with 'models' field (basic template)
42
+ # Iterate through all tasks to find items with 'models' field (annotate template)
43
43
  log = get_db_log(campaign_id)
44
44
  for entry in log:
45
45
  if "item" not in entry or "annotation" not in entry: