PyPI - pearmut - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

pearmut 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (16)

pearmut/app.py +19 -2
pearmut/assignment.py +29 -11
pearmut/cli.py +168 -48
pearmut/static/dashboard.bundle.js +1 -1
pearmut/static/dashboard.html +2 -2
pearmut/static/index.html +1 -1
pearmut/static/listwise.html +2 -2
pearmut/static/pointwise.html +2 -2
pearmut/utils.py +72 -4
{pearmut-0.2.1.dist-info → pearmut-0.2.3.dist-info}/METADATA +64 -9
pearmut-0.2.3.dist-info/RECORD +19 -0
pearmut-0.2.1.dist-info/RECORD +0 -19
{pearmut-0.2.1.dist-info → pearmut-0.2.3.dist-info}/WHEEL +0 -0
{pearmut-0.2.1.dist-info → pearmut-0.2.3.dist-info}/entry_points.txt +0 -0
{pearmut-0.2.1.dist-info → pearmut-0.2.3.dist-info}/licenses/LICENSE +0 -0
{pearmut-0.2.1.dist-info → pearmut-0.2.3.dist-info}/top_level.txt +0 -0

pearmut/app.py CHANGED Viewed

@@ -9,7 +9,13 @@ from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 from .assignment import get_i_item, get_next_item, reset_task, update_progress
-from .utils import ROOT, load_progress_data, save_db_payload, save_progress_data
+from .utils import (
+    ROOT,
+    check_validation_threshold,
+    load_progress_data,
+    save_db_payload,
+    save_progress_data,
+)
 os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)
@@ -151,6 +157,9 @@ async def _dashboard_data(request: DashboardDataRequest):
     if assignment not in ["task-based", "single-stream"]:
         return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)
+    # Get threshold info for the campaign
+    validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
     for user_id, user_val in progress_data[campaign_id].items():
         # shallow copy
         entry = dict(user_val)
@@ -159,6 +168,13 @@ async def _dashboard_data(request: DashboardDataRequest):
             for v in list(entry.get("validations", {}).values())
         ]
+        # Add threshold pass/fail status (only when user is complete)
+        if all(entry["progress"]):
+            entry["threshold_passed"] = check_validation_threshold(
+                tasks_data, progress_data, campaign_id, user_id
+            )
+        else:
+            entry["threshold_passed"] = None
         if not is_privileged:
             entry["token_correct"] = None
@@ -169,7 +185,8 @@ async def _dashboard_data(request: DashboardDataRequest):
     return JSONResponse(
         content={
             "status": "ok",
-            "data": progress_new
+            "data": progress_new,
+            "validation_threshold": validation_threshold
         },
         status_code=200
     )

pearmut/assignment.py CHANGED Viewed

@@ -3,18 +3,23 @@ from typing import Any
 from fastapi.responses import JSONResponse
-from .utils import get_db_log_item
+from .utils import (
+    RESET_MARKER,
+    check_validation_threshold,
+    get_db_log_item,
+    save_db_payload,
+)
 def _completed_response(
+    tasks_data: dict,
     progress_data: dict,
     campaign_id: str,
     user_id: str,
 ) -> JSONResponse:
     """Build a completed response with progress, time, and token."""
     user_progress = progress_data[campaign_id][user_id]
-    # TODO: add check for data quality
-    is_ok = True
+    is_ok = check_validation_threshold(tasks_data, progress_data, campaign_id, user_id)
     return JSONResponse(
         content={
             "status": "completed",
@@ -161,7 +166,7 @@ def get_next_item_taskbased(
     """
     user_progress = progress_data[campaign_id][user_id]
     if all(user_progress["progress"]):
-        return _completed_response(progress_data, campaign_id, user_id)
+        return _completed_response(data_all, progress_data, campaign_id, user_id)
     # find first incomplete item
     item_i = min([i for i, v in enumerate(user_progress["progress"]) if not v])
@@ -208,7 +213,7 @@ def get_next_item_singlestream(
     progress = user_progress["progress"]
     if all(progress):
-        return _completed_response(progress_data, campaign_id, user_id)
+        return _completed_response(data_all, progress_data, campaign_id, user_id)
     # find a random incomplete item
     incomplete_indices = [i for i, v in enumerate(progress) if not v]
@@ -261,20 +266,33 @@ def reset_task(
 ) -> JSONResponse:
     """
     Reset the task progress for the user in the specified campaign.
+    Saves a reset marker to mask existing annotations.
     """
     assignment = tasks_data[campaign_id]["info"]["assignment"]
     if assignment == "task-based":
-        progress_data[campaign_id][user_id]["progress"] = (
-            [False]*len(tasks_data[campaign_id]["data"][user_id])
-        )
+        # Save reset marker for this user to mask existing annotations
+        num_items = len(tasks_data[campaign_id]["data"][user_id])
+        for item_i in range(num_items):
+            save_db_payload(campaign_id, {
+                "user_id": user_id,
+                "item_i": item_i,
+                "annotations": RESET_MARKER
+            })
+        progress_data[campaign_id][user_id]["progress"] = [False] * num_items
         _reset_user_time(progress_data, campaign_id, user_id)
         return JSONResponse(content={"status": "ok"}, status_code=200)
     elif assignment == "single-stream":
+        # Save reset markers for all items (shared pool)
+        num_items = len(tasks_data[campaign_id]["data"])
+        for item_i in range(num_items):
+            save_db_payload(campaign_id, {
+                "user_id": None,
+                "item_i": item_i,
+                "annotations": RESET_MARKER
+            })
         # for single-stream reset all progress
         for uid in progress_data[campaign_id]:
-            progress_data[campaign_id][uid]["progress"] = (
-                [False]*len(tasks_data[campaign_id]["data"])
-            )
+            progress_data[campaign_id][uid]["progress"] = [False] * num_items
         _reset_user_time(progress_data, campaign_id, user_id)
         return JSONResponse(content={"status": "ok"}, status_code=200)
     else:

pearmut/cli.py CHANGED Viewed

@@ -10,7 +10,7 @@ import urllib.parse
 import psutil
-from .utils import ROOT, load_progress_data
+from .utils import ROOT, load_progress_data, save_progress_data
 os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
 load_progress_data(warn=None)
@@ -50,36 +50,21 @@ def _run(args_unknown):
     )
-def _add_campaign(args_unknown):
+def _add_single_campaign(data_file, overwrite, server):
     """
-    Add a new campaign from a JSON data file.
+    Add a single campaign from a JSON data file.
     """
     import random
     import wonderwords
-    args = argparse.ArgumentParser()
-    args.add_argument(
-        'data_file', type=str,
-        help='Path to the campaign data file'
-    )
-    args.add_argument(
-        "-o", "--overwrite", action="store_true",
-        help="Overwrite existing campaign if it exists"
-    )
-    args.add_argument(
-        "--server", default="http://localhost:8001",
-        help="Prefix server URL for protocol links"
-    )
-    args = args.parse_args(args_unknown)
-    with open(args.data_file, 'r') as f:
+    with open(data_file, 'r') as f:
         campaign_data = json.load(f)
     with open(f"{ROOT}/data/progress.json", "r") as f:
         progress_data = json.load(f)
-    if campaign_data['campaign_id'] in progress_data and not args.overwrite:
+    if campaign_data['campaign_id'] in progress_data and not overwrite:
         print(
             f"Campaign {campaign_data['campaign_id']} already exists.",
             "Use -o to overwrite."
@@ -99,6 +84,11 @@ def _add_campaign(args_unknown):
     # use random words for identifying users
     rng = random.Random(campaign_data["campaign_id"])
     rword = wonderwords.RandomWord(rng=rng)
+    # Parse users specification from info
+    users_spec = campaign_data["info"].get("users")
+    user_tokens = {}  # user_id -> {"pass": ..., "fail": ...}
     if assignment == "task-based":
         tasks = campaign_data["data"]
         if not isinstance(tasks, list):
@@ -110,29 +100,58 @@ def _add_campaign(args_unknown):
         num_users = len(tasks)
     elif assignment == "single-stream":
         tasks = campaign_data["data"]
-        if "num_users" not in campaign_data["info"]:
+        if users_spec is None:
             raise ValueError(
-                "Single-stream campaigns must specify 'num_users' in info.")
+                "Single-stream campaigns must specify 'users' in info.")
         if not isinstance(campaign_data["data"], list):
             raise ValueError(
                 "Single-stream campaign 'data' must be a list of items.")
-        num_users = campaign_data["info"]["num_users"]
+        if isinstance(users_spec, int):
+            num_users = users_spec
+        elif isinstance(users_spec, list):
+            num_users = len(users_spec)
+        else:
+            raise ValueError("'users' must be an integer or a list.")
     elif assignment == "dynamic":
         raise NotImplementedError(
             "Dynamic campaign assignment is not yet implemented.")
     else:
         raise ValueError(f"Unknown campaign assignment type: {assignment}")
-    user_ids = []
-    while len(user_ids) < num_users:
-        # generate random user IDs
-        new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
-        if new_id not in user_ids:
-            user_ids.append(new_id)
-    user_ids = [
-        f"{user_id}-{rng.randint(0, 999):03d}"
-        for user_id in user_ids
-    ]
+    # Generate or parse user IDs based on users specification
+    if users_spec is None or isinstance(users_spec, int):
+        # Generate random user IDs
+        user_ids = []
+        while len(user_ids) < num_users:
+            new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
+            if new_id not in user_ids:
+                user_ids.append(new_id)
+        user_ids = [
+            f"{user_id}-{rng.randint(0, 999):03d}"
+            for user_id in user_ids
+        ]
+    elif isinstance(users_spec, list):
+        if len(users_spec) != num_users:
+            raise ValueError(
+                f"Number of users ({len(users_spec)}) must match expected count ({num_users}).")
+        if all(isinstance(u, str) for u in users_spec):
+            # List of string IDs
+            user_ids = users_spec
+        elif all(isinstance(u, dict) for u in users_spec):
+            # List of dicts with user_id, token_pass, token_fail
+            user_ids = []
+            for u in users_spec:
+                if "user_id" not in u:
+                    raise ValueError("Each user dict must contain 'user_id'.")
+                user_ids.append(u["user_id"])
+                user_tokens[u["user_id"]] = {
+                    "pass": u.get("token_pass"),
+                    "fail": u.get("token_fail"),
+                }
+        else:
+            raise ValueError("'users' list must contain all strings or all dicts.")
+    else:
+        raise ValueError("'users' must be an integer or a list.")
     # For task-based, data is a dict mapping user_id -> tasks
     # For single-stream, data is a flat list (shared among all users)
@@ -150,6 +169,13 @@ def _add_campaign(args_unknown):
             hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
         )
+    def get_token(user_id, token_type):
+        """Get user token or generate a random one."""
+        token = user_tokens.get(user_id, {}).get(token_type)
+        if token is not None:
+            return token
+        return hashlib.sha256(random.randbytes(16)).hexdigest()[:10]
     user_progress = {
         user_id: {
             # TODO: progress tracking could be based on the assignment type
@@ -162,16 +188,48 @@ def _add_campaign(args_unknown):
             "time_end": None,
             "time": 0,
             "url": (
-                f"{args.server}/{campaign_data["info"]["template"]}.html"
+                f"{campaign_data["info"]["template"]}.html"
                 f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
                 f"&user_id={user_id}"
             ),
-            "token_correct": hashlib.sha256(random.randbytes(16)).hexdigest()[:10],
-            "token_incorrect": hashlib.sha256(random.randbytes(16)).hexdigest()[:10],
+            "token_correct": get_token(user_id, "pass"),
+            "token_incorrect": get_token(user_id, "fail"),
         }
         for user_id in user_ids
     }
+    # Handle assets symlink if specified
+    if "assets" in campaign_data["info"]:
+        assets_real_path = campaign_data["info"]["assets"]
+        # Resolve relative paths from the caller's current working directory
+        assets_real_path = os.path.abspath(assets_real_path)
+        if not os.path.isdir(assets_real_path):
+            raise ValueError(f"Assets path '{assets_real_path}' must be an existing directory.")
+        static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static"
+        dir_name = assets_real_path.split(os.sep)[-1]
+        if not os.path.isdir(static_dir):
+            raise ValueError(
+                f"Static directory '{static_dir}' does not exist. "
+                "Please build the frontend first."
+            )
+        symlink_path = f"{static_dir}/assets/{dir_name}"
+        # Remove existing symlink if present and we are overriding
+        if os.path.exists(symlink_path):
+            if overwrite:
+                os.remove(symlink_path)
+            else:
+                raise ValueError(f"Assets symlink '{symlink_path}' already exists.")
+        os.symlink(assets_real_path, symlink_path, target_is_directory=True)
+        print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
+    # commit to transaction
     with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
         json.dump(campaign_data, f, indent=2, ensure_ascii=False)
@@ -180,15 +238,44 @@ def _add_campaign(args_unknown):
     with open(f"{ROOT}/data/progress.json", "w") as f:
         json.dump(progress_data, f, indent=2, ensure_ascii=False)
     print(
-        f"{args.server}/dashboard.html"
+        "🎛️ ",
+        f"{server}/dashboard.html"
         f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
         f"&token={campaign_data['token']}"
     )
-    print("-"*10)
     for user_id, user_val in user_progress.items():
         # point to the protocol URL
-        print(user_val["url"])
+        print(f'{server}/{user_val["url"]}')
+    print()
+def _add_campaign(args_unknown):
+    """
+    Add campaigns from one or more JSON data files.
+    """
+    args = argparse.ArgumentParser()
+    args.add_argument(
+        'data_files', type=str, nargs='+',
+        help='One or more paths to campaign data files'
+    )
+    args.add_argument(
+        "-o", "--overwrite", action="store_true",
+        help="Overwrite existing campaign if it exists"
+    )
+    args.add_argument(
+        "--server", default="http://localhost:8001",
+        help="Prefix server URL for protocol links"
+    )
+    args = args.parse_args(args_unknown)
+    for data_file in args.data_files:
+        try:
+            _add_single_campaign(data_file, args.overwrite, args.server)
+        except Exception as e:
+            print(f"Error processing {data_file}: {e}")
+            exit(1)
 def main():
@@ -213,14 +300,47 @@ def main():
     elif args.command == 'purge':
         import shutil
-        confirm = input(
-            "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
+        # Parse optional campaign name
+        purge_args = argparse.ArgumentParser()
+        purge_args.add_argument(
+            'campaign', type=str, nargs='?', default=None,
+            help='Optional campaign name to purge (purges all if not specified)'
         )
-        if confirm.lower() == 'y':
-            shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
-            shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
-            if os.path.exists(f"{ROOT}/data/progress.json"):
-                os.remove(f"{ROOT}/data/progress.json")
-            print("All campaign data purged.")
+        purge_args = purge_args.parse_args(args_unknown)
+        if purge_args.campaign is not None:
+            # Purge specific campaign
+            campaign_id = purge_args.campaign
+            confirm = input(
+                f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
+            )
+            if confirm.lower() == 'y':
+                # Remove task file
+                task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
+                if os.path.exists(task_file):
+                    os.remove(task_file)
+                # Remove output file
+                output_file = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
+                if os.path.exists(output_file):
+                    os.remove(output_file)
+                # Remove from progress data
+                progress_data = load_progress_data()
+                if campaign_id in progress_data:
+                    del progress_data[campaign_id]
+                    save_progress_data(progress_data)
+                print(f"Campaign '{campaign_id}' purged.")
+            else:
+                print("Cancelled.")
         else:
-            print("Cancelled.")
+            # Purge all campaigns
+            confirm = input(
+                "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
+            )
+            if confirm.lower() == 'y':
+                shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
+                shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
+                if os.path.exists(f"{ROOT}/data/progress.json"):
+                    os.remove(f"{ROOT}/data/progress.json")
+                print("All campaign data purged.")
+            else:
+                print("Cancelled.")

pearmut 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

pearmut 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl