PyPI - pearmut - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

pearmut 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

pearmut/app.py +55 -25
pearmut/assignment.py +312 -0
pearmut/cli.py +37 -19
pearmut/static/assets/style.css +168 -0
pearmut/static/dashboard.bundle.js +1 -1
pearmut/static/listwise.bundle.js +1 -0
pearmut/static/listwise.html +77 -0
pearmut/static/pointwise.bundle.js +1 -1
pearmut/static/pointwise.html +1 -167
pearmut/utils.py +55 -2
{pearmut-0.1.1.dist-info → pearmut-0.1.3.dist-info}/METADATA +64 -13
pearmut-0.1.3.dist-info/RECORD +19 -0
pearmut/protocols.py +0 -122
pearmut-0.1.1.dist-info/RECORD +0 -17
{pearmut-0.1.1.dist-info → pearmut-0.1.3.dist-info}/WHEEL +0 -0
{pearmut-0.1.1.dist-info → pearmut-0.1.3.dist-info}/entry_points.txt +0 -0
{pearmut-0.1.1.dist-info → pearmut-0.1.3.dist-info}/licenses/LICENSE +0 -0
{pearmut-0.1.1.dist-info → pearmut-0.1.3.dist-info}/top_level.txt +0 -0

pearmut/app.py CHANGED

@@ -8,8 +8,8 @@ from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
-from .protocols import get_next_item, reset_task, update_progress
-from .utils import ROOT, load_progress_data, save_progress_data
+from .assignment import get_i_item, get_next_item, reset_task, update_progress
+from .utils import ROOT, load_progress_data, save_db_payload, save_progress_data
 os.makedirs(f"{ROOT}/data/outputs", exist_ok=True)
@@ -36,7 +36,7 @@ class LogResponseRequest(BaseModel):
     campaign_id: str
     user_id: str
     item_i: int
-    payload: Any
+    payload: dict[str, Any]
 @app.post("/log-response")
@@ -45,6 +45,7 @@ async def _log_response(request: LogResponseRequest):
     campaign_id = request.campaign_id
     user_id = request.user_id
+    item_i = request.item_i
     if campaign_id not in progress_data:
         return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
@@ -52,8 +53,7 @@ async def _log_response(request: LogResponseRequest):
         return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
     # append response to the output log
-    with open(f"{ROOT}/data/outputs/{campaign_id}.jsonl", "a") as log_file:
-        log_file.write(json.dumps(request.payload, ensure_ascii=False) + "\n")
+    save_db_payload(campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
     # if actions were submitted, we can log time data
     if "actions" in request.payload:
@@ -97,6 +97,32 @@ async def _get_next_item(request: NextItemRequest):
     )
+class GetItemRequest(BaseModel):
+    campaign_id: str
+    user_id: str
+    item_i: int
+@app.post("/get-i-item")
+async def _get_i_item(request: GetItemRequest):
+    campaign_id = request.campaign_id
+    user_id = request.user_id
+    item_i = request.item_i
+    if campaign_id not in progress_data:
+        return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
+    if user_id not in progress_data[campaign_id]:
+        return JSONResponse(content={"error": "Unknown user ID"}, status_code=400)
+    return get_i_item(
+        campaign_id,
+        user_id,
+        tasks_data,
+        progress_data,
+        item_i,
+    )
 class DashboardDataRequest(BaseModel):
     campaign_id: str
     token: str | None = None
@@ -111,19 +137,20 @@ async def _dashboard_data(request: DashboardDataRequest):
     if campaign_id not in progress_data:
         return JSONResponse(content={"error": "Unknown campaign ID"}, status_code=400)
-    progress_new = {
-        user_id: {
-            **user_val,
-            "total": len(tasks_data[campaign_id]["data"][user_id]),
-        } | (
-            # override if not privileged
-            {
-                "token_correct": None,
-                "token_incorrect": None,
-            } if not is_privileged else {}
-        )
-        for user_id, user_val in progress_data[campaign_id].items()
-    }
+    progress_new = {}
+    assignment = tasks_data[campaign_id]["info"]["assignment"]
+    if assignment not in ["task-based", "single-stream"]:
+        return JSONResponse(content={"error": "Unsupported campaign assignment type"}, status_code=400)
+    for user_id, user_val in progress_data[campaign_id].items():
+        # shallow copy
+        entry = dict(user_val)
+        if not is_privileged:
+            entry["token_correct"] = None
+            entry["token_incorrect"] = None
+        progress_new[user_id] = entry
     return JSONResponse(
         content={
@@ -190,19 +217,22 @@ async def _download_progress(
         return JSONResponse(content={"error": "Mismatched campaign_id and token count"}, status_code=400)
     output = {}
-    for campaign_id, campaign_id in enumerate(campaign_id):
-        if campaign_id not in progress_data:
-            return JSONResponse(content={"error": f"Unknown campaign ID {campaign_id}"}, status_code=400)
-        if token[campaign_id] != tasks_data[campaign_id]["token"]:
-            return JSONResponse(content={"error": f"Invalid token for campaign ID {campaign_id}"}, status_code=400)
+    for i, cid in enumerate(campaign_id):
+        if cid not in progress_data:
+            return JSONResponse(content={"error": f"Unknown campaign ID {cid}"}, status_code=400)
+        if token[i] != tasks_data[cid]["token"]:
+            return JSONResponse(content={"error": f"Invalid token for campaign ID {cid}"}, status_code=400)
-        output[campaign_id] = progress_data[campaign_id]
+        output[cid] = progress_data[cid]
     return JSONResponse(content=output, status_code=200)
+static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
+if not os.path.exists(static_dir + "index.html"):
+    raise FileNotFoundError("Static directory not found. Please build the frontend first.")
 app.mount(
     "/",
-    StaticFiles(directory=f"{os.path.dirname(os.path.abspath(__file__))}/static/" , html=True, follow_symlink=True),
+    StaticFiles(directory=static_dir, html=True, follow_symlink=True),
     name="static",
 )

pearmut/assignment.py ADDED

@@ -0,0 +1,312 @@
+import random
+from typing import Any
+from fastapi.responses import JSONResponse
+from .utils import get_db_log_item
+def _completed_response(
+    progress_data: dict,
+    campaign_id: str,
+    user_id: str,
+) -> JSONResponse:
+    """Build a completed response with progress, time, and token."""
+    user_progress = progress_data[campaign_id][user_id]
+    # TODO: add check for data quality
+    is_ok = True
+    return JSONResponse(
+        content={
+            "status": "completed",
+            "progress": user_progress["progress"],
+            "time": user_progress["time"],
+            "token": user_progress["token_correct" if is_ok else "token_incorrect"],
+        },
+        status_code=200
+    )
+def get_next_item(
+    campaign_id: str,
+    user_id: str,
+    tasks_data: dict,
+    progress_data: dict,
+) -> JSONResponse:
+    """
+    Get the next item for the user in the specified campaign.
+    """
+    assignment = tasks_data[campaign_id]["info"]["assignment"]
+    if assignment == "task-based":
+        return get_next_item_taskbased(campaign_id, user_id, tasks_data, progress_data)
+    elif assignment == "single-stream":
+        return get_next_item_singlestream(campaign_id, user_id, tasks_data, progress_data)
+    elif assignment == "dynamic":
+        return get_next_item_dynamic(campaign_id, user_id, tasks_data, progress_data)
+    else:
+        return JSONResponse(content={"error": "Unknown campaign assignment type"}, status_code=400)
+def get_i_item(
+    campaign_id: str,
+    user_id: str,
+    tasks_data: dict,
+    progress_data: dict,
+    item_i: int,
+) -> JSONResponse:
+    """
+    Get a specific item by index for the user in the specified campaign.
+    """
+    assignment = tasks_data[campaign_id]["info"]["assignment"]
+    if assignment == "task-based":
+        return get_i_item_taskbased(campaign_id, user_id, tasks_data, progress_data, item_i)
+    elif assignment == "single-stream":
+        return get_i_item_singlestream(campaign_id, user_id, tasks_data, progress_data, item_i)
+    else:
+        return JSONResponse(content={"error": "Get item not supported for this assignment type"}, status_code=400)
+def get_i_item_taskbased(
+    campaign_id: str,
+    user_id: str,
+    data_all: dict,
+    progress_data: dict,
+    item_i: int,
+) -> JSONResponse:
+    """
+    Get specific item for task-based protocol.
+    """
+    user_progress = progress_data[campaign_id][user_id]
+    if all(user_progress["progress"]):
+        return _completed_response(progress_data, campaign_id, user_id)
+    # try to get existing annotations if any
+    items_existing = get_db_log_item(campaign_id, user_id, item_i)
+    if items_existing:
+        # get the latest ones
+        payload_existing = items_existing[-1]["annotations"]
+    if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
+        return JSONResponse(
+            content={"status": "error", "message": "Item index out of range"},
+            status_code=400
+        )
+    return JSONResponse(
+        content={
+            "status": "ok",
+            "progress": user_progress["progress"],
+            "time": user_progress["time"],
+            "info": {
+                "item_i": item_i,
+            } | {
+                k: v
+                for k, v in data_all[campaign_id]["info"].items()
+                if k.startswith("protocol")
+            },
+            "payload": data_all[campaign_id]["data"][user_id][item_i]
+        } | ({"payload_existing": payload_existing} if items_existing else {}),
+        status_code=200
+    )
+def get_i_item_singlestream(
+    campaign_id: str,
+    user_id: str,
+    data_all: dict,
+    progress_data: dict,
+    item_i: int,
+) -> JSONResponse:
+    """
+    Get specific item for single-stream assignment.
+    """
+    user_progress = progress_data[campaign_id][user_id]
+    if all(user_progress["progress"]):
+        return _completed_response(progress_data, campaign_id, user_id)
+    # try to get existing annotations if any
+    # note the None user_id since it is shared
+    items_existing = get_db_log_item(campaign_id, None, item_i)
+    if items_existing:
+        # get the latest ones
+        payload_existing = items_existing[-1]["annotations"]
+    if item_i < 0 or item_i >= len(data_all[campaign_id]["data"]):
+        return JSONResponse(
+            content={"status": "error", "message": "Item index out of range"},
+            status_code=400
+        )
+    return JSONResponse(
+        content={
+            "status": "ok",
+            "progress": user_progress["progress"],
+            "time": user_progress["time"],
+            "info": {
+                "item_i": item_i,
+            } | {
+                k: v
+                for k, v in data_all[campaign_id]["info"].items()
+                if k.startswith("protocol")
+            },
+            "payload": data_all[campaign_id]["data"][item_i]
+        } | ({"payload_existing": payload_existing} if items_existing else {}),
+        status_code=200
+    )
+def get_next_item_taskbased(
+    campaign_id: str,
+    user_id: str,
+    data_all: dict,
+    progress_data: dict,
+) -> JSONResponse:
+    """
+    Get the next item for task-based assignment.
+    """
+    user_progress = progress_data[campaign_id][user_id]
+    if all(user_progress["progress"]):
+        return _completed_response(progress_data, campaign_id, user_id)
+    # find first incomplete item
+    item_i = min([i for i, v in enumerate(user_progress["progress"]) if not v])
+    # try to get existing annotations if any
+    items_existing = get_db_log_item(campaign_id, user_id, item_i)
+    if items_existing:
+        # get the latest ones
+        payload_existing = items_existing[-1]["annotations"]
+    return JSONResponse(
+        content={
+            "status": "ok",
+            "progress": user_progress["progress"],
+            "time": user_progress["time"],
+            "info": {
+                "item_i": item_i,
+            } | {
+                k: v
+                for k, v in data_all[campaign_id]["info"].items()
+                if k.startswith("protocol")
+            },
+            "payload": data_all[campaign_id]["data"][user_id][item_i]
+        } | ({"payload_existing": payload_existing} if items_existing else {}),
+        status_code=200
+    )
+def get_next_item_singlestream(
+    campaign_id: str,
+    user_id: str,
+    data_all: dict,
+    progress_data: dict,
+) -> JSONResponse:
+    """
+    Get the next item for single-stream assignment.
+    In this mode, all users share the same pool of items.
+    Items are randomly selected from unfinished items.
+    Note: There is a potential race condition where multiple users could
+    receive the same item simultaneously. This is fine since we store all responses.
+    """
+    user_progress = progress_data[campaign_id][user_id]
+    progress = user_progress["progress"]
+    if all(progress):
+        return _completed_response(progress_data, campaign_id, user_id)
+    # find a random incomplete item
+    incomplete_indices = [i for i, v in enumerate(progress) if not v]
+    item_i = random.choice(incomplete_indices)
+    # try to get existing annotations if any
+    # note the None user_id since it is shared
+    items_existing = get_db_log_item(campaign_id, None, item_i)
+    if items_existing:
+        # get the latest ones
+        payload_existing = items_existing[-1]["annotations"]
+    return JSONResponse(
+        content={
+            "status": "ok",
+            "time": user_progress["time"],
+            "progress": progress,
+            "info": {
+                "item_i": item_i,
+            } | {
+                k: v
+                for k, v in data_all[campaign_id]["info"].items()
+                if k.startswith("protocol")
+            },
+            "payload": data_all[campaign_id]["data"][item_i]
+        } | ({"payload_existing": payload_existing} if items_existing else {}),
+        status_code=200
+    )
+def get_next_item_dynamic(campaign_data: dict, user_id: str, progress_data: dict, data_all: dict):
+    raise NotImplementedError("Dynamic protocol is not implemented yet.")
+def _reset_user_time(progress_data: dict, campaign_id: str, user_id: str) -> None:
+    """Reset time tracking fields for a user."""
+    progress_data[campaign_id][user_id]["time"] = 0.0
+    progress_data[campaign_id][user_id]["time_start"] = None
+    progress_data[campaign_id][user_id]["time_end"] = None
+def reset_task(
+    campaign_id: str,
+    user_id: str,
+    tasks_data: dict,
+    progress_data: dict,
+) -> JSONResponse:
+    """
+    Reset the task progress for the user in the specified campaign.
+    """
+    assignment = tasks_data[campaign_id]["info"]["assignment"]
+    if assignment == "task-based":
+        progress_data[campaign_id][user_id]["progress"] = (
+            [False]*len(tasks_data[campaign_id]["data"][user_id])
+        )
+        _reset_user_time(progress_data, campaign_id, user_id)
+        return JSONResponse(content={"status": "ok"}, status_code=200)
+    elif assignment == "single-stream":
+        # for single-stream reset all progress
+        for uid in progress_data[campaign_id]:
+            progress_data[campaign_id][uid]["progress"] = (
+                [False]*len(tasks_data[campaign_id]["data"])
+            )
+        _reset_user_time(progress_data, campaign_id, user_id)
+        return JSONResponse(content={"status": "ok"}, status_code=200)
+    else:
+        return JSONResponse(content={"status": "error", "message": "Reset not supported for this assignment type"}, status_code=400)
+def update_progress(
+    campaign_id: str,
+    user_id: str,
+    tasks_data: dict,
+    progress_data: dict,
+    item_i: int,
+    payload: Any,
+) -> JSONResponse:
+    """
+    Log the user's response for the specified item in the campaign.
+    """
+    assignment = tasks_data[campaign_id]["info"]["assignment"]
+    if assignment == "task-based":
+        # even if it's already set it should be fine
+        progress_data[campaign_id][user_id]["progress"][item_i] = True
+        # TODO: log attention checks/quality?
+        return JSONResponse(content={"status": "ok"}, status_code=200)
+    elif assignment == "single-stream":
+        # progress all users
+        for uid in progress_data[campaign_id]:
+            progress_data[campaign_id][uid]["progress"][item_i] = True
+        return JSONResponse(content={"status": "ok"}, status_code=200)
+    elif assignment == "dynamic":
+        return JSONResponse(content={"status": "error", "message": "Dynamic protocol logging not implemented yet."}, status_code=400)
+    else:
+        return JSONResponse(content={"status": "error", "message": "Unknown campaign assignment type"}, status_code=400)

pearmut/cli.py CHANGED

@@ -90,33 +90,41 @@ def _add_campaign(args_unknown):
         raise ValueError("Campaign data must contain 'info' field.")
     if "data" not in campaign_data:
         raise ValueError("Campaign data must contain 'data' field.")
-    if "type" not in campaign_data["info"]:
-        raise ValueError("Campaign 'info' must contain 'type' field.")
+    if "assignment" not in campaign_data["info"]:
+        raise ValueError("Campaign 'info' must contain 'assignment' field.")
     if "template" not in campaign_data["info"]:
         raise ValueError("Campaign 'info' must contain 'template' field.")
+    assignment = campaign_data["info"]["assignment"]
     # use random words for identifying users
     rng = random.Random(campaign_data["campaign_id"])
     rword = wonderwords.RandomWord(rng=rng)
-    if campaign_data["info"]["type"] == "task-based":
+    if assignment == "task-based":
         tasks = campaign_data["data"]
         if not isinstance(tasks, list):
-            raise ValueError("Task-based campaign 'data' must be a list of tasks.")
+            raise ValueError(
+                "Task-based campaign 'data' must be a list of tasks.")
         if not all(isinstance(task, list) for task in tasks):
-            raise ValueError("Each task in task-based campaign 'data' must be a list of items.")
-        amount = len(tasks)
-    elif campaign_data["info"]["type"] == "dynamic":
-        if "num_users" not in campaign_data:
-            raise ValueError("Dynamic campaigns must specify 'num_users'.")
+            raise ValueError(
+                "Each task in task-based campaign 'data' must be a list of items.")
+        num_users = len(tasks)
+    elif assignment == "single-stream":
+        tasks = campaign_data["data"]
+        if "num_users" not in campaign_data["info"]:
+            raise ValueError(
+                "Single-stream campaigns must specify 'num_users' in info.")
         if not isinstance(campaign_data["data"], list):
-            raise ValueError("Dynamic campaign 'data' must be a list of items.")
-        amount = campaign_data["num_users"]
+            raise ValueError(
+                "Single-stream campaign 'data' must be a list of items.")
+        num_users = campaign_data["info"]["num_users"]
+    elif assignment == "dynamic":
+        raise NotImplementedError(
+            "Dynamic campaign assignment is not yet implemented.")
     else:
-        raise ValueError(
-            f"Unknown campaign type: {campaign_data["info"]['type']}")
+        raise ValueError(f"Unknown campaign assignment type: {assignment}")
     user_ids = []
-    while len(user_ids) < amount:
+    while len(user_ids) < num_users:
         # generate random user IDs
         new_id = f"{rword.random_words(amount=1, include_parts_of_speech=['adjective'])[0]}-{rword.random_words(amount=1, include_parts_of_speech=['noun'])[0]}"
         if new_id not in user_ids:
@@ -126,10 +134,15 @@ def _add_campaign(args_unknown):
         for user_id in user_ids
     ]
-    campaign_data["data"] = {
-        user_id: task
-        for user_id, task in zip(user_ids, tasks)
-    }
+    # For task-based, data is a dict mapping user_id -> tasks
+    # For single-stream, data is a flat list (shared among all users)
+    if assignment == "task-based":
+        campaign_data["data"] = {
+            user_id: task
+            for user_id, task in zip(user_ids, tasks)
+        }
+    elif assignment == "single-stream":
+        campaign_data["data"] = tasks
     # generate a token for dashboard access if not present
     if "token" not in campaign_data:
@@ -139,7 +152,12 @@ def _add_campaign(args_unknown):
     user_progress = {
         user_id: {
-            "progress": [False]*len(campaign_data["data"][user_id]) if campaign_data["info"]["type"] == "task-based" else [],
+            # TODO: progress tracking could be based on the assignment type
+            "progress": (
+                [False]*len(campaign_data["data"][user_id]) if assignment == "task-based"
+                else [False]*len(campaign_data["data"]) if assignment == "single-stream"
+                else []
+            ),
             "time_start": None,
             "time_end": None,
             "time": 0,

pearmut 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

pearmut 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl