pearmut 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/app.py CHANGED
@@ -30,7 +30,8 @@ app.add_middleware(
 
 tasks_data = {}
 progress_data = load_progress_data(
-    warn="No progress.json found. Running, but no campaign will be available.")
+    warn="No progress.json found. Running, but no campaign will be available."
+)
 
 # load all tasks into data_all
 for campaign_id in progress_data.keys():
@@ -60,30 +61,31 @@ async def _log_response(request: LogResponseRequest):
 
     # append response to the output log
     save_db_payload(
-        campaign_id, request.payload | {"user_id": user_id, "item_i": item_i})
+        campaign_id, request.payload | {"user_id": user_id, "item_i": item_i}
+    )
 
     # if actions were submitted, we can log time data
     if "actions" in request.payload:
-        times = [
-            x["time"] for x in request.payload["actions"]
-        ]
+        times = [x["time"] for x in request.payload["actions"]]
         if progress_data[campaign_id][user_id]["time_start"] is None:
             progress_data[campaign_id][user_id]["time_start"] = min(times)
         progress_data[campaign_id][user_id]["time_end"] = max(times)
-        progress_data[campaign_id][user_id]["time"] += sum([
-            min(b - a, 60)
-            for a, b in zip(times, times[1:])
-        ])
+        progress_data[campaign_id][user_id]["time"] += sum(
+            [min(b - a, 60) for a, b in zip(times, times[1:])]
+        )
 
     # Initialize validation_checks if it doesn't exist
     if "validations" in request.payload:
         if "validations" not in progress_data[campaign_id][user_id]:
             progress_data[campaign_id][user_id]["validations"] = {}
 
-        progress_data[campaign_id][user_id]["validations"][request.item_i] = request.payload["validations"]
+        progress_data[campaign_id][user_id]["validations"][request.item_i] = (
+            request.payload["validations"]
+        )
 
-    update_progress(campaign_id, user_id, tasks_data,
-                    progress_data, request.item_i, request.payload)
+    update_progress(
+        campaign_id, user_id, tasks_data, progress_data, request.item_i, request.payload
+    )
     save_progress_data(progress_data)
 
     return JSONResponse(content="ok", status_code=200)
@@ -149,13 +151,15 @@ async def _dashboard_data(request: DashboardDataRequest):
 
     if campaign_id not in progress_data:
         return JSONResponse(content="Unknown campaign ID", status_code=400)
-
-    is_privileged = (request.token == tasks_data[campaign_id]["token"])
+
+    is_privileged = request.token == tasks_data[campaign_id]["token"]
 
     progress_new = {}
     assignment = tasks_data[campaign_id]["info"]["assignment"]
     if assignment not in ["task-based", "single-stream"]:
-        return JSONResponse(content="Unsupported campaign assignment type", status_code=400)
+        return JSONResponse(
+            content="Unsupported campaign assignment type", status_code=400
+        )
 
     # Get threshold info for the campaign
     validation_threshold = tasks_data[campaign_id]["info"].get("validation_threshold")
@@ -164,10 +168,9 @@ async def _dashboard_data(request: DashboardDataRequest):
         # shallow copy
         entry = dict(user_val)
         entry["validations"] = [
-            all(v)
-            for v in list(entry.get("validations", {}).values())
+            all(v) for v in list(entry.get("validations", {}).values())
         ]
-
+
         # Add threshold pass/fail status (only when user is complete)
         if all(entry["progress"]):
             entry["threshold_passed"] = check_validation_threshold(
@@ -183,11 +186,8 @@ async def _dashboard_data(request: DashboardDataRequest):
         progress_new[user_id] = entry
 
     return JSONResponse(
-        content={
-            "data": progress_new,
-            "validation_threshold": validation_threshold
-        },
-        status_code=200
+        content={"data": progress_new, "validation_threshold": validation_threshold},
+        status_code=200,
     )
 
 
@@ -227,7 +227,9 @@ async def _download_annotations(
     for campaign_id in campaign_id:
         output_path = f"{ROOT}/data/outputs/{campaign_id}.jsonl"
         if campaign_id not in progress_data:
-            return JSONResponse(content=f"Unknown campaign ID {campaign_id}", status_code=400)
+            return JSONResponse(
+                content=f"Unknown campaign ID {campaign_id}", status_code=400
+            )
         if not os.path.exists(output_path):
             output[campaign_id] = []
         else:
@@ -239,28 +241,33 @@ async def _download_annotations(
 
 @app.get("/download-progress")
 async def _download_progress(
-    campaign_id: list[str] = Query(),
-    token: list[str] = Query()
+    campaign_id: list[str] = Query(), token: list[str] = Query()
 ):
 
     if len(campaign_id) != len(token):
-        return JSONResponse(content="Mismatched campaign_id and token count", status_code=400)
+        return JSONResponse(
+            content="Mismatched campaign_id and token count", status_code=400
+        )
 
     output = {}
     for i, cid in enumerate(campaign_id):
         if cid not in progress_data:
             return JSONResponse(content=f"Unknown campaign ID {cid}", status_code=400)
         if token[i] != tasks_data[cid]["token"]:
-            return JSONResponse(content=f"Invalid token for campaign ID {cid}", status_code=400)
+            return JSONResponse(
+                content=f"Invalid token for campaign ID {cid}", status_code=400
+            )
 
         output[cid] = progress_data[cid]
 
     return JSONResponse(content=output, status_code=200)
 
+
 static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static/"
 if not os.path.exists(static_dir + "index.html"):
     raise FileNotFoundError(
-        "Static directory not found. Please build the frontend first.")
+        "Static directory not found. Please build the frontend first."
+    )
 
 app.mount(
     "/",
pearmut/cli.py CHANGED
@@ -12,6 +12,9 @@ import psutil
 
 from .utils import ROOT, load_progress_data, save_progress_data
 
+# Static directory path (constant for consistency)
+STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
+
 os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
 load_progress_data(warn=None)
 
@@ -50,6 +53,40 @@ def _run(args_unknown):
     )
 
 
+def _validate_item_structure(items, template):
+    """
+    Validate that items have the correct structure.
+    Items should be lists of dictionaries with 'src' and 'tgt' keys.
+
+    Args:
+        items: List of item dictionaries to validate
+        template: Template type ('pointwise' or 'listwise') for type validation
+    """
+    if not isinstance(items, list):
+        raise ValueError("Items must be a list")
+
+    for item in items:
+        if not isinstance(item, dict):
+            raise ValueError("Each item must be a dictionary with 'src' and 'tgt' keys")
+        if 'src' not in item or 'tgt' not in item:
+            raise ValueError("Each item must contain 'src' and 'tgt' keys")
+
+        # Validate src is always a string
+        if not isinstance(item['src'], str):
+            raise ValueError("Item 'src' must be a string")
+
+        # Validate tgt type based on template
+        if template == 'listwise':
+            if not isinstance(item['tgt'], list):
+                raise ValueError("Item 'tgt' must be a list for listwise template")
+            # Check that all elements in tgt list are strings
+            if not all(isinstance(t, str) for t in item['tgt']):
+                raise ValueError("All elements in 'tgt' list must be strings for listwise template")
+        elif template == 'pointwise':
+            if not isinstance(item['tgt'], str):
+                raise ValueError("Item 'tgt' must be a string for pointwise template")
+
+
 def _add_single_campaign(data_file, overwrite, server):
     """
     Add a single campaign from a JSON data file.
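Note: the new _validate_item_structure helper is applied per document, so each document must be a list of item dictionaries with 'src' and 'tgt' keys; 'tgt' is a single string for the pointwise template and a list of strings for the listwise template. A sketch of documents that would pass or fail, with invented example strings:

    # Hypothetical documents; the strings are illustrative only.
    pointwise_doc = [{"src": "Hello world", "tgt": "Hallo Welt"}]
    listwise_doc = [{"src": "Hello world", "tgt": ["Hallo Welt", "Hi there"]}]

    _validate_item_structure(pointwise_doc, "pointwise")  # passes
    _validate_item_structure(listwise_doc, "listwise")    # passes
    _validate_item_structure(pointwise_doc, "listwise")   # raises ValueError: 'tgt' must be a list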
@@ -80,6 +117,7 @@ def _add_single_campaign(data_file, overwrite, server):
         raise ValueError("Campaign 'info' must contain 'template' field.")
 
     assignment = campaign_data["info"]["assignment"]
+    template = campaign_data["info"]["template"]
     # use random words for identifying users
     rng = random.Random(campaign_data["campaign_id"])
     rword = wonderwords.RandomWord(rng=rng)
@@ -96,6 +134,13 @@ def _add_single_campaign(data_file, overwrite, server):
         if not all(isinstance(task, list) for task in tasks):
             raise ValueError(
                 "Each task in task-based campaign 'data' must be a list of items.")
+        # Validate item structure for each task
+        for task_i, task in enumerate(tasks):
+            for doc_i, doc in enumerate(task):
+                try:
+                    _validate_item_structure(doc, template)
+                except ValueError as e:
+                    raise ValueError(f"Task {task_i}, document {doc_i}: {e}")
         num_users = len(tasks)
     elif assignment == "single-stream":
         tasks = campaign_data["data"]
@@ -105,6 +150,12 @@ def _add_single_campaign(data_file, overwrite, server):
         if not isinstance(campaign_data["data"], list):
             raise ValueError(
                 "Single-stream campaign 'data' must be a list of items.")
+        # Validate item structure for single-stream
+        for doc_i, doc in enumerate(tasks):
+            try:
+                _validate_item_structure(doc, template)
+            except ValueError as e:
+                raise ValueError(f"Document {doc_i}: {e}")
         if isinstance(users_spec, int):
             num_users = users_spec
         elif isinstance(users_spec, list):
@@ -199,30 +250,62 @@ def _add_single_campaign(data_file, overwrite, server):
 
     # Handle assets symlink if specified
     if "assets" in campaign_data["info"]:
-        assets_real_path = campaign_data["info"]["assets"]
+        assets_config = campaign_data["info"]["assets"]
+
+        # assets must be a dictionary with source and destination keys
+        if not isinstance(assets_config, dict):
+            raise ValueError("Assets must be a dictionary with 'source' and 'destination' keys.")
+        if "source" not in assets_config or "destination" not in assets_config:
+            raise ValueError("Assets config must contain 'source' and 'destination' keys.")
+
+        assets_source = assets_config["source"]
+        assets_destination = assets_config["destination"]
+
+        # Validate destination starts with 'assets/'
+        if not assets_destination.startswith("assets/"):
+            raise ValueError(f"Assets destination '{assets_destination}' must start with 'assets/'.")
 
         # Resolve relative paths from the caller's current working directory
-        assets_real_path = os.path.abspath(assets_real_path)
+        assets_real_path = os.path.abspath(assets_source)
 
         if not os.path.isdir(assets_real_path):
-            raise ValueError(f"Assets path '{assets_real_path}' must be an existing directory.")
+            raise ValueError(f"Assets source path '{assets_real_path}' must be an existing directory.")
 
-        static_dir = f"{os.path.dirname(os.path.abspath(__file__))}/static"
-        dir_name = assets_real_path.split(os.sep)[-1]
-
-        if not os.path.isdir(static_dir):
+        if not os.path.isdir(STATIC_DIR):
             raise ValueError(
-                f"Static directory '{static_dir}' does not exist. "
+                f"Static directory '{STATIC_DIR}' does not exist. "
                 "Please build the frontend first."
             )
-        symlink_path = f"{static_dir}/assets/{dir_name}"
-
-        # Remove existing symlink if present and we are overriding
-        if os.path.exists(symlink_path):
+
+        # Symlink path is based on the destination, stripping the 'assets/' prefix
+        symlink_path = f"{STATIC_DIR}/{assets_destination}"
+
+        # Remove existing symlink if present and we are overriding the same campaign
+        if os.path.lexists(symlink_path):
+            # Check if any other campaign is using this destination
+            current_campaign_id = campaign_data['campaign_id']
+            tasks_dir = f"{ROOT}/data/tasks"
+            if os.path.exists(tasks_dir):
+                for task_file in os.listdir(tasks_dir):
+                    if task_file.endswith('.json'):
+                        other_campaign_id = task_file[:-5]
+                        if other_campaign_id != current_campaign_id:
+                            with open(f"{tasks_dir}/{task_file}", "r") as f:
+                                other_campaign = json.load(f)
+                            other_assets = other_campaign.get("info", {}).get("assets")
+                            if other_assets and isinstance(other_assets, dict):
+                                if other_assets.get("destination") == assets_destination:
+                                    raise ValueError(
+                                        f"Assets destination '{assets_destination}' is already used by campaign '{other_campaign_id}'."
+                                    )
+            # Only allow overwrite if it's the same campaign
             if overwrite:
                 os.remove(symlink_path)
             else:
-                raise ValueError(f"Assets symlink '{symlink_path}' already exists.")
+                raise ValueError(f"Assets destination '{assets_destination}' is already taken.")
+
+        # Ensure the assets directory exists
+        os.makedirs(f"{STATIC_DIR}/assets", exist_ok=True)
 
         os.symlink(assets_real_path, symlink_path, target_is_directory=True)
         print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
@@ -299,6 +382,20 @@ def main():
     elif args.command == 'purge':
         import shutil
 
+        def _unlink_assets(campaign_id):
+            """Unlink assets symlink for a campaign if it exists."""
+            task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
+            if not os.path.exists(task_file):
+                return
+            with open(task_file, "r") as f:
+                campaign_data = json.load(f)
+            destination = campaign_data.get("info", {}).get("assets", {}).get("destination")
+            if destination:
+                symlink_path = f"{STATIC_DIR}/{destination}"
+                if os.path.islink(symlink_path):
+                    os.remove(symlink_path)
+                    print(f"Assets symlink removed: {symlink_path}")
+
         # Parse optional campaign name
         purge_args = argparse.ArgumentParser()
         purge_args.add_argument(
@@ -314,6 +411,8 @@ def main():
                 f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
             )
             if confirm.lower() == 'y':
+                # Unlink assets before removing task file
+                _unlink_assets(campaign_id)
                 # Remove task file
                 task_file = f"{ROOT}/data/tasks/{campaign_id}.json"
                 if os.path.exists(task_file):
@@ -336,6 +435,13 @@ def main():
                 "Are you sure you want to purge all campaign data? This action cannot be undone. [y/n] "
             )
             if confirm.lower() == 'y':
+                # Unlink all assets first
+                tasks_dir = f"{ROOT}/data/tasks"
+                if os.path.exists(tasks_dir):
+                    for task_file in os.listdir(tasks_dir):
+                        if task_file.endswith('.json'):
+                            campaign_id = task_file[:-5]
+                            _unlink_assets(campaign_id)
                 shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)
                 shutil.rmtree(f"{ROOT}/data/outputs", ignore_errors=True)
                 if os.path.exists(f"{ROOT}/data/progress.json"):