PyPI - pearmut - Versions diffs - 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

pearmut 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

pearmut/app.py +19 -19
pearmut/assignment.py +26 -10
pearmut/cli.py +91 -30
pearmut/static/basic.bundle.js +1 -0
pearmut/static/basic.html +74 -0
pearmut/static/dashboard.bundle.js +1 -1
pearmut/static/dashboard.html +1 -1
pearmut/static/index.html +1 -1
pearmut/static/{assets/style.css → style.css} +1 -2
pearmut/utils.py +1 -32
{pearmut-0.2.10.dist-info → pearmut-0.3.0.dist-info}/METADATA +81 -72
pearmut-0.3.0.dist-info/RECORD +17 -0
pearmut/static/listwise.bundle.js +0 -1
pearmut/static/listwise.html +0 -77
pearmut/static/pointwise.bundle.js +0 -1
pearmut/static/pointwise.html +0 -69
pearmut-0.2.10.dist-info/RECORD +0 -19
pearmut/static/{assets/favicon.svg → favicon.svg} +0 -0
{pearmut-0.2.10.dist-info → pearmut-0.3.0.dist-info}/WHEEL +0 -0
{pearmut-0.2.10.dist-info → pearmut-0.3.0.dist-info}/entry_points.txt +0 -0
{pearmut-0.2.10.dist-info → pearmut-0.3.0.dist-info}/licenses/LICENSE +0 -0
{pearmut-0.2.10.dist-info → pearmut-0.3.0.dist-info}/top_level.txt +0 -0

pearmut/app.py CHANGED Viewed

@@ -206,33 +206,23 @@ async def _dashboard_results(request: DashboardResultsRequest):
     if campaign_id not in progress_data:
         return JSONResponse(content="Unknown campaign ID", status_code=400)
     # Check if token is valid
     if token != tasks_data[campaign_id]["token"]:
         return JSONResponse(content="Invalid token", status_code=400)
     # Compute model scores from annotations
     model_scores = collections.defaultdict(dict)
-    # Iterate through all tasks to find items with 'model' field
+    # Iterate through all tasks to find items with 'models' field (basic template)
     log = get_db_log(campaign_id)
     for entry in log:
-        if "item" not in entry or "annotations" not in entry:
+        if "item" not in entry or "annotation" not in entry:
             continue
-        for item, annotation in zip(entry["item"], entry["annotations"]):
-            if "model" in item:
-                # pointwise
+        for item, annotation in zip(entry["item"], entry["annotation"]):
+            for model, annotation in annotation.items():
                 if "score" in annotation:
-                    # make sure to only keep the latest score for each item
-                    # json.dumps(item) creates a unique item key
-                    model_scores[item["model"]][json.dumps(item)] = annotation["score"]
-            elif "models" in item:
-                # listwise
-                for model, annotation_cand in zip(item["models"], annotation):
-                    if "score" in annotation_cand:
-                        model_scores[model][json.dumps(item)] = (
-                            annotation_cand["score"]
-                        )
+                    model_scores[model][json.dumps(item)] = annotation["score"]
     results = [
         {
@@ -294,7 +284,7 @@ async def _download_annotations(
     return JSONResponse(
         content=output,
         status_code=200,
-        headers={"Content-Disposition": 'inline; filename="annotations.json"'}
+        headers={"Content-Disposition": 'inline; filename="annotations.json"'},
     )
@@ -322,7 +312,7 @@ async def _download_progress(
     return JSONResponse(
         content=output,
         status_code=200,
-        headers={"Content-Disposition": 'inline; filename="progress.json"'}
+        headers={"Content-Disposition": 'inline; filename="progress.json"'},
     )
@@ -332,6 +322,16 @@ if not os.path.exists(static_dir + "index.html"):
         "Static directory not found. Please build the frontend first."
     )
+# Mount user assets from data/assets/
+assets_dir = f"{ROOT}/data/assets"
+os.makedirs(assets_dir, exist_ok=True)
+app.mount(
+    "/assets",
+    StaticFiles(directory=assets_dir, follow_symlink=True),
+    name="assets",
+)
 app.mount(
     "/",
     StaticFiles(directory=static_dir, html=True, follow_symlink=True),

pearmut/assignment.py CHANGED Viewed

@@ -84,9 +84,13 @@ def get_i_item_taskbased(
     # try to get existing annotations if any
     items_existing = get_db_log_item(campaign_id, user_id, item_i)
+    payload_existing = None
     if items_existing:
         # get the latest ones
-        payload_existing = items_existing[-1]["annotations"]
+        latest_item = items_existing[-1]
+        payload_existing = {"annotation": latest_item["annotation"]}
+        if "comment" in latest_item:
+            payload_existing["comment"] = latest_item["comment"]
     if item_i < 0 or item_i >= len(data_all[campaign_id]["data"][user_id]):
         return JSONResponse(
@@ -107,7 +111,7 @@ def get_i_item_taskbased(
                 if k.startswith("protocol")
             },
             "payload": data_all[campaign_id]["data"][user_id][item_i]
-        } | ({"payload_existing": payload_existing} if items_existing else {}),
+        } | ({"payload_existing": payload_existing} if payload_existing else {}),
         status_code=200
     )
@@ -127,9 +131,13 @@ def get_i_item_singlestream(
     # try to get existing annotations if any
     # note the None user_id since it is shared
     items_existing = get_db_log_item(campaign_id, None, item_i)
+    payload_existing = None
     if items_existing:
         # get the latest ones
-        payload_existing = items_existing[-1]["annotations"]
+        latest_item = items_existing[-1]
+        payload_existing = {"annotation": latest_item["annotation"]}
+        if "comment" in latest_item:
+            payload_existing["comment"] = latest_item["comment"]
     if item_i < 0 or item_i >= len(data_all[campaign_id]["data"]):
         return JSONResponse(
@@ -150,7 +158,7 @@ def get_i_item_singlestream(
                 if k.startswith("protocol")
             },
             "payload": data_all[campaign_id]["data"][item_i]
-        } | ({"payload_existing": payload_existing} if items_existing else {}),
+        } | ({"payload_existing": payload_existing} if payload_existing else {}),
         status_code=200
     )
@@ -173,9 +181,13 @@ def get_next_item_taskbased(
     # try to get existing annotations if any
     items_existing = get_db_log_item(campaign_id, user_id, item_i)
+    payload_existing = None
     if items_existing:
         # get the latest ones
-        payload_existing = items_existing[-1]["annotations"]
+        latest_item = items_existing[-1]
+        payload_existing = {"annotation": latest_item["annotation"]}
+        if "comment" in latest_item:
+            payload_existing["comment"] = latest_item["comment"]
     return JSONResponse(
         content={
@@ -190,7 +202,7 @@ def get_next_item_taskbased(
                 if k.startswith("protocol")
             },
             "payload": data_all[campaign_id]["data"][user_id][item_i]
-        } | ({"payload_existing": payload_existing} if items_existing else {}),
+        } | ({"payload_existing": payload_existing} if payload_existing else {}),
         status_code=200
     )
@@ -222,9 +234,13 @@ def get_next_item_singlestream(
     # try to get existing annotations if any
     # note the None user_id since it is shared
     items_existing = get_db_log_item(campaign_id, None, item_i)
+    payload_existing = None
     if items_existing:
         # get the latest ones
-        payload_existing = items_existing[-1]["annotations"]
+        latest_item = items_existing[-1]
+        payload_existing = {"annotation": latest_item["annotation"]}
+        if "comment" in latest_item:
+            payload_existing["comment"] = latest_item["comment"]
     return JSONResponse(
         content={
@@ -239,7 +255,7 @@ def get_next_item_singlestream(
                 if k.startswith("protocol")
             },
             "payload": data_all[campaign_id]["data"][item_i]
-        } | ({"payload_existing": payload_existing} if items_existing else {}),
+        } | ({"payload_existing": payload_existing} if payload_existing else {}),
         status_code=200
     )
@@ -276,7 +292,7 @@ def reset_task(
             save_db_payload(campaign_id, {
                 "user_id": user_id,
                 "item_i": item_i,
-                "annotations": RESET_MARKER
+                "annotation": RESET_MARKER
             })
         progress_data[campaign_id][user_id]["progress"] = [False] * num_items
         _reset_user_time(progress_data, campaign_id, user_id)
@@ -288,7 +304,7 @@ def reset_task(
             save_db_payload(campaign_id, {
                 "user_id": None,
                 "item_i": item_i,
-                "annotations": RESET_MARKER
+                "annotation": RESET_MARKER
             })
         # for single-stream reset all progress
         for uid in progress_data[campaign_id]:

pearmut/cli.py CHANGED Viewed

@@ -12,9 +12,6 @@ import psutil
 from .utils import ROOT, load_progress_data, save_progress_data
-# Static directory path (constant for consistency)
-STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
 os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
 load_progress_data(warn=None)
@@ -55,14 +52,14 @@ def _run(args_unknown):
     )
-def _validate_item_structure(items, template):
+def _validate_item_structure(items):
     """
     Validate that items have the correct structure.
     Items should be lists of dictionaries with 'src' and 'tgt' keys.
+    The 'tgt' field should be a dictionary mapping model names to translations.
     Args:
         items: List of item dictionaries to validate
-        template: Template type ('pointwise' or 'listwise') for type validation
     """
     if not isinstance(items, list):
         raise ValueError("Items must be a list")
@@ -77,16 +74,82 @@ def _validate_item_structure(items, template):
         if not isinstance(item['src'], str):
             raise ValueError("Item 'src' must be a string")
-        # Validate tgt type based on template
-        if template == 'listwise':
-            if not isinstance(item['tgt'], list):
-                raise ValueError("Item 'tgt' must be a list for listwise template")
-            # Check that all elements in tgt list are strings
-            if not all(isinstance(t, str) for t in item['tgt']):
-                raise ValueError("All elements in 'tgt' list must be strings for listwise template")
-        elif template == 'pointwise':
-            if not isinstance(item['tgt'], str):
-                raise ValueError("Item 'tgt' must be a string for pointwise template")
+        # Validate tgt is a dictionary (basic template with model names)
+        if isinstance(item['tgt'], str):
+            # String not allowed - suggest using dictionary (don't include user input to prevent injection)
+            raise ValueError("Item 'tgt' must be a dictionary mapping model names to translations. For single translation, use {\"default\": \"your_translation\"}")
+        elif isinstance(item['tgt'], dict):
+            # Dictionary mapping model names to translations
+            # Validate that model names don't contain only numbers (JavaScript ordering issue)
+            for model_name, translation in item['tgt'].items():
+                if not isinstance(model_name, str):
+                    raise ValueError("Model names in 'tgt' dictionary must be strings")
+                if model_name.isdigit():
+                    raise ValueError(f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)")
+                if not isinstance(translation, str):
+                    raise ValueError(f"Translation for model '{model_name}' must be a string")
+        else:
+            raise ValueError("Item 'tgt' must be a dictionary mapping model names to translations")
+        # Validate error_spans structure if present
+        if 'error_spans' in item:
+            if not isinstance(item['error_spans'], dict):
+                raise ValueError("'error_spans' must be a dictionary mapping model names to error span lists")
+            for model_name, spans in item['error_spans'].items():
+                if not isinstance(spans, list):
+                    raise ValueError(f"Error spans for model '{model_name}' must be a list")
+        # Validate validation structure if present
+        if 'validation' in item:
+            if not isinstance(item['validation'], dict):
+                raise ValueError("'validation' must be a dictionary mapping model names to validation rules")
+            for model_name, val_rule in item['validation'].items():
+                if not isinstance(val_rule, dict):
+                    raise ValueError(f"Validation rule for model '{model_name}' must be a dictionary")
+def _shuffle_campaign_data(campaign_data, rng):
+    """
+    Shuffle campaign data at the document level in-place
+    For each document, randomly shuffles the order of models in the tgt dictionary.
+    Args:
+        campaign_data: The campaign data dictionary
+        rng: Random number generator with campaign-specific seed
+    """
+    def shuffle_document(doc):
+        """Shuffle a single document (list of items) by reordering models in tgt dict."""
+        if not doc or not isinstance(doc, list):
+            return
+        # Get all model names from the first item's tgt dict
+        first_item = doc[0]
+        if 'tgt' not in first_item or not isinstance(first_item['tgt'], dict):
+            return
+        model_names = list(first_item['tgt'].keys())
+        rng.shuffle(model_names)
+        # Reorder tgt dict for all items in the document
+        for item in doc:
+            if 'tgt' in item and isinstance(item['tgt'], dict):
+                item["tgt"] = {
+                    model: item["tgt"][model]
+                    for model in model_names
+                }
+    assignment = campaign_data["info"]["assignment"]
+    if assignment == "task-based":
+        # After transformation, data is a dict mapping user_id -> tasks
+        for user_id, task in campaign_data["data"].items():
+            for doc in task:
+                shuffle_document(doc)
+    elif assignment == "single-stream":
+        # Shuffle each document in the shared pool
+        for doc in campaign_data["data"]:
+            shuffle_document(doc)
 def _add_single_campaign(data_file, overwrite, server):
@@ -115,11 +178,9 @@ def _add_single_campaign(data_file, overwrite, server):
         raise ValueError("Campaign data must contain 'data' field.")
     if "assignment" not in campaign_data["info"]:
         raise ValueError("Campaign 'info' must contain 'assignment' field.")
-    if "template" not in campaign_data["info"]:
-        raise ValueError("Campaign 'info' must contain 'template' field.")
+    # Template defaults to "basic" if not specified
     assignment = campaign_data["info"]["assignment"]
-    template = campaign_data["info"]["template"]
     # use random words for identifying users
     rng = random.Random(campaign_data["campaign_id"])
     rword = wonderwords.RandomWord(rng=rng)
@@ -140,7 +201,7 @@ def _add_single_campaign(data_file, overwrite, server):
         for task_i, task in enumerate(tasks):
             for doc_i, doc in enumerate(task):
                 try:
-                    _validate_item_structure(doc, template)
+                    _validate_item_structure(doc)
                 except ValueError as e:
                     raise ValueError(f"Task {task_i}, document {doc_i}: {e}")
         num_users = len(tasks)
@@ -155,7 +216,7 @@ def _add_single_campaign(data_file, overwrite, server):
         # Validate item structure for single-stream
         for doc_i, doc in enumerate(tasks):
             try:
-                _validate_item_structure(doc, template)
+                _validate_item_structure(doc)
             except ValueError as e:
                 raise ValueError(f"Document {doc_i}: {e}")
         if isinstance(users_spec, int):
@@ -240,7 +301,7 @@ def _add_single_campaign(data_file, overwrite, server):
             "time_end": None,
             "time": 0,
             "url": (
-                f"{campaign_data["info"]["template"]}.html"
+                f"{campaign_data['info'].get("template", "basic")}.html"
                 f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
                 f"&user_id={user_id}"
             ),
@@ -272,15 +333,10 @@ def _add_single_campaign(data_file, overwrite, server):
         if not os.path.isdir(assets_real_path):
             raise ValueError(f"Assets source path '{assets_real_path}' must be an existing directory.")
-        if not os.path.isdir(STATIC_DIR):
-            raise ValueError(
-                f"Static directory '{STATIC_DIR}' does not exist. "
-                "Please build the frontend first."
-            )
         # Symlink path is based on the destination, stripping the 'assets/' prefix
-        symlink_path = f"{STATIC_DIR}/{assets_destination}".rstrip("/")
+        # User assets are now stored under data/assets/ instead of static/assets/
+        symlink_path = f"{ROOT}/data/{assets_destination}".rstrip("/")
         # Remove existing symlink if present and we are overriding the same campaign
         if os.path.lexists(symlink_path):
@@ -312,6 +368,11 @@ def _add_single_campaign(data_file, overwrite, server):
         print(f"Assets symlinked: {symlink_path} -> {assets_real_path}")
+    # Shuffle data if shuffle parameter is true (defaults to true)
+    should_shuffle = campaign_data["info"].get("shuffle", True)
+    if should_shuffle:
+        _shuffle_campaign_data(campaign_data, rng)
     # commit to transaction
     with open(f"{ROOT}/data/tasks/{campaign_data['campaign_id']}.json", "w") as f:
         json.dump(campaign_data, f, indent=2, ensure_ascii=False)
@@ -392,7 +453,7 @@ def main():
                 campaign_data = json.load(f)
             destination = campaign_data.get("info", {}).get("assets", {}).get("destination")
             if destination:
-                symlink_path = f"{STATIC_DIR}/{destination}".rstrip("/")
+                symlink_path = f"{ROOT}/data/{destination}".rstrip("/")
                 if os.path.islink(symlink_path):
                     os.remove(symlink_path)
                     print(f"Assets symlink removed: {symlink_path}")

pearmut 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl

pearmut 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl