PyPI - pearmut - Versions diffs - 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

pearmut 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

pearmut/app.py +7 -3
pearmut/cli.py +65 -19
pearmut/static/basic.bundle.js +1 -1
pearmut/static/basic.html +25 -2
pearmut/static/dashboard.bundle.js +1 -1
pearmut/static/dashboard.html +2 -2
pearmut/static/index.bundle.js +1 -0
pearmut/static/index.html +1 -1
pearmut/static/style.css +1 -1
{pearmut-0.3.1.dist-info → pearmut-0.3.3.dist-info}/METADATA +4 -3
pearmut-0.3.3.dist-info/RECORD +18 -0
pearmut-0.3.1.dist-info/RECORD +0 -17
{pearmut-0.3.1.dist-info → pearmut-0.3.3.dist-info}/WHEEL +0 -0
{pearmut-0.3.1.dist-info → pearmut-0.3.3.dist-info}/entry_points.txt +0 -0
{pearmut-0.3.1.dist-info → pearmut-0.3.3.dist-info}/licenses/LICENSE +0 -0
{pearmut-0.3.1.dist-info → pearmut-0.3.3.dist-info}/top_level.txt +0 -0

pearmut/app.py CHANGED Viewed

@@ -221,7 +221,7 @@ async def _dashboard_results(request: DashboardResultsRequest):
             continue
         for item, annotation in zip(entry["item"], entry["annotation"]):
             for model, annotation in annotation.items():
-                if "score" in annotation:
+                if "score" in annotation and annotation["score"] is not None:
                     model_scores[model][json.dumps(item)] = annotation["score"]
     results = [
@@ -284,7 +284,9 @@ async def _download_annotations(
     return JSONResponse(
         content=output,
         status_code=200,
-        headers={"Content-Disposition": 'inline; filename="annotations.json"'},
+        headers={
+            "Content-Disposition": 'attachment; filename="annotations.json"',
+        },
     )
@@ -312,7 +314,9 @@ async def _download_progress(
     return JSONResponse(
         content=output,
         status_code=200,
-        headers={"Content-Disposition": 'inline; filename="progress.json"'},
+        headers={
+            "Content-Disposition": 'attachment; filename="progress.json"',
+        },
     )

pearmut/cli.py CHANGED Viewed

@@ -34,21 +34,25 @@ def _run(args_unknown):
     # print access dashboard URL for all campaigns
     if tasks_data:
-        print(
-            args.server + "/dashboard.html?" + "&".join([
-                f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
-                for campaign_id, campaign_data in tasks_data.items()
-            ]),
-            # this is important to flush
-            flush=True,
-        )
+        dashboard_url = args.server + "/dashboard.html?" + "&".join([
+            f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
+            for campaign_id, campaign_data in tasks_data.items()
+        ])
+        print("\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m")
+        print("🍐", dashboard_url+"\n", flush=True)
+    # disable startup message
+    uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
+    # set time logging
+    uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
+    uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["fmt"] = (
+        '%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s'
+    )
     uvicorn.run(
         app,
-        host="127.0.0.1",
+        host="0.0.0.0",
         port=args.port,
         reload=False,
-        # log_level="info",
     )
@@ -108,6 +112,38 @@ def _validate_item_structure(items):
                     raise ValueError(f"Validation rule for model '{model_name}' must be a dictionary")
+def _validate_document_models(doc):
+    """
+    Validate that all items in a document have the same model outputs.
+    Args:
+        doc: List of items in a document
+    Returns:
+        None if valid
+    Raises:
+        ValueError: If items have different model outputs
+    """
+    # Get model names from the first item
+    first_item = doc[0]
+    first_models = set(first_item['tgt'].keys())
+    # Check all other items have the same model names
+    for i, item in enumerate(doc[1:], start=1):
+        if 'tgt' not in item or not isinstance(item['tgt'], dict):
+            continue
+        item_models = set(item['tgt'].keys())
+        if item_models != first_models:
+            raise ValueError(
+                f"Document contains items with different model outputs. "
+                f"Item 0 has models {sorted(first_models)}, but item {i} has models {sorted(item_models)}. "
+                f"This is fine, but we can't shuffle (on by default). "
+                f"To fix this, set 'shuffle': false in the campaign 'info' section. "
+            )
 def _shuffle_campaign_data(campaign_data, rng):
     """
     Shuffle campaign data at the document level in-place
@@ -120,14 +156,11 @@ def _shuffle_campaign_data(campaign_data, rng):
     """
     def shuffle_document(doc):
         """Shuffle a single document (list of items) by reordering models in tgt dict."""
-        if not doc or not isinstance(doc, list):
-            return
+        # Validate that all items have the same models
+        _validate_document_models(doc)
         # Get all model names from the first item's tgt dict
         first_item = doc[0]
-        if 'tgt' not in first_item or not isinstance(first_item['tgt'], dict):
-            return
         model_names = list(first_item['tgt'].keys())
         rng.shuffle(model_names)
@@ -182,7 +215,7 @@ def _add_single_campaign(data_file, overwrite, server):
     # Template defaults to "basic" if not specified
     assignment = campaign_data["info"]["assignment"]
     # use random words for identifying users
-    rng = random.Random(campaign_data["campaign_id"])
+    rng = random.Random()
     rword = wonderwords.RandomWord(rng=rng)
     # Parse users specification from info
@@ -265,6 +298,16 @@ def _add_single_campaign(data_file, overwrite, server):
             raise ValueError("'users' list must contain all strings or all dicts.")
     else:
         raise ValueError("'users' must be an integer or a list.")
+    if "protocol" not in campaign_data["info"]:
+        campaign_data["info"]["protocol"] = "ESA"
+        print("Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'.")
+    # Remove output file when overwriting (after all validations pass)
+    if overwrite and campaign_data['campaign_id'] in progress_data:
+        output_file = f"{ROOT}/data/outputs/{campaign_data['campaign_id']}.jsonl"
+        if os.path.exists(output_file):
+            os.remove(output_file)
     # For task-based, data is a dict mapping user_id -> tasks
     # For single-stream, data is a flat list (shared among all users)
@@ -391,7 +434,7 @@ def _add_single_campaign(data_file, overwrite, server):
     )
     for user_id, user_val in user_progress.items():
         # point to the protocol URL
-        print(f'{server}/{user_val["url"]}')
+        print(f'🧑 {server}/{user_val["url"]}')
     print()
@@ -465,10 +508,14 @@ def main():
             help='Optional campaign name to purge (purges all if not specified)'
         )
         purge_args = purge_args.parse_args(args_unknown)
+        progress_data = load_progress_data()
         if purge_args.campaign is not None:
             # Purge specific campaign
             campaign_id = purge_args.campaign
+            if campaign_id not in progress_data:
+                print(f"Campaign '{campaign_id}' does not exist.")
+                return
             confirm = input(
                 f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
             )
@@ -498,7 +545,6 @@ def main():
             )
             if confirm.lower() == 'y':
                 # Unlink all assets first
-                progress_data = load_progress_data()
                 for campaign_id in progress_data.keys():
                     _unlink_assets(campaign_id)
                 shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)

pearmut 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

pearmut 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl