pearmut 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/app.py CHANGED
@@ -221,7 +221,7 @@ async def _dashboard_results(request: DashboardResultsRequest):
221
221
  continue
222
222
  for item, annotation in zip(entry["item"], entry["annotation"]):
223
223
  for model, annotation in annotation.items():
224
- if "score" in annotation:
224
+ if "score" in annotation and annotation["score"] is not None:
225
225
  model_scores[model][json.dumps(item)] = annotation["score"]
226
226
 
227
227
  results = [
@@ -284,7 +284,9 @@ async def _download_annotations(
284
284
  return JSONResponse(
285
285
  content=output,
286
286
  status_code=200,
287
- headers={"Content-Disposition": 'inline; filename="annotations.json"'},
287
+ headers={
288
+ "Content-Disposition": 'attachment; filename="annotations.json"',
289
+ },
288
290
  )
289
291
 
290
292
 
@@ -312,7 +314,9 @@ async def _download_progress(
312
314
  return JSONResponse(
313
315
  content=output,
314
316
  status_code=200,
315
- headers={"Content-Disposition": 'inline; filename="progress.json"'},
317
+ headers={
318
+ "Content-Disposition": 'attachment; filename="progress.json"',
319
+ },
316
320
  )
317
321
 
318
322
 
pearmut/cli.py CHANGED
@@ -34,21 +34,25 @@ def _run(args_unknown):
34
34
 
35
35
  # print access dashboard URL for all campaigns
36
36
  if tasks_data:
37
- print(
38
- args.server + "/dashboard.html?" + "&".join([
39
- f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
40
- for campaign_id, campaign_data in tasks_data.items()
41
- ]),
42
- # this is important to flush
43
- flush=True,
44
- )
45
-
37
+ dashboard_url = args.server + "/dashboard.html?" + "&".join([
38
+ f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
39
+ for campaign_id, campaign_data in tasks_data.items()
40
+ ])
41
+ print("\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m")
42
+ print("🍐", dashboard_url+"\n", flush=True)
43
+
44
+ # disable startup message
45
+ uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
46
+ # set time logging
47
+ uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
48
+ uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["fmt"] = (
49
+ '%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s'
50
+ )
46
51
  uvicorn.run(
47
52
  app,
48
53
  host="0.0.0.0",
49
54
  port=args.port,
50
55
  reload=False,
51
- # log_level="info",
52
56
  )
53
57
 
54
58
 
@@ -108,6 +112,38 @@ def _validate_item_structure(items):
108
112
  raise ValueError(f"Validation rule for model '{model_name}' must be a dictionary")
109
113
 
110
114
 
115
+ def _validate_document_models(doc):
116
+ """
117
+ Validate that all items in a document have the same model outputs.
118
+
119
+ Args:
120
+ doc: List of items in a document
121
+
122
+ Returns:
123
+ None if valid
124
+
125
+ Raises:
126
+ ValueError: If items have different model outputs
127
+ """
128
+ # Get model names from the first item
129
+ first_item = doc[0]
130
+ first_models = set(first_item['tgt'].keys())
131
+
132
+ # Check all other items have the same model names
133
+ for i, item in enumerate(doc[1:], start=1):
134
+ if 'tgt' not in item or not isinstance(item['tgt'], dict):
135
+ continue
136
+
137
+ item_models = set(item['tgt'].keys())
138
+ if item_models != first_models:
139
+ raise ValueError(
140
+ f"Document contains items with different model outputs. "
141
+ f"Item 0 has models {sorted(first_models)}, but item {i} has models {sorted(item_models)}. "
142
+ f"This is fine, but we can't shuffle (on by default). "
143
+ f"To fix this, set 'shuffle': false in the campaign 'info' section. "
144
+ )
145
+
146
+
111
147
  def _shuffle_campaign_data(campaign_data, rng):
112
148
  """
113
149
  Shuffle campaign data at the document level in-place
@@ -120,14 +156,11 @@ def _shuffle_campaign_data(campaign_data, rng):
120
156
  """
121
157
  def shuffle_document(doc):
122
158
  """Shuffle a single document (list of items) by reordering models in tgt dict."""
123
- if not doc or not isinstance(doc, list):
124
- return
159
+ # Validate that all items have the same models
160
+ _validate_document_models(doc)
125
161
 
126
162
  # Get all model names from the first item's tgt dict
127
163
  first_item = doc[0]
128
- if 'tgt' not in first_item or not isinstance(first_item['tgt'], dict):
129
- return
130
-
131
164
  model_names = list(first_item['tgt'].keys())
132
165
  rng.shuffle(model_names)
133
166
 
@@ -270,6 +303,12 @@ def _add_single_campaign(data_file, overwrite, server):
270
303
  campaign_data["info"]["protocol"] = "ESA"
271
304
  print("Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'.")
272
305
 
306
+ # Remove output file when overwriting (after all validations pass)
307
+ if overwrite and campaign_data['campaign_id'] in progress_data:
308
+ output_file = f"{ROOT}/data/outputs/{campaign_data['campaign_id']}.jsonl"
309
+ if os.path.exists(output_file):
310
+ os.remove(output_file)
311
+
273
312
  # For task-based, data is a dict mapping user_id -> tasks
274
313
  # For single-stream, data is a flat list (shared among all users)
275
314
  if assignment == "task-based":
@@ -395,7 +434,7 @@ def _add_single_campaign(data_file, overwrite, server):
395
434
  )
396
435
  for user_id, user_val in user_progress.items():
397
436
  # point to the protocol URL
398
- print(f'{server}/{user_val["url"]}')
437
+ print(f'🧑 {server}/{user_val["url"]}')
399
438
  print()
400
439
 
401
440
 
@@ -469,10 +508,14 @@ def main():
469
508
  help='Optional campaign name to purge (purges all if not specified)'
470
509
  )
471
510
  purge_args = purge_args.parse_args(args_unknown)
511
+ progress_data = load_progress_data()
472
512
 
473
513
  if purge_args.campaign is not None:
474
514
  # Purge specific campaign
475
515
  campaign_id = purge_args.campaign
516
+ if campaign_id not in progress_data:
517
+ print(f"Campaign '{campaign_id}' does not exist.")
518
+ return
476
519
  confirm = input(
477
520
  f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
478
521
  )
@@ -502,7 +545,6 @@ def main():
502
545
  )
503
546
  if confirm.lower() == 'y':
504
547
  # Unlink all assets first
505
- progress_data = load_progress_data()
506
548
  for campaign_id in progress_data.keys():
507
549
  _unlink_assets(campaign_id)
508
550
  shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)