pearmut 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pearmut/app.py CHANGED
@@ -221,7 +221,7 @@ async def _dashboard_results(request: DashboardResultsRequest):
221
221
  continue
222
222
  for item, annotation in zip(entry["item"], entry["annotation"]):
223
223
  for model, annotation in annotation.items():
224
- if "score" in annotation:
224
+ if "score" in annotation and annotation["score"] is not None:
225
225
  model_scores[model][json.dumps(item)] = annotation["score"]
226
226
 
227
227
  results = [
@@ -284,7 +284,9 @@ async def _download_annotations(
284
284
  return JSONResponse(
285
285
  content=output,
286
286
  status_code=200,
287
- headers={"Content-Disposition": 'inline; filename="annotations.json"'},
287
+ headers={
288
+ "Content-Disposition": 'attachment; filename="annotations.json"',
289
+ },
288
290
  )
289
291
 
290
292
 
@@ -312,7 +314,9 @@ async def _download_progress(
312
314
  return JSONResponse(
313
315
  content=output,
314
316
  status_code=200,
315
- headers={"Content-Disposition": 'inline; filename="progress.json"'},
317
+ headers={
318
+ "Content-Disposition": 'attachment; filename="progress.json"',
319
+ },
316
320
  )
317
321
 
318
322
 
pearmut/cli.py CHANGED
@@ -34,21 +34,25 @@ def _run(args_unknown):
34
34
 
35
35
  # print access dashboard URL for all campaigns
36
36
  if tasks_data:
37
- print(
38
- args.server + "/dashboard.html?" + "&".join([
39
- f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
40
- for campaign_id, campaign_data in tasks_data.items()
41
- ]),
42
- # this is important to flush
43
- flush=True,
44
- )
45
-
37
+ dashboard_url = args.server + "/dashboard.html?" + "&".join([
38
+ f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
39
+ for campaign_id, campaign_data in tasks_data.items()
40
+ ])
41
+ print("\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m")
42
+ print("🍐", dashboard_url+"\n", flush=True)
43
+
44
+ # disable startup message
45
+ uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
46
+ # set time logging
47
+ uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
48
+ uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["fmt"] = (
49
+ '%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s'
50
+ )
46
51
  uvicorn.run(
47
52
  app,
48
- host="127.0.0.1",
53
+ host="0.0.0.0",
49
54
  port=args.port,
50
55
  reload=False,
51
- # log_level="info",
52
56
  )
53
57
 
54
58
 
@@ -108,6 +112,38 @@ def _validate_item_structure(items):
108
112
  raise ValueError(f"Validation rule for model '{model_name}' must be a dictionary")
109
113
 
110
114
 
115
+ def _validate_document_models(doc):
116
+ """
117
+ Validate that all items in a document have the same model outputs.
118
+
119
+ Args:
120
+ doc: List of items in a document
121
+
122
+ Returns:
123
+ None if valid
124
+
125
+ Raises:
126
+ ValueError: If items have different model outputs
127
+ """
128
+ # Get model names from the first item
129
+ first_item = doc[0]
130
+ first_models = set(first_item['tgt'].keys())
131
+
132
+ # Check all other items have the same model names
133
+ for i, item in enumerate(doc[1:], start=1):
134
+ if 'tgt' not in item or not isinstance(item['tgt'], dict):
135
+ continue
136
+
137
+ item_models = set(item['tgt'].keys())
138
+ if item_models != first_models:
139
+ raise ValueError(
140
+ f"Document contains items with different model outputs. "
141
+ f"Item 0 has models {sorted(first_models)}, but item {i} has models {sorted(item_models)}. "
142
+ f"This is fine, but we can't shuffle (on by default). "
143
+ f"To fix this, set 'shuffle': false in the campaign 'info' section. "
144
+ )
145
+
146
+
111
147
  def _shuffle_campaign_data(campaign_data, rng):
112
148
  """
113
149
  Shuffle campaign data at the document level in-place
@@ -120,14 +156,11 @@ def _shuffle_campaign_data(campaign_data, rng):
120
156
  """
121
157
  def shuffle_document(doc):
122
158
  """Shuffle a single document (list of items) by reordering models in tgt dict."""
123
- if not doc or not isinstance(doc, list):
124
- return
159
+ # Validate that all items have the same models
160
+ _validate_document_models(doc)
125
161
 
126
162
  # Get all model names from the first item's tgt dict
127
163
  first_item = doc[0]
128
- if 'tgt' not in first_item or not isinstance(first_item['tgt'], dict):
129
- return
130
-
131
164
  model_names = list(first_item['tgt'].keys())
132
165
  rng.shuffle(model_names)
133
166
 
@@ -182,7 +215,7 @@ def _add_single_campaign(data_file, overwrite, server):
182
215
  # Template defaults to "basic" if not specified
183
216
  assignment = campaign_data["info"]["assignment"]
184
217
  # use random words for identifying users
185
- rng = random.Random(campaign_data["campaign_id"])
218
+ rng = random.Random()
186
219
  rword = wonderwords.RandomWord(rng=rng)
187
220
 
188
221
  # Parse users specification from info
@@ -265,6 +298,16 @@ def _add_single_campaign(data_file, overwrite, server):
265
298
  raise ValueError("'users' list must contain all strings or all dicts.")
266
299
  else:
267
300
  raise ValueError("'users' must be an integer or a list.")
301
+
302
+ if "protocol" not in campaign_data["info"]:
303
+ campaign_data["info"]["protocol"] = "ESA"
304
+ print("Warning: 'protocol' not specified in campaign info. Defaulting to 'ESA'.")
305
+
306
+ # Remove output file when overwriting (after all validations pass)
307
+ if overwrite and campaign_data['campaign_id'] in progress_data:
308
+ output_file = f"{ROOT}/data/outputs/{campaign_data['campaign_id']}.jsonl"
309
+ if os.path.exists(output_file):
310
+ os.remove(output_file)
268
311
 
269
312
  # For task-based, data is a dict mapping user_id -> tasks
270
313
  # For single-stream, data is a flat list (shared among all users)
@@ -391,7 +434,7 @@ def _add_single_campaign(data_file, overwrite, server):
391
434
  )
392
435
  for user_id, user_val in user_progress.items():
393
436
  # point to the protocol URL
394
- print(f'{server}/{user_val["url"]}')
437
+ print(f'🧑 {server}/{user_val["url"]}')
395
438
  print()
396
439
 
397
440
 
@@ -465,10 +508,14 @@ def main():
465
508
  help='Optional campaign name to purge (purges all if not specified)'
466
509
  )
467
510
  purge_args = purge_args.parse_args(args_unknown)
511
+ progress_data = load_progress_data()
468
512
 
469
513
  if purge_args.campaign is not None:
470
514
  # Purge specific campaign
471
515
  campaign_id = purge_args.campaign
516
+ if campaign_id not in progress_data:
517
+ print(f"Campaign '{campaign_id}' does not exist.")
518
+ return
472
519
  confirm = input(
473
520
  f"Are you sure you want to purge campaign '{campaign_id}'? This action cannot be undone. [y/n] "
474
521
  )
@@ -498,7 +545,6 @@ def main():
498
545
  )
499
546
  if confirm.lower() == 'y':
500
547
  # Unlink all assets first
501
- progress_data = load_progress_data()
502
548
  for campaign_id in progress_data.keys():
503
549
  _unlink_assets(campaign_id)
504
550
  shutil.rmtree(f"{ROOT}/data/tasks", ignore_errors=True)