pearmut-1.0.1-py3-none-any.whl → pearmut-1.0.3-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
pearmut/cli.py CHANGED
@@ -3,20 +3,49 @@ Command-line interface for managing and running the Pearmut server.
3
3
  """
4
4
 
5
5
  import argparse
6
+ import atexit
7
+ import fcntl
6
8
  import hashlib
7
9
  import json
8
10
  import os
9
11
  import urllib.parse
10
12
 
11
- import psutil
12
-
13
- from .utils import ROOT, load_progress_data, save_progress_data
13
+ from .utils import (
14
+ ROOT,
15
+ TOKEN_MAIN,
16
+ is_form_document,
17
+ load_progress_data,
18
+ save_progress_data,
19
+ )
14
20
 
15
21
  os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
16
22
  load_progress_data(warn=None)
17
23
 
18
24
 
19
25
  def _run(args_unknown):
26
+ # Acquire lock before starting server
27
+ lock_file = f"{ROOT}/data/.lock"
28
+ try:
29
+ lock_fd = open(lock_file, "a+")
30
+ fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
31
+ lock_fd.seek(0)
32
+ lock_fd.truncate()
33
+ lock_fd.write(str(os.getpid()))
34
+ lock_fd.flush()
35
+ except BlockingIOError:
36
+ try:
37
+ with open(lock_file, "r") as f:
38
+ pid = f.read().strip()
39
+ print("You can't run multiple instances of Pearmut in the same directory.")
40
+ if pid:
41
+ print(f"Another instance (PID {pid}) is holding the lock.")
42
+ except (FileNotFoundError, PermissionError, OSError):
43
+ print("You can't run multiple instances of Pearmut in the same directory.")
44
+ exit(1)
45
+
46
+ # Register cleanup to remove lock file on exit
47
+ atexit.register(lambda: os.path.exists(lock_file) and os.remove(lock_file))
48
+
20
49
  import uvicorn
21
50
 
22
51
  from .app import app, tasks_data
@@ -33,29 +62,29 @@ def _run(args_unknown):
33
62
  args = args.parse_args(args_unknown)
34
63
 
35
64
  # print access dashboard URL for all campaigns
36
- if tasks_data:
37
- dashboard_url = (
38
- args.server
39
- + "/dashboard.html?"
40
- + "&".join(
41
- [
42
- f"campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data["token"]}"
43
- for campaign_id, campaign_data in tasks_data.items()
44
- ]
45
- )
46
- )
47
- print(
48
- "\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
65
+ dashboard_url = (
66
+ args.server
67
+ + "/dashboard?"
68
+ + f"token_main={TOKEN_MAIN}"
69
+ + "".join(
70
+ [
71
+ f"&campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data['token']}"
72
+ for campaign_id, campaign_data in tasks_data.items()
73
+ ]
49
74
  )
50
- print("🍐", dashboard_url + "\n", flush=True)
75
+ )
76
+ print(
77
+ "\033[92mNow serving Pearmut, use the following URL to access the everything-dashboard:\033[0m"
78
+ )
79
+ print("🍐", dashboard_url + "\n", flush=True)
51
80
 
52
81
  # disable startup message
53
82
  uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
54
83
  # set time logging
55
84
  uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
56
- uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
57
- "fmt"
58
- ] = "%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s"
85
+ uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["fmt"] = (
86
+ "%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s"
87
+ )
59
88
  uvicorn.run(
60
89
  app,
61
90
  host="0.0.0.0",
@@ -67,8 +96,11 @@ def _run(args_unknown):
67
96
  def _validate_item_structure(items):
68
97
  """
69
98
  Validate that items have the correct structure.
70
- Items should be lists of dictionaries with 'tgt' and optionally 'src' and/or 'ref' keys.
71
- The 'tgt' field should be a dictionary mapping model names to translations.
99
+ Items can be either:
100
+ 1. Evaluation items: dictionaries with 'tgt' and optionally 'src' and/or 'ref' keys
101
+ 2. Form items: dictionaries with 'text' and 'form' keys
102
+
103
+ A document must contain either all evaluation items or all form items (not mixed).
72
104
 
73
105
  Args:
74
106
  items: List of item dictionaries to validate
@@ -76,68 +108,100 @@ def _validate_item_structure(items):
76
108
  if not isinstance(items, list):
77
109
  raise ValueError("Items must be a list")
78
110
 
79
- for item in items:
80
- if not isinstance(item, dict):
81
- raise ValueError("Each item must be a dictionary with 'tgt' key")
82
- if "tgt" not in item:
83
- raise ValueError("Each item must contain 'tgt' key")
111
+ if not items:
112
+ raise ValueError("Items list cannot be empty")
84
113
 
85
- # Validate src is a string if present
86
- if "src" in item and not isinstance(item["src"], str):
87
- raise ValueError("Item 'src' must be a string")
114
+ # Check if first item is a form item or evaluation item
115
+ first_item = items[0]
116
+ if not isinstance(first_item, dict):
117
+ raise ValueError("Each item must be a dictionary")
88
118
 
89
- # Validate ref is a string if present
90
- if "ref" in item and not isinstance(item["ref"], str):
91
- raise ValueError("Item 'ref' must be a string")
119
+ first_item_is_form = "text" in first_item and "form" in first_item
92
120
 
93
- # Validate tgt is a dictionary (basic template with model names)
94
- if isinstance(item["tgt"], str):
95
- # String not allowed - suggest using dictionary (don't include user input to prevent injection)
96
- raise ValueError(
97
- 'Item \'tgt\' must be a dictionary mapping model names to translations. For single translation, use {"default": "your_translation"}'
98
- )
99
- elif isinstance(item["tgt"], dict):
100
- # Dictionary mapping model names to translations
101
- # Validate that model names don't contain only numbers (JavaScript ordering issue)
102
- for model_name, translation in item["tgt"].items():
103
- if not isinstance(model_name, str):
104
- raise ValueError("Model names in 'tgt' dictionary must be strings")
105
- if model_name.isdigit():
106
- raise ValueError(
107
- f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)"
108
- )
109
- if not isinstance(translation, str):
110
- raise ValueError(
111
- f"Translation for model '{model_name}' must be a string"
112
- )
121
+ for item in items:
122
+ if not isinstance(item, dict):
123
+ raise ValueError("Each item must be a dictionary")
124
+
125
+ # Check consistency: all items must be same type (form or evaluation)
126
+ current_is_form = "text" in item and "form" in item
127
+ if current_is_form != first_item_is_form:
128
+ raise ValueError("Document cannot mix form items and evaluation items")
129
+
130
+ if first_item_is_form:
131
+ # Validate form item structure
132
+ if "text" not in item:
133
+ raise ValueError("Form item must contain 'text' key")
134
+ if "form" not in item:
135
+ raise ValueError("Form item must contain 'form' key")
136
+ if not isinstance(item["text"], str):
137
+ raise ValueError("Form item 'text' must be a string")
138
+ if item["form"] not in {None, "number", "string", "choices", "script"}:
139
+ raise ValueError(
140
+ "Form item 'form' must be null, 'number', 'string', 'choices', or 'script'"
141
+ )
113
142
  else:
114
- raise ValueError(
115
- "Item 'tgt' must be a dictionary mapping model names to translations"
116
- )
143
+ # Validate evaluation item structure
144
+ if "tgt" not in item:
145
+ raise ValueError("Each item must contain 'tgt' key")
146
+
147
+ # Validate src is a string if present
148
+ if "src" in item and not isinstance(item["src"], str):
149
+ raise ValueError("Item 'src' must be a string")
150
+
151
+ # Validate ref is a string if present
152
+ if "ref" in item and not isinstance(item["ref"], str):
153
+ raise ValueError("Item 'ref' must be a string")
117
154
 
118
- # Validate error_spans structure if present
119
- if "error_spans" in item:
120
- if not isinstance(item["error_spans"], dict):
155
+ # Validate tgt is a dictionary (annotate template with model names)
156
+ if isinstance(item["tgt"], str):
157
+ # String not allowed - suggest using dictionary (don't include user input to prevent injection)
121
158
  raise ValueError(
122
- "'error_spans' must be a dictionary mapping model names to error span lists"
159
+ 'Item \'tgt\' must be a dictionary mapping model names to translations. For single translation, use {"default": "your_translation"}'
123
160
  )
124
- for model_name, spans in item["error_spans"].items():
125
- if not isinstance(spans, list):
126
- raise ValueError(
127
- f"Error spans for model '{model_name}' must be a list"
128
- )
129
-
130
- # Validate validation structure if present
131
- if "validation" in item:
132
- if not isinstance(item["validation"], dict):
161
+ elif isinstance(item["tgt"], dict):
162
+ # Dictionary mapping model names to translations
163
+ # Validate that model names don't contain only numbers (JavaScript ordering issue)
164
+ for model_name, translation in item["tgt"].items():
165
+ if not isinstance(model_name, str):
166
+ raise ValueError(
167
+ "Model names in 'tgt' dictionary must be strings"
168
+ )
169
+ if model_name.isdigit():
170
+ raise ValueError(
171
+ f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)"
172
+ )
173
+ if not isinstance(translation, str):
174
+ raise ValueError(
175
+ f"Translation for model '{model_name}' must be a string"
176
+ )
177
+ else:
133
178
  raise ValueError(
134
- "'validation' must be a dictionary mapping model names to validation rules"
179
+ "Item 'tgt' must be a dictionary mapping model names to translations"
135
180
  )
136
- for model_name, val_rule in item["validation"].items():
137
- if not isinstance(val_rule, dict):
181
+
182
+ # Validate error_spans structure if present
183
+ if "error_spans" in item:
184
+ if not isinstance(item["error_spans"], dict):
185
+ raise ValueError(
186
+ "'error_spans' must be a dictionary mapping model names to error span lists"
187
+ )
188
+ for model_name, spans in item["error_spans"].items():
189
+ if not isinstance(spans, list):
190
+ raise ValueError(
191
+ f"Error spans for model '{model_name}' must be a list"
192
+ )
193
+
194
+ # Validate validation structure if present
195
+ if "validation" in item:
196
+ if not isinstance(item["validation"], dict):
138
197
  raise ValueError(
139
- f"Validation rule for model '{model_name}' must be a dictionary"
198
+ "'validation' must be a dictionary mapping model names to validation rules"
140
199
  )
200
+ for model_name, val_rule in item["validation"].items():
201
+ if not isinstance(val_rule, dict):
202
+ raise ValueError(
203
+ f"Validation rule for model '{model_name}' must be a dictionary"
204
+ )
141
205
 
142
206
 
143
207
  def _validate_document_models(doc):
@@ -185,6 +249,10 @@ def _shuffle_campaign_data(campaign_data, rng):
185
249
 
186
250
  def shuffle_document(doc):
187
251
  """Shuffle a single document (list of items) by reordering models in tgt dict."""
252
+ # Skip shuffling for form documents (they don't have tgt)
253
+ if is_form_document(doc):
254
+ return # Form documents don't need shuffling
255
+
188
256
  # Validate that all items have the same models
189
257
  _validate_document_models(doc)
190
258
 
@@ -238,7 +306,7 @@ def _add_single_campaign(campaign_data, overwrite, server):
238
306
  if "assignment" not in campaign_data["info"]:
239
307
  raise ValueError("Campaign 'info' must contain 'assignment' field.")
240
308
 
241
- # Template defaults to "basic" if not specified
309
+ # Template defaults to "annotate" if not specified
242
310
  assignment = campaign_data["info"]["assignment"]
243
311
  # use random words for identifying users
244
312
  rng = random.Random()
@@ -248,6 +316,20 @@ def _add_single_campaign(campaign_data, overwrite, server):
248
316
  users_spec = campaign_data["info"].get("users")
249
317
  user_tokens = {} # user_id -> {"pass": ..., "fail": ...}
250
318
 
319
+ # Validate and process data_welcome if present
320
+ data_welcome = campaign_data.get("data_welcome", [])
321
+ if data_welcome:
322
+ if not isinstance(data_welcome, list):
323
+ raise ValueError("'data_welcome' must be a list of documents.")
324
+ # Validate welcome documents structure - each should be a list of items
325
+ for doc_i, doc in enumerate(data_welcome):
326
+ if not isinstance(doc, list):
327
+ raise ValueError(f"Welcome document {doc_i} must be a list of items.")
328
+ try:
329
+ _validate_item_structure(doc)
330
+ except ValueError as e:
331
+ raise ValueError(f"Welcome document {doc_i}: {e}")
332
+
251
333
  if assignment == "task-based":
252
334
  tasks = campaign_data["data"]
253
335
  if not isinstance(tasks, list):
@@ -303,14 +385,14 @@ def _add_single_campaign(campaign_data, overwrite, server):
303
385
  # Validate dynamic-specific parameters
304
386
  if "dynamic_top" not in campaign_data["info"]:
305
387
  campaign_data["info"]["dynamic_top"] = 2
306
- if "dynamic_first" not in campaign_data["info"]:
307
- campaign_data["info"]["dynamic_first"] = 5
388
+ if "dynamic_warmup" not in campaign_data["info"]:
389
+ campaign_data["info"]["dynamic_warmup"] = 5
308
390
  if "dynamic_contrastive_models" not in campaign_data["info"]:
309
391
  campaign_data["info"]["dynamic_contrastive_models"] = 1
310
- # Validate that dynamic_first is at least 1
311
- assert (
312
- campaign_data["info"]["dynamic_first"] >= 1
313
- ), "dynamic_first must be at least 1"
392
+ # Validate that dynamic_warmup is at least 1
393
+ assert campaign_data["info"]["dynamic_warmup"] >= 1, (
394
+ "dynamic_warmup must be at least 1"
395
+ )
314
396
  # Validate that dynamic_contrastive_models is at most dynamic_top
315
397
  assert (
316
398
  campaign_data["info"]["dynamic_contrastive_models"]
@@ -324,9 +406,9 @@ def _add_single_campaign(campaign_data, overwrite, server):
324
406
  for item in campaign_data["data"]:
325
407
  if item and len(item) > 0:
326
408
  item_models = set(item[0]["tgt"].keys())
327
- assert (
328
- item_models == all_models
329
- ), "All items must have the same model outputs"
409
+ assert item_models == all_models, (
410
+ "All items must have the same model outputs"
411
+ )
330
412
  else:
331
413
  raise ValueError(f"Unknown campaign assignment type: {assignment}")
332
414
 
@@ -391,13 +473,20 @@ def _add_single_campaign(campaign_data, overwrite, server):
391
473
  if os.path.exists(output_file):
392
474
  os.remove(output_file)
393
475
 
476
+ # Prepend data_welcome to tasks if present
477
+ if data_welcome:
478
+ if assignment == "task-based":
479
+ tasks = [task for task in tasks]
480
+ elif assignment in ["single-stream", "dynamic"]:
481
+ tasks = data_welcome + tasks
482
+
394
483
  # For task-based, data is a dict mapping user_id -> tasks
395
484
  # For single-stream and dynamic, data is a flat list (shared among all users)
396
485
  if assignment == "task-based":
397
486
  campaign_data["data"] = {
398
487
  user_id: task for user_id, task in zip(user_ids, tasks)
399
488
  }
400
- elif assignment in ["single-stream", "dynamic"]:
489
+ elif assignment in {"single-stream", "dynamic"}:
401
490
  campaign_data["data"] = tasks
402
491
 
403
492
  # generate a token for dashboard access if not present
@@ -413,25 +502,24 @@ def _add_single_campaign(campaign_data, overwrite, server):
413
502
 
414
503
  user_progress = {
415
504
  user_id: {
416
- # TODO: progress tracking could be based on the assignment type
505
+ # Progress tracking: None | "completed" for task-based,
506
+ # None | "completed" | "completed_foreign" for single-stream/dynamic
417
507
  "progress": (
418
- [False] * len(campaign_data["data"][user_id])
508
+ [None] * len(campaign_data["data"][user_id])
419
509
  if assignment == "task-based"
420
- else (
421
- [False] * len(campaign_data["data"])
422
- if assignment == "single-stream"
423
- else (
424
- [list() for _ in range(len(campaign_data["data"]))]
425
- if assignment == "dynamic"
426
- else []
427
- )
428
- )
510
+ else [None] * len(campaign_data["data"])
511
+ if assignment == "single-stream"
512
+ else [{model: None for model in all_models}]
513
+ * len(campaign_data["data"])
514
+ if assignment == "dynamic"
515
+ else int(f"Invalid assignment: {assignment}")
429
516
  ),
517
+ "progress_welcome": [None] * len(data_welcome),
430
518
  "time_start": None,
431
519
  "time_end": None,
432
520
  "time": 0,
433
521
  "url": (
434
- f"{campaign_data['info'].get("template", "basic")}.html"
522
+ f"{campaign_data['info'].get('template', 'annotate')}"
435
523
  f"?campaign_id={urllib.parse.quote_plus(campaign_data['campaign_id'])}"
436
524
  f"&user_id={user_id}"
437
525
  ),
@@ -527,7 +615,7 @@ def _add_single_campaign(campaign_data, overwrite, server):
527
615
  )
528
616
  for user_id, user_val in user_progress.items():
529
617
  # point to the protocol URL
530
- print(f'🧑 {server}/{user_val["url"]}')
618
+ print(f"🧑 {server}/{user_val['url']}")
531
619
  print()
532
620
 
533
621
 
@@ -578,13 +666,6 @@ def main():
578
666
  )
579
667
  args, args_unknown = args.parse_known_args()
580
668
 
581
- # enforce that only one pearmut process is running
582
- for p in psutil.process_iter():
583
- if "pearmut" == p.name() and p.pid != os.getpid():
584
- print("Exit all running pearmut processes before running more commands.")
585
- print(p)
586
- exit(1)
587
-
588
669
  if args.command == "run":
589
670
  _run(args_unknown)
590
671
  elif args.command == "add":
pearmut/results_export.py CHANGED
@@ -39,7 +39,7 @@ def compute_model_scores(campaign_id):
39
39
  # Compute model scores from annotations
40
40
  model_scores = collections.defaultdict(dict)
41
41
 
42
- # Iterate through all tasks to find items with 'models' field (basic template)
42
+ # Iterate through all tasks to find items with 'models' field (annotate template)
43
43
  log = get_db_log(campaign_id)
44
44
  for entry in log:
45
45
  if "item" not in entry or "annotation" not in entry: