pearmut 1.0.2__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pearmut/app.py +8 -5
- pearmut/assignment.py +336 -82
- pearmut/cli.py +145 -82
- pearmut/static/annotate.bundle.js +1 -1
- pearmut/static/annotate.html +11 -7
- pearmut/static/dashboard.bundle.js +1 -1
- pearmut/static/dashboard.html +1 -1
- pearmut/static/index.html +1 -1
- pearmut/static/style.css +38 -0
- pearmut/utils.py +38 -21
- {pearmut-1.0.2.dist-info → pearmut-1.1.0.dist-info}/METADATA +74 -1
- pearmut-1.1.0.dist-info/RECORD +20 -0
- {pearmut-1.0.2.dist-info → pearmut-1.1.0.dist-info}/WHEEL +1 -1
- pearmut-1.0.2.dist-info/RECORD +0 -20
- {pearmut-1.0.2.dist-info → pearmut-1.1.0.dist-info}/entry_points.txt +0 -0
- {pearmut-1.0.2.dist-info → pearmut-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {pearmut-1.0.2.dist-info → pearmut-1.1.0.dist-info}/top_level.txt +0 -0
pearmut/cli.py
CHANGED
|
@@ -10,13 +10,18 @@ import json
|
|
|
10
10
|
import os
|
|
11
11
|
import urllib.parse
|
|
12
12
|
|
|
13
|
-
from .utils import
|
|
13
|
+
from .utils import (
|
|
14
|
+
ROOT,
|
|
15
|
+
TOKEN_MAIN,
|
|
16
|
+
is_form_document,
|
|
17
|
+
load_progress_data,
|
|
18
|
+
save_progress_data,
|
|
19
|
+
)
|
|
14
20
|
|
|
15
21
|
os.makedirs(f"{ROOT}/data/tasks", exist_ok=True)
|
|
16
22
|
load_progress_data(warn=None)
|
|
17
23
|
|
|
18
24
|
|
|
19
|
-
|
|
20
25
|
def _run(args_unknown):
|
|
21
26
|
# Acquire lock before starting server
|
|
22
27
|
lock_file = f"{ROOT}/data/.lock"
|
|
@@ -63,7 +68,7 @@ def _run(args_unknown):
|
|
|
63
68
|
+ f"token_main={TOKEN_MAIN}"
|
|
64
69
|
+ "".join(
|
|
65
70
|
[
|
|
66
|
-
f"&campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data[
|
|
71
|
+
f"&campaign_id={urllib.parse.quote_plus(campaign_id)}&token={campaign_data['token']}"
|
|
67
72
|
for campaign_id, campaign_data in tasks_data.items()
|
|
68
73
|
]
|
|
69
74
|
)
|
|
@@ -73,14 +78,13 @@ def _run(args_unknown):
|
|
|
73
78
|
)
|
|
74
79
|
print("🍐", dashboard_url + "\n", flush=True)
|
|
75
80
|
|
|
76
|
-
|
|
77
81
|
# disable startup message
|
|
78
82
|
uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "WARNING"
|
|
79
83
|
# set time logging
|
|
80
84
|
uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M"
|
|
81
|
-
uvicorn.config.LOGGING_CONFIG["formatters"]["access"][
|
|
82
|
-
"
|
|
83
|
-
|
|
85
|
+
uvicorn.config.LOGGING_CONFIG["formatters"]["access"]["fmt"] = (
|
|
86
|
+
"%(asctime)s %(levelprefix)s %(client_addr)s - %(request_line)s %(status_code)s"
|
|
87
|
+
)
|
|
84
88
|
uvicorn.run(
|
|
85
89
|
app,
|
|
86
90
|
host="0.0.0.0",
|
|
@@ -92,8 +96,11 @@ def _run(args_unknown):
|
|
|
92
96
|
def _validate_item_structure(items):
|
|
93
97
|
"""
|
|
94
98
|
Validate that items have the correct structure.
|
|
95
|
-
Items
|
|
96
|
-
|
|
99
|
+
Items can be either:
|
|
100
|
+
1. Evaluation items: dictionaries with 'tgt' and optionally 'src' and/or 'ref' keys
|
|
101
|
+
2. Form items: dictionaries with 'text' and 'form' keys
|
|
102
|
+
|
|
103
|
+
A document must contain either all evaluation items or all form items (not mixed).
|
|
97
104
|
|
|
98
105
|
Args:
|
|
99
106
|
items: List of item dictionaries to validate
|
|
@@ -101,68 +108,100 @@ def _validate_item_structure(items):
|
|
|
101
108
|
if not isinstance(items, list):
|
|
102
109
|
raise ValueError("Items must be a list")
|
|
103
110
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
raise ValueError("Each item must be a dictionary with 'tgt' key")
|
|
107
|
-
if "tgt" not in item:
|
|
108
|
-
raise ValueError("Each item must contain 'tgt' key")
|
|
111
|
+
if not items:
|
|
112
|
+
raise ValueError("Items list cannot be empty")
|
|
109
113
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
114
|
+
# Check if first item is a form item or evaluation item
|
|
115
|
+
first_item = items[0]
|
|
116
|
+
if not isinstance(first_item, dict):
|
|
117
|
+
raise ValueError("Each item must be a dictionary")
|
|
113
118
|
|
|
114
|
-
|
|
115
|
-
if "ref" in item and not isinstance(item["ref"], str):
|
|
116
|
-
raise ValueError("Item 'ref' must be a string")
|
|
119
|
+
first_item_is_form = "text" in first_item and "form" in first_item
|
|
117
120
|
|
|
118
|
-
|
|
119
|
-
if isinstance(item
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
121
|
+
for item in items:
|
|
122
|
+
if not isinstance(item, dict):
|
|
123
|
+
raise ValueError("Each item must be a dictionary")
|
|
124
|
+
|
|
125
|
+
# Check consistency: all items must be same type (form or evaluation)
|
|
126
|
+
current_is_form = "text" in item and "form" in item
|
|
127
|
+
if current_is_form != first_item_is_form:
|
|
128
|
+
raise ValueError("Document cannot mix form items and evaluation items")
|
|
129
|
+
|
|
130
|
+
if first_item_is_form:
|
|
131
|
+
# Validate form item structure
|
|
132
|
+
if "text" not in item:
|
|
133
|
+
raise ValueError("Form item must contain 'text' key")
|
|
134
|
+
if "form" not in item:
|
|
135
|
+
raise ValueError("Form item must contain 'form' key")
|
|
136
|
+
if not isinstance(item["text"], str):
|
|
137
|
+
raise ValueError("Form item 'text' must be a string")
|
|
138
|
+
if item["form"] not in {None, "number", "string", "choices", "script"}:
|
|
139
|
+
raise ValueError(
|
|
140
|
+
"Form item 'form' must be null, 'number', 'string', 'choices', or 'script'"
|
|
141
|
+
)
|
|
138
142
|
else:
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
143
|
+
# Validate evaluation item structure
|
|
144
|
+
if "tgt" not in item:
|
|
145
|
+
raise ValueError("Each item must contain 'tgt' key")
|
|
146
|
+
|
|
147
|
+
# Validate src is a string if present
|
|
148
|
+
if "src" in item and not isinstance(item["src"], str):
|
|
149
|
+
raise ValueError("Item 'src' must be a string")
|
|
150
|
+
|
|
151
|
+
# Validate ref is a string if present
|
|
152
|
+
if "ref" in item and not isinstance(item["ref"], str):
|
|
153
|
+
raise ValueError("Item 'ref' must be a string")
|
|
142
154
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
155
|
+
# Validate tgt is a dictionary (annotate template with model names)
|
|
156
|
+
if isinstance(item["tgt"], str):
|
|
157
|
+
# String not allowed - suggest using dictionary (don't include user input to prevent injection)
|
|
146
158
|
raise ValueError(
|
|
147
|
-
|
|
159
|
+
'Item \'tgt\' must be a dictionary mapping model names to translations. For single translation, use {"default": "your_translation"}'
|
|
148
160
|
)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
161
|
+
elif isinstance(item["tgt"], dict):
|
|
162
|
+
# Dictionary mapping model names to translations
|
|
163
|
+
# Validate that model names don't contain only numbers (JavaScript ordering issue)
|
|
164
|
+
for model_name, translation in item["tgt"].items():
|
|
165
|
+
if not isinstance(model_name, str):
|
|
166
|
+
raise ValueError(
|
|
167
|
+
"Model names in 'tgt' dictionary must be strings"
|
|
168
|
+
)
|
|
169
|
+
if model_name.isdigit():
|
|
170
|
+
raise ValueError(
|
|
171
|
+
f"Model name '{model_name}' cannot be only numeric digits (would cause issues in JS/TS)"
|
|
172
|
+
)
|
|
173
|
+
if not isinstance(translation, str):
|
|
174
|
+
raise ValueError(
|
|
175
|
+
f"Translation for model '{model_name}' must be a string"
|
|
176
|
+
)
|
|
177
|
+
else:
|
|
158
178
|
raise ValueError(
|
|
159
|
-
"'
|
|
179
|
+
"Item 'tgt' must be a dictionary mapping model names to translations"
|
|
160
180
|
)
|
|
161
|
-
|
|
162
|
-
|
|
181
|
+
|
|
182
|
+
# Validate error_spans structure if present
|
|
183
|
+
if "error_spans" in item:
|
|
184
|
+
if not isinstance(item["error_spans"], dict):
|
|
163
185
|
raise ValueError(
|
|
164
|
-
|
|
186
|
+
"'error_spans' must be a dictionary mapping model names to error span lists"
|
|
165
187
|
)
|
|
188
|
+
for model_name, spans in item["error_spans"].items():
|
|
189
|
+
if not isinstance(spans, list):
|
|
190
|
+
raise ValueError(
|
|
191
|
+
f"Error spans for model '{model_name}' must be a list"
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
# Validate validation structure if present
|
|
195
|
+
if "validation" in item:
|
|
196
|
+
if not isinstance(item["validation"], dict):
|
|
197
|
+
raise ValueError(
|
|
198
|
+
"'validation' must be a dictionary mapping model names to validation rules"
|
|
199
|
+
)
|
|
200
|
+
for model_name, val_rule in item["validation"].items():
|
|
201
|
+
if not isinstance(val_rule, dict):
|
|
202
|
+
raise ValueError(
|
|
203
|
+
f"Validation rule for model '{model_name}' must be a dictionary"
|
|
204
|
+
)
|
|
166
205
|
|
|
167
206
|
|
|
168
207
|
def _validate_document_models(doc):
|
|
@@ -210,6 +249,10 @@ def _shuffle_campaign_data(campaign_data, rng):
|
|
|
210
249
|
|
|
211
250
|
def shuffle_document(doc):
|
|
212
251
|
"""Shuffle a single document (list of items) by reordering models in tgt dict."""
|
|
252
|
+
# Skip shuffling for form documents (they don't have tgt)
|
|
253
|
+
if is_form_document(doc):
|
|
254
|
+
return # Form documents don't need shuffling
|
|
255
|
+
|
|
213
256
|
# Validate that all items have the same models
|
|
214
257
|
_validate_document_models(doc)
|
|
215
258
|
|
|
@@ -273,6 +316,20 @@ def _add_single_campaign(campaign_data, overwrite, server):
|
|
|
273
316
|
users_spec = campaign_data["info"].get("users")
|
|
274
317
|
user_tokens = {} # user_id -> {"pass": ..., "fail": ...}
|
|
275
318
|
|
|
319
|
+
# Validate and process data_welcome if present
|
|
320
|
+
data_welcome = campaign_data.get("data_welcome", [])
|
|
321
|
+
if data_welcome:
|
|
322
|
+
if not isinstance(data_welcome, list):
|
|
323
|
+
raise ValueError("'data_welcome' must be a list of documents.")
|
|
324
|
+
# Validate welcome documents structure - each should be a list of items
|
|
325
|
+
for doc_i, doc in enumerate(data_welcome):
|
|
326
|
+
if not isinstance(doc, list):
|
|
327
|
+
raise ValueError(f"Welcome document {doc_i} must be a list of items.")
|
|
328
|
+
try:
|
|
329
|
+
_validate_item_structure(doc)
|
|
330
|
+
except ValueError as e:
|
|
331
|
+
raise ValueError(f"Welcome document {doc_i}: {e}")
|
|
332
|
+
|
|
276
333
|
if assignment == "task-based":
|
|
277
334
|
tasks = campaign_data["data"]
|
|
278
335
|
if not isinstance(tasks, list):
|
|
@@ -328,14 +385,14 @@ def _add_single_campaign(campaign_data, overwrite, server):
|
|
|
328
385
|
# Validate dynamic-specific parameters
|
|
329
386
|
if "dynamic_top" not in campaign_data["info"]:
|
|
330
387
|
campaign_data["info"]["dynamic_top"] = 2
|
|
331
|
-
if "
|
|
332
|
-
campaign_data["info"]["
|
|
388
|
+
if "dynamic_warmup" not in campaign_data["info"]:
|
|
389
|
+
campaign_data["info"]["dynamic_warmup"] = 5
|
|
333
390
|
if "dynamic_contrastive_models" not in campaign_data["info"]:
|
|
334
391
|
campaign_data["info"]["dynamic_contrastive_models"] = 1
|
|
335
|
-
# Validate that
|
|
336
|
-
assert (
|
|
337
|
-
|
|
338
|
-
)
|
|
392
|
+
# Validate that dynamic_warmup is at least 1
|
|
393
|
+
assert campaign_data["info"]["dynamic_warmup"] >= 1, (
|
|
394
|
+
"dynamic_warmup must be at least 1"
|
|
395
|
+
)
|
|
339
396
|
# Validate that dynamic_contrastive_models is at most dynamic_top
|
|
340
397
|
assert (
|
|
341
398
|
campaign_data["info"]["dynamic_contrastive_models"]
|
|
@@ -349,9 +406,9 @@ def _add_single_campaign(campaign_data, overwrite, server):
|
|
|
349
406
|
for item in campaign_data["data"]:
|
|
350
407
|
if item and len(item) > 0:
|
|
351
408
|
item_models = set(item[0]["tgt"].keys())
|
|
352
|
-
assert (
|
|
353
|
-
|
|
354
|
-
)
|
|
409
|
+
assert item_models == all_models, (
|
|
410
|
+
"All items must have the same model outputs"
|
|
411
|
+
)
|
|
355
412
|
else:
|
|
356
413
|
raise ValueError(f"Unknown campaign assignment type: {assignment}")
|
|
357
414
|
|
|
@@ -416,13 +473,20 @@ def _add_single_campaign(campaign_data, overwrite, server):
|
|
|
416
473
|
if os.path.exists(output_file):
|
|
417
474
|
os.remove(output_file)
|
|
418
475
|
|
|
476
|
+
# Prepend data_welcome to tasks if present
|
|
477
|
+
if data_welcome:
|
|
478
|
+
if assignment == "task-based":
|
|
479
|
+
tasks = [task for task in tasks]
|
|
480
|
+
elif assignment in ["single-stream", "dynamic"]:
|
|
481
|
+
tasks = data_welcome + tasks
|
|
482
|
+
|
|
419
483
|
# For task-based, data is a dict mapping user_id -> tasks
|
|
420
484
|
# For single-stream and dynamic, data is a flat list (shared among all users)
|
|
421
485
|
if assignment == "task-based":
|
|
422
486
|
campaign_data["data"] = {
|
|
423
487
|
user_id: task for user_id, task in zip(user_ids, tasks)
|
|
424
488
|
}
|
|
425
|
-
elif assignment in
|
|
489
|
+
elif assignment in {"single-stream", "dynamic"}:
|
|
426
490
|
campaign_data["data"] = tasks
|
|
427
491
|
|
|
428
492
|
# generate a token for dashboard access if not present
|
|
@@ -438,20 +502,19 @@ def _add_single_campaign(campaign_data, overwrite, server):
|
|
|
438
502
|
|
|
439
503
|
user_progress = {
|
|
440
504
|
user_id: {
|
|
441
|
-
#
|
|
505
|
+
# Progress tracking: None | "completed" for task-based,
|
|
506
|
+
# None | "completed" | "completed_foreign" for single-stream/dynamic
|
|
442
507
|
"progress": (
|
|
443
|
-
[
|
|
508
|
+
[None] * len(campaign_data["data"][user_id])
|
|
444
509
|
if assignment == "task-based"
|
|
445
|
-
else (
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
else []
|
|
452
|
-
)
|
|
453
|
-
)
|
|
510
|
+
else [None] * len(campaign_data["data"])
|
|
511
|
+
if assignment == "single-stream"
|
|
512
|
+
else [{model: None for model in all_models}]
|
|
513
|
+
* len(campaign_data["data"])
|
|
514
|
+
if assignment == "dynamic"
|
|
515
|
+
else int(f"Invalid assignment: {assignment}")
|
|
454
516
|
),
|
|
517
|
+
"progress_welcome": [None] * len(data_welcome),
|
|
455
518
|
"time_start": None,
|
|
456
519
|
"time_end": None,
|
|
457
520
|
"time": 0,
|
|
@@ -552,7 +615,7 @@ def _add_single_campaign(campaign_data, overwrite, server):
|
|
|
552
615
|
)
|
|
553
616
|
for user_id, user_val in user_progress.items():
|
|
554
617
|
# point to the protocol URL
|
|
555
|
-
print(f
|
|
618
|
+
print(f"🧑 {server}/{user_val['url']}")
|
|
556
619
|
print()
|
|
557
620
|
|
|
558
621
|
|