@moleculeagora/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/dist/index.js +30368 -0
- package/dist/index.js.map +1 -0
- package/dist/python-v1/agora_runtime.py +282 -0
- package/dist/python-v1/answer-set-metric.py +264 -0
- package/dist/python-v1/assertion-set-evaluation.py +879 -0
- package/dist/python-v1/exact-match.py +60 -0
- package/dist/python-v1/l4-composition.py +435 -0
- package/dist/python-v1/multi-output-tabular-metric.py +392 -0
- package/dist/python-v1/panel-ranking-metric.py +622 -0
- package/dist/python-v1/project-test.py +256 -0
- package/dist/python-v1/protein-binder-assay-metric.py +600 -0
- package/dist/python-v1/public-tool-metric.py +161 -0
- package/dist/python-v1/ranking-metric.py +426 -0
- package/dist/python-v1/reference-artifact-assertion.py +532 -0
- package/dist/python-v1/rubric-validation.py +246 -0
- package/dist/python-v1/solver-python-stdio-test.py +160 -0
- package/dist/python-v1/statistical-endpoint-test-v2.py +629 -0
- package/dist/python-v1/statistical-endpoint-test.py +442 -0
- package/dist/python-v1/table-metric.py +1291 -0
- package/dist/release-metadata.json +7 -0
- package/package.json +67 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import math
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from agora_runtime import (
|
|
10
|
+
fail_runtime,
|
|
11
|
+
load_json_file,
|
|
12
|
+
load_runtime_context,
|
|
13
|
+
reject_submission,
|
|
14
|
+
resolve_evaluation_artifact,
|
|
15
|
+
resolve_scoring_asset,
|
|
16
|
+
resolve_submission_artifact,
|
|
17
|
+
safe_extract_zip,
|
|
18
|
+
write_score,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def require_string(value, label):
    """Return ``value`` stripped of surrounding whitespace.

    ``value`` must be a ``str`` that still has content after stripping;
    otherwise the problem is reported through ``fail_runtime`` using ``label``
    to identify the offending field.
    """
    is_text = isinstance(value, str)
    if not (is_text and value.strip()):
        fail_runtime(f"{label} must be a non-empty string.")
    return value.strip()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def require_positive_int(value, label):
    """Return ``value`` unchanged after checking it is an integer above zero.

    Booleans are refused even though ``bool`` is a subclass of ``int``.
    Violations are reported through ``fail_runtime`` using ``label``.
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not (is_plain_int and value > 0):
        fail_runtime(f"{label} must be a positive integer.")
    return value
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def require_json_object(path, label):
    """Load the JSON document at ``path`` and return it if it is an object.

    Loading is delegated to ``load_json_file``. A missing file, any other
    ``OSError``, a ``RuntimeError`` raised by the loader, and a non-object
    top-level value are each reported through ``fail_runtime``.
    """
    try:
        parsed = load_json_file(path, label=label)
    except FileNotFoundError:
        fail_runtime(f"{label} does not exist.")
    except OSError as read_error:
        fail_runtime(f"Unable to read {label}: {read_error}.")
    except RuntimeError as load_error:
        # The loader's own message is forwarded verbatim.
        fail_runtime(str(load_error))
    if isinstance(parsed, dict):
        return parsed
    fail_runtime(f"{label} must be a JSON object.")
    return parsed
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def resolve_profile_determinism_env(runtime_context):
    """Return the validated ``runtime_profile.determinism_env`` mapping.

    Both ``runtime_profile`` and its ``determinism_env`` entry must be dicts.
    Every key and every value is passed through ``require_string``, so the
    returned dict holds stripped, non-empty strings only.
    """
    profile = runtime_context.get("runtime_profile")
    if not isinstance(profile, dict):
        fail_runtime("runtime_profile must be an object.")
    env_spec = profile.get("determinism_env")
    if not isinstance(env_spec, dict):
        fail_runtime("runtime_profile.determinism_env must be an object.")

    cleaned = {}
    for raw_name in env_spec:
        name = require_string(raw_name, "runtime_profile.determinism_env key")
        # The stripped key names the value in any error message.
        cleaned[name] = require_string(
            env_spec[raw_name],
            f"runtime_profile.determinism_env.{name}",
        )
    return cleaned
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def require_relative_path(value, label):
    """Return ``value`` as a ``Path`` after checking it is a safe relative path.

    ``value`` is first validated with ``require_string``. Absolute paths and
    paths with an empty, ``.`` or ``..`` component are reported through
    ``fail_runtime``.
    """
    candidate = Path(require_string(value, label))
    forbidden_parts = ("", ".", "..")
    has_forbidden_part = any(part in forbidden_parts for part in candidate.parts)
    if candidate.is_absolute() or has_forbidden_part:
        fail_runtime(f"{label} must be a safe relative path inside the criterion bundle.")
    return candidate
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def resolve_inside(root, relative_path, label):
    """Resolve ``relative_path`` under ``root`` and return the resolved path.

    Both the root and the joined path are passed through ``Path.resolve``
    before the containment check. A result that is neither the root itself
    nor a descendant of it is reported through ``fail_runtime``.
    """
    base = root.resolve()
    candidate = (base / relative_path).resolve()
    if candidate != base and base not in candidate.parents:
        fail_runtime(f"{label} must stay inside the criterion bundle.")
    return candidate
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def read_output_json(path):
    """Read the criterion's score output file and return it as a dict.

    Args:
        path: ``pathlib.Path`` of the file the criterion entrypoint was told
            to write (exported to it as ``AGORA_SCORE_OUTPUT_PATH``).

    Returns:
        The parsed top-level JSON object.

    Every failure mode is reported through ``fail_runtime``: a missing file,
    content that is not valid UTF-8, content that is not valid JSON, any
    other ``OSError`` while reading, and a top-level value that is not an
    object.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        fail_runtime("Criterion entrypoint did not write AGORA_SCORE_OUTPUT_PATH.")
    except json.JSONDecodeError as error:
        fail_runtime(f"Criterion score output is not valid JSON: {error.msg}.")
    except UnicodeDecodeError as error:
        # read_text decodes strictly; without this clause undecodable bytes
        # would escape as an unhandled ValueError subclass.
        fail_runtime(f"Criterion score output is not valid UTF-8: {error.reason}.")
    except OSError as error:
        fail_runtime(f"Unable to read criterion score output: {error}.")
    if not isinstance(data, dict):
        fail_runtime("Criterion score output must be a JSON object.")
    return data
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def require_score(value):
    """Validate the criterion's ``score`` field and return it as a float.

    Args:
        value: The raw ``score`` value taken from the criterion's JSON output.

    Returns:
        ``value`` converted to ``float``, guaranteed finite and within
        ``[0, 1]``.

    Booleans and non-numeric values, non-finite floats, and values outside
    ``[0, 1]`` are reported through ``fail_runtime``.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        fail_runtime("Criterion score output field score must be a number.")
    try:
        score = float(value)
    except OverflowError:
        # JSON integers are unbounded in Python; one too large for a float is
        # necessarily outside [0, 1], so report it as a range error instead
        # of letting the conversion crash the runner.
        fail_runtime("Criterion score output field score must be in [0, 1].")
    if not math.isfinite(score):
        fail_runtime("Criterion score output field score must be finite.")
    if score < 0 or score > 1:
        fail_runtime("Criterion score output field score must be in [0, 1].")
    return score
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def truncate_text(value, limit=4000):
    """Return ``value`` cut to at most ``limit`` characters plus a marker.

    Args:
        value: The text to shorten.
        limit: Maximum number of characters kept from ``value``. Defaults to
            4000; expected to be non-negative.

    Returns:
        ``value`` unchanged when it already fits, otherwise its first
        ``limit`` characters followed by ``"...<truncated>"``.
    """
    if len(value) <= limit:
        return value
    return value[:limit] + "...<truncated>"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def run_criterion(entrypoint_path, criterion_dir, env, timeout_ms):
    """Run the criterion entrypoint with the current interpreter.

    Args:
        entrypoint_path: Path of the Python script to execute.
        criterion_dir: Directory used as the child's working directory.
        env: Complete environment mapping for the child process; it replaces
            the parent's environment rather than extending it.
        timeout_ms: Wall-clock limit in milliseconds.

    Returns:
        The ``subprocess.CompletedProcess`` with ``stdout`` and ``stderr``
        captured as text. A non-zero exit status is not raised here
        (``check=False``); the caller inspects ``returncode``.

    A timeout is reported through ``reject_submission`` together with the
    limit that was exceeded.
    """
    try:
        return subprocess.run(
            [sys.executable, str(entrypoint_path)],
            cwd=str(criterion_dir),
            env=env,
            capture_output=True,
            text=True,
            # The child may print arbitrary bytes. Replace undecodable ones
            # so capturing output cannot raise UnicodeDecodeError here.
            errors="replace",
            timeout=timeout_ms / 1000.0,
            check=False,
        )
    except subprocess.TimeoutExpired:
        reject_submission(
            f"Criterion entrypoint exceeded timeout_ms={timeout_ms}.",
            details={"timeout_ms": timeout_ms},
        )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def build_criterion_env(runtime_context, criterion_dir, submission_dir, output_path):
    """Assemble the environment mapping handed to the criterion entrypoint.

    The mapping starts from the profile's determinism variables and is then
    overlaid with the ``AGORA_*`` variables, so the latter win on any name
    clash. ``AGORA_RANDOM_SEED`` is copied from this process's environment
    and falls back to ``"0"``.
    """
    env = dict(resolve_profile_determinism_env(runtime_context))
    env.update(
        AGORA_CRITERION_DIR=str(criterion_dir),
        AGORA_SUBMISSION_DIR=str(submission_dir),
        AGORA_SCORE_OUTPUT_PATH=str(output_path),
        AGORA_RANDOM_SEED=os.environ.get("AGORA_RANDOM_SEED", "0"),
    )
    return env
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def main():
    """Score a submission by running a project-test criterion bundle against it.

    Steps:
      1. Load the runtime context and the ``compiled_config`` scoring asset,
         then validate the roles, manifest file name, timeout and final
         score key.
      2. Extract the criterion and submission zip archives into a temporary
         working directory. An invalid submission archive is routed to
         ``reject_submission``.
      3. Read the criterion manifest, require version ``project_test_v1``,
         and resolve the entrypoint inside the criterion directory.
      4. Run the entrypoint under the effective timeout. The manifest may
         lower the configured timeout but not raise it. A non-zero exit is
         reported through ``reject_submission`` with truncated output.
      5. Read and validate the score JSON the entrypoint wrote, then record
         it with ``write_score``, adding the score to the details under the
         final score key.

    Configuration and manifest problems are reported through
    ``fail_runtime``.
    """
    runtime_context = load_runtime_context()
    config_path = resolve_scoring_asset(
        runtime_context,
        "compiled_config",
        kind="config",
    )
    config = require_json_object(config_path, "compiled_config")
    criterion_role = require_string(
        config.get("criterion_role"),
        "compiled_config.criterion_role",
    )
    submission_role = require_string(
        config.get("submission_role"),
        "compiled_config.submission_role",
    )
    manifest_file = require_string(
        config.get("criterion_manifest_file"),
        "compiled_config.criterion_manifest_file",
    )
    timeout_ms = require_positive_int(
        config.get("timeout_ms"),
        "compiled_config.timeout_ms",
    )
    final_score_key = require_string(
        runtime_context.get("final_score_key"),
        "runtime_context.final_score_key",
    )
    criterion_archive_path = resolve_evaluation_artifact(runtime_context, criterion_role)
    submission_archive_path = resolve_submission_artifact(runtime_context, submission_role)

    with tempfile.TemporaryDirectory(prefix="agora-project-test-") as working_root:
        working_dir = Path(working_root)
        criterion_dir = working_dir / "criterion"
        submission_dir = working_dir / "candidate"
        # Kept outside both extracted trees so neither archive can pre-seed it.
        score_output_path = working_dir / "criterion-score.json"

        safe_extract_zip(
            criterion_archive_path,
            criterion_dir,
            label=f"evaluation artifact {criterion_role}",
        )
        safe_extract_zip(
            submission_archive_path,
            submission_dir,
            label=f"submission artifact {submission_role}",
            invalid_handler=reject_submission,
        )

        # The manifest path gets the same containment checks as the
        # entrypoint, so an absolute or ".."-laden name cannot make the
        # runner read a manifest from outside the criterion bundle.
        manifest_relative_path = require_relative_path(
            manifest_file,
            "compiled_config.criterion_manifest_file",
        )
        manifest_path = resolve_inside(
            criterion_dir,
            manifest_relative_path,
            "compiled_config.criterion_manifest_file",
        )
        manifest = require_json_object(manifest_path, manifest_file)
        version = require_string(
            manifest.get("version"),
            f"{manifest_file}.version",
        )
        if version != "project_test_v1":
            fail_runtime(f"Unsupported project-test manifest version {version}.")

        entrypoint_relative_path = require_relative_path(
            manifest.get("entrypoint"),
            f"{manifest_file}.entrypoint",
        )
        entrypoint_path = resolve_inside(
            criterion_dir,
            entrypoint_relative_path,
            f"{manifest_file}.entrypoint",
        )
        if not entrypoint_path.is_file():
            fail_runtime(f"Criterion entrypoint does not exist: {entrypoint_relative_path}.")

        # The manifest may tighten the timeout; it defaults to the configured
        # value and may never exceed it.
        manifest_timeout_ms = manifest.get("timeout_ms", timeout_ms)
        manifest_timeout_ms = require_positive_int(
            manifest_timeout_ms,
            f"{manifest_file}.timeout_ms",
        )
        if manifest_timeout_ms > timeout_ms:
            fail_runtime(
                f"{manifest_file}.timeout_ms must be less than or equal to compiled_config.timeout_ms."
            )

        run = run_criterion(
            entrypoint_path,
            criterion_dir,
            build_criterion_env(
                runtime_context,
                criterion_dir,
                submission_dir,
                score_output_path,
            ),
            manifest_timeout_ms,
        )
        if run.returncode != 0:
            reject_submission(
                f"Criterion entrypoint exited with code {run.returncode}.",
                details={
                    "returncode": run.returncode,
                    "stdout": truncate_text(run.stdout),
                    "stderr": truncate_text(run.stderr),
                },
            )

        output = read_output_json(score_output_path)
        score = require_score(output.get("score"))
        details = output.get("details", {})
        if not isinstance(details, dict):
            fail_runtime("Criterion score output field details must be a JSON object.")
        # The final score key is written last so it overrides any same-named
        # entry supplied by the criterion.
        write_score(score=score, details={**details, final_score_key: score})
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
# Script entry point: run the scorer only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()
|