@moleculeagora/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,879 @@
1
+ import csv
2
+ import json
3
+ import math
4
+ import re
5
+ import tempfile
6
+ from pathlib import Path
7
+
8
+ from agora_runtime import (
9
+ fail_runtime,
10
+ load_json_file,
11
+ load_runtime_context,
12
+ resolve_evaluation_artifact,
13
+ reject_submission,
14
+ resolve_scoring_asset,
15
+ resolve_submission_artifact,
16
+ safe_extract_zip,
17
+ write_score,
18
+ )
19
+
20
# Matches one identifier-style field name; used when parsing `$.field` path segments.
FIELD_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
# Upper bound on CSV rows collected and on selectable row / SDF record indexes.
MOLECULE_TABLE_SCAN_MAX_ROWS = 1000
# Radius and bit-vector size passed to the Morgan fingerprint generator.
MORGAN_RADIUS = 2
MORGAN_BITS = 2048
24
+
25
+
26
def require_string(value, label):
    """Return ``value`` with surrounding whitespace removed.

    Reports a failure through ``fail_runtime`` when ``value`` is not a string
    or is blank once stripped. ``label`` names the value in the message.
    """
    is_text = isinstance(value, str)
    if not (is_text and value.strip()):
        fail_runtime(f"{label} must be a non-empty string.")
    return value.strip()
30
+
31
+
32
def require_object(value, label):
    """Return ``value`` unchanged when it is a dict.

    Any other type is reported through ``fail_runtime`` using ``label``.
    """
    if isinstance(value, dict):
        return value
    fail_runtime(f"{label} must be a JSON object.")
    return value
36
+
37
+
38
def require_list(value, label):
    """Return ``value`` unchanged when it is a list.

    Any other type is reported through ``fail_runtime`` using ``label``.
    """
    if isinstance(value, list):
        return value
    fail_runtime(f"{label} must be an array.")
    return value
42
+
43
+
44
def require_number(value, label):
    """Return ``value`` as a finite float.

    Booleans and non-numeric types are reported through ``fail_runtime``, as
    are NaN and infinities. An integer too large to fit in a float is treated
    as non-finite rather than letting ``OverflowError`` escape.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        fail_runtime(f"{label} must be a finite number.")
    try:
        number = float(value)
    except OverflowError:
        # float() raises for ints beyond the float range; fold that case into
        # the finiteness check below so it produces the same failure message.
        number = math.inf
    if not math.isfinite(number):
        fail_runtime(f"{label} must be a finite number.")
    return number
51
+
52
+
53
def require_positive_number(value, label):
    """Return ``value`` as a float after checking it is strictly positive.

    Numeric validation is delegated to ``require_number``; zero and negative
    values are reported through ``fail_runtime``.
    """
    checked = require_number(value, label)
    if checked <= 0:
        fail_runtime(f"{label} must be positive.")
    return checked
58
+
59
+
60
def require_nonnegative_number(value, label):
    """Return ``value`` as a float after checking it is zero or greater.

    Numeric validation is delegated to ``require_number``; negative values
    are reported through ``fail_runtime``.
    """
    checked = require_number(value, label)
    if checked < 0:
        fail_runtime(f"{label} must be nonnegative.")
    return checked
65
+
66
+
67
def require_positive_int(value, label):
    """Return ``value`` when it is an integer greater than zero.

    Booleans are rejected even though ``bool`` subclasses ``int``. Invalid
    values are reported through ``fail_runtime``.
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int or value <= 0:
        fail_runtime(f"{label} must be a positive integer.")
    return value
71
+
72
+
73
def require_nonnegative_int(value, label):
    """Return ``value`` when it is an integer of zero or more.

    Booleans are rejected even though ``bool`` subclasses ``int``. Invalid
    values are reported through ``fail_runtime``.
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int or value < 0:
        fail_runtime(f"{label} must be a nonnegative integer.")
    return value
77
+
78
+
79
def require_unit_interval(value, label):
    """Return ``value`` as a float after checking it lies in [0, 1].

    Numeric validation is delegated to ``require_number``; out-of-range
    values are reported through ``fail_runtime``.
    """
    checked = require_number(value, label)
    if checked > 1 or checked < 0:
        fail_runtime(f"{label} must be between 0 and 1.")
    return checked
84
+
85
+
86
def require_json_scalar(value, label):
    """Return ``value`` when it is a JSON scalar.

    Accepted values are ``None``, strings, booleans, integers of any size and
    finite floats. Everything else (containers, NaN, infinities) is reported
    through ``fail_runtime``.
    """
    # Integers are accepted without converting to float: float() raises
    # OverflowError for very large ints, which are still valid JSON scalars.
    if value is None or isinstance(value, (str, bool, int)):
        return value
    if isinstance(value, float) and math.isfinite(value):
        return value
    fail_runtime(f"{label} must be a JSON scalar.")
99
+
100
+
101
def normalize_member_path(value, label):
    """Validate an archive member path and return it as a stripped string.

    The path must be relative and, when split on ``/``, must contain no
    empty, ``.`` or ``..`` components. Violations are reported through
    ``fail_runtime``.
    """
    path_value = require_string(value, label)
    forbidden_parts = {"", ".", ".."}
    has_forbidden_part = any(
        part in forbidden_parts for part in path_value.split("/")
    )
    if Path(path_value).is_absolute() or has_forbidden_part:
        fail_runtime(f"{label} must be a safe relative archive path.")
    return path_value
110
+
111
+
112
def resolve_member(root, path_value, label):
    """Resolve an archive member path against ``root``.

    The path is first validated by ``normalize_member_path``. After both the
    root and the joined path are resolved, the target must still lie under
    the root; otherwise the problem is reported through ``fail_runtime``.
    Returns the resolved target path.
    """
    member = normalize_member_path(path_value, label)
    base = root.resolve()
    resolved = base.joinpath(member).resolve()
    try:
        # relative_to raises ValueError when resolved is not under base.
        resolved.relative_to(base)
    except ValueError:
        fail_runtime(f"{label} must stay inside the extracted archive root.")
    return resolved
121
+
122
+
123
def load_compiled_config(path):
    """Load the compiled config via ``load_json_file``.

    A ``RuntimeError`` raised by the loader is forwarded to ``fail_runtime``
    with the error text as the message.
    """
    try:
        config = load_json_file(path, label="compiled_config")
    except RuntimeError as error:
        fail_runtime(str(error))
    else:
        return config
128
+
129
+
130
def read_text(path, label):
    """Read ``path`` as UTF-8 text.

    Returns ``(True, text, None)`` on success, or ``(False, None, reason)``
    when the file is missing, is not valid UTF-8, or cannot be read.
    """
    try:
        content = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return False, None, f"{label} is missing."
    except UnicodeDecodeError:
        return False, None, f"{label} is not valid UTF-8 text."
    except OSError as error:
        return False, None, f"{label} could not be read: {error}."
    return True, content, None
139
+
140
+
141
def load_json_member(path, label):
    """Read ``path`` as UTF-8 and parse it as JSON.

    Returns ``(True, data, None)`` on success, or ``(False, None, reason)``
    when the file is missing, is not valid UTF-8, is not valid JSON, or
    cannot be read.
    """
    try:
        raw_text = path.read_text(encoding="utf-8")
        parsed = json.loads(raw_text)
    except FileNotFoundError:
        return False, None, f"{label} is missing."
    except UnicodeDecodeError:
        return False, None, f"{label} is not valid UTF-8 text."
    except json.JSONDecodeError as error:
        return False, None, f"{label} is not valid JSON: {error.msg}."
    except OSError as error:
        return False, None, f"{label} could not be read: {error}."
    return True, parsed, None
152
+
153
+
154
def parse_json_path(path_value, label):
    """Parse a restricted JSON path into ``(kind, value)`` tokens.

    The path must start with ``$``. After that it may contain ``.name``
    field segments (matched by ``FIELD_RE``) and ``[N]`` index segments,
    where ``N`` is a decimal number without leading zeros. ``"$"`` alone
    yields an empty token list. Malformed paths are reported through
    ``fail_runtime``.
    """
    path = require_string(path_value, label)
    if path == "$":
        return []
    if not path.startswith("$"):
        fail_runtime(f"{label} must start with $.")

    tokens = []
    cursor = 1
    length = len(path)
    while cursor < length:
        marker = path[cursor]
        if marker == ".":
            # The field name starts right after the dot.
            field_match = FIELD_RE.match(path, cursor + 1)
            if not field_match:
                fail_runtime(f"{label} contains an invalid field segment.")
            tokens.append(("field", field_match.group(0)))
            cursor = field_match.end()
        elif marker == "[":
            closing = path.find("]", cursor)
            if closing == -1:
                fail_runtime(f"{label} contains an unterminated array index.")
            digits = path[cursor + 1 : closing]
            has_leading_zero = len(digits) > 1 and digits.startswith("0")
            if not digits.isdigit() or has_leading_zero:
                fail_runtime(f"{label} contains an invalid array index.")
            tokens.append(("index", int(digits)))
            cursor = closing + 1
        else:
            fail_runtime(f"{label} contains an unsupported path segment.")
    return tokens
185
+
186
+
187
def extract_json_path(data, tokens):
    """Walk ``data`` following ``tokens`` of ``(kind, value)`` pairs.

    ``"field"`` tokens index into dicts; every other token is treated as a
    list index. Returns ``(True, value, None)`` when every step resolves, or
    ``(False, None, reason)`` at the first missing field or index.
    """
    node = data
    for kind, token in tokens:
        if kind == "field":
            if isinstance(node, dict) and token in node:
                node = node[token]
                continue
            return False, None, f"JSON field {token} is missing."
        if isinstance(node, list) and token < len(node):
            node = node[token]
            continue
        return False, None, f"JSON array index {token} is missing."
    return True, node, None
199
+
200
+
201
def json_scalar_equal(actual, expected):
    """Return whether ``actual`` equals the JSON scalar ``expected``.

    ``None`` and booleans compare by identity, strings only match strings,
    and numbers only match non-boolean numbers. Numbers are compared as
    floats; when an integer is too large for a float, the comparison falls
    back to exact equality instead of raising ``OverflowError``.
    """
    if expected is None or isinstance(expected, bool):
        return actual is expected
    if isinstance(expected, str):
        return isinstance(actual, str) and actual == expected
    if isinstance(expected, (int, float)):
        if isinstance(actual, bool) or not isinstance(actual, (int, float)):
            return False
        try:
            return float(actual) == float(expected)
        except OverflowError:
            # Parsed JSON may contain arbitrarily large integers; Python's
            # int/float comparison is exact and cannot overflow.
            return actual == expected
    return False
213
+
214
+
215
def read_csv_cell(path, row_number, column, label):
    """Read one cell from a CSV file that has a header row.

    ``row_number`` counts data rows starting at 1 (the header is not
    counted). Returns ``(True, cell, None)`` when the row exists, otherwise
    ``(False, None, reason)`` describing the missing file, header, column or
    row, or the read/parse error.
    """
    try:
        with path.open("r", encoding="utf-8", newline="") as handle:
            reader = csv.DictReader(handle)
            header = reader.fieldnames
            if header is None:
                return False, None, f"{label} is missing a header row."
            if column not in header:
                return False, None, f"{label} is missing CSV column {column}."
            position = 0
            for record in reader:
                position += 1
                if position == row_number:
                    return True, record.get(column, ""), None
    except FileNotFoundError:
        return False, None, f"{label} is missing."
    except UnicodeDecodeError:
        return False, None, f"{label} is not valid UTF-8 text."
    except csv.Error as error:
        return False, None, f"{label} is not valid CSV: {error}."
    except OSError as error:
        return False, None, f"{label} could not be read: {error}."
    # The reader was exhausted before reaching the requested row.
    return False, None, f"{label} is missing CSV row {row_number}."
235
+
236
+
237
def read_csv_rows(path, label, side):
    """Read the header and leading data rows of a CSV file.

    At most ``MOLECULE_TABLE_SCAN_MAX_ROWS`` rows are collected. Any problem
    with the file (missing, bad encoding, bad CSV, no header, read error) is
    routed through ``side_failure`` for the given ``side``. Returns
    ``(fieldnames, rows)``.
    """
    try:
        with path.open("r", encoding="utf-8", newline="") as handle:
            reader = csv.DictReader(handle)
            if reader.fieldnames is None:
                side_failure(side, f"{label} is missing a header row.")
            collected = []
            for record in reader:
                # Stop once the cap is reached; later rows are ignored.
                if len(collected) >= MOLECULE_TABLE_SCAN_MAX_ROWS:
                    break
                collected.append(record)
    except FileNotFoundError:
        side_failure(side, f"{label} is missing.")
    except UnicodeDecodeError:
        side_failure(side, f"{label} is not valid UTF-8 text.")
    except csv.Error as error:
        side_failure(side, f"{label} is not valid CSV: {error}.")
    except OSError as error:
        side_failure(side, f"{label} could not be read: {error}.")
    return reader.fieldnames, collected
257
+
258
+
259
def compare_number(actual, comparator, expected):
    """Apply the named comparison to ``actual`` and ``expected``.

    Supported comparator names are ``eq``, ``neq``, ``lt``, ``lte``, ``gt``
    and ``gte``. Any other name is reported through ``fail_runtime``.
    """
    comparisons = {
        "eq": lambda left, right: left == right,
        "neq": lambda left, right: left != right,
        "lt": lambda left, right: left < right,
        "lte": lambda left, right: left <= right,
        "gt": lambda left, right: left > right,
        "gte": lambda left, right: left >= right,
    }
    for name, compare in comparisons.items():
        if comparator == name:
            return compare(actual, expected)
    fail_runtime(f"numeric comparator {comparator} is unsupported.")
273
+
274
+
275
def to_finite_number(value, label):
    """Convert an extracted value to a finite float.

    Accepts ints, floats and numeric strings; booleans and other types are
    rejected. Returns ``(True, number, None)`` on success or
    ``(False, None, reason)`` when the value is not numeric or not finite.
    An integer too large for a float is reported as not finite instead of
    raising ``OverflowError``.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float, str)):
        return False, None, f"{label} is not numeric."
    try:
        number = float(value)
    except ValueError:
        return False, None, f"{label} is not numeric."
    except OverflowError:
        # Raised by float() for integers outside the float range, which
        # parsed JSON can legitimately contain.
        return False, None, f"{label} is not finite."
    if not math.isfinite(number):
        return False, None, f"{label} is not finite."
    return True, number, None
285
+
286
+
287
def extract_numeric(root, extractor, label):
    """Extract a finite number from a file under ``root``.

    ``extractor["kind"]`` selects the strategy: ``json_path`` reads a value
    from a JSON file, ``csv_cell`` reads one CSV cell, and ``regex_capture``
    takes a capture group from the first regex match in a text file. Returns
    ``(True, number, None)`` or ``(False, None, reason)`` for problems with
    the file content; malformed extractor settings are reported through
    ``fail_runtime``.
    """
    kind = require_string(extractor.get("kind"), f"{label}.kind")
    path_label = f"{label}.path"

    if kind == "json_path":
        member = resolve_member(root, extractor.get("path"), path_label)
        loaded, document, failure = load_json_member(member, path_label)
        if not loaded:
            return False, None, failure
        tokens = parse_json_path(extractor.get("json_path"), f"{label}.json_path")
        found, raw_value, failure = extract_json_path(document, tokens)
        if not found:
            return False, None, failure
        return to_finite_number(raw_value, label)

    if kind == "csv_cell":
        member = resolve_member(root, extractor.get("path"), path_label)
        row_number = require_positive_int(extractor.get("row"), f"{label}.row")
        column_name = require_string(extractor.get("column"), f"{label}.column")
        found, cell, failure = read_csv_cell(
            member, row_number, column_name, path_label
        )
        if not found:
            return False, None, failure
        return to_finite_number(cell, label)

    if kind == "regex_capture":
        member = resolve_member(root, extractor.get("path"), path_label)
        loaded, content, failure = read_text(member, path_label)
        if not loaded:
            return False, None, failure
        pattern = require_string(extractor.get("pattern"), f"{label}.pattern")
        group_number = require_positive_int(extractor.get("group"), f"{label}.group")
        try:
            found_match = re.search(pattern, content)
        except re.error as error:
            fail_runtime(f"{label}.pattern is not a valid regex: {error}.")
        if not found_match:
            return False, None, f"{label}.pattern did not match."
        try:
            captured = found_match.group(group_number)
        except IndexError:
            fail_runtime(f"{label}.group references a missing capture group.")
        return to_finite_number(captured, label)

    fail_runtime(f"{label}.kind={kind} is unsupported.")
326
+
327
+
328
def side_failure(side, message):
    """Report an invalid molecule source for the given side.

    A ``"candidate"`` problem goes to ``reject_submission`` with
    ``rejected_side`` details; a ``"reference"`` problem goes to
    ``fail_runtime``. Any other ``side`` value is itself reported through
    ``fail_runtime``.
    """
    if side == "candidate":
        rejection_details = {"rejected_side": "candidate"}
        reject_submission(
            f"Candidate molecule source is invalid: {message}",
            details=rejection_details,
        )
    if side == "reference":
        fail_runtime(f"Reference molecule source is invalid: {message}")
    fail_runtime(f"Unknown molecule source side {side}.")
337
+
338
+
339
def load_rdkit(label):
    """Import the RDKit modules used by the scorer and return them by name.

    The import happens inside the function. An ``ImportError`` is reported
    through ``fail_runtime`` with ``label`` naming what needed RDKit. The
    returned dict is keyed by module name (``"Chem"``, ``"DataStructs"``,
    ``"Descriptors"``, ``"Lipinski"``, ``"rdFingerprintGenerator"``,
    ``"rdMolDescriptors"``).
    """
    try:
        from rdkit import Chem, DataStructs
        from rdkit.Chem import Descriptors, Lipinski, rdFingerprintGenerator, rdMolDescriptors
    except ImportError as error:
        fail_runtime(f"{label} requires RDKit in the selected runtime profile: {error}.")
    module_names = (
        "Chem",
        "DataStructs",
        "Descriptors",
        "Lipinski",
        "rdFingerprintGenerator",
        "rdMolDescriptors",
    )
    modules = (
        Chem,
        DataStructs,
        Descriptors,
        Lipinski,
        rdFingerprintGenerator,
        rdMolDescriptors,
    )
    return dict(zip(module_names, modules))
353
+
354
+
355
def select_smiles_from_csv(root, source, label, side):
    """Select one SMILES string from a CSV molecule table.

    ``source`` names the CSV ``path`` and ``smiles_column`` and must define
    exactly one of ``row_index`` (zero-based data row) or ``row_id`` (an
    object with ``column`` and ``value`` identifying a unique row). Only the
    rows returned by ``read_csv_rows`` are considered.

    Problems with the file content are routed through ``side_failure`` for
    ``side``; malformed ``source`` settings go to ``fail_runtime``.

    Returns the selected cell as a string. A cell that is absent because the
    row has fewer fields than the header is returned as ``""``.
    """
    path = resolve_member(root, source.get("path"), f"{label}.path")
    smiles_column = require_string(source.get("smiles_column"), f"{label}.smiles_column")
    fieldnames, rows = read_csv_rows(path, f"{label}.path", side)
    if smiles_column not in fieldnames:
        side_failure(side, f"{label}.path is missing SMILES column {smiles_column}.")

    has_row_id = "row_id" in source
    has_row_index = "row_index" in source
    if has_row_id == has_row_index:
        fail_runtime(f"{label} must define exactly one of row_id or row_index.")

    if has_row_index:
        row_index = require_nonnegative_int(source.get("row_index"), f"{label}.row_index")
        if row_index >= MOLECULE_TABLE_SCAN_MAX_ROWS:
            fail_runtime(
                f"{label}.row_index must be less than {MOLECULE_TABLE_SCAN_MAX_ROWS}."
            )
        if row_index >= len(rows):
            side_failure(side, f"{label}.row_index selected missing row {row_index}.")
        # csv.DictReader fills fields missing from a short row with None, so
        # normalise to "" to keep the return value a string for callers.
        return rows[row_index].get(smiles_column) or ""

    row_id = require_object(source.get("row_id"), f"{label}.row_id")
    row_id_column = require_string(row_id.get("column"), f"{label}.row_id.column")
    row_id_value = require_string(row_id.get("value"), f"{label}.row_id.value")
    if row_id_column == smiles_column:
        fail_runtime(f"{label}.row_id.column must differ from smiles_column.")
    if row_id_column not in fieldnames:
        side_failure(side, f"{label}.path is missing row id column {row_id_column}.")
    matches = [row for row in rows if row.get(row_id_column, "") == row_id_value]
    if not matches:
        side_failure(
            side,
            f"{label}.row_id selected no row with {row_id_column}={row_id_value!r} within the first {MOLECULE_TABLE_SCAN_MAX_ROWS} rows.",
        )
    if len(matches) > 1:
        side_failure(
            side,
            f"{label}.row_id selected duplicate rows with {row_id_column}={row_id_value!r}.",
        )
    # Same None normalisation as the row_index branch above.
    return matches[0].get(smiles_column) or ""
396
+
397
+
398
def molecule_from_smiles(root, source, label, side):
    """Build an RDKit molecule from a SMILES cell selected out of a CSV.

    The SMILES text comes from ``select_smiles_from_csv`` and is parsed with
    sanitization enabled. Empty or unparseable SMILES is routed through
    ``side_failure`` for ``side``.
    """
    rdkit = load_rdkit(label)
    selected = select_smiles_from_csv(root, source, label, side)
    smiles = selected.strip()
    if not smiles:
        side_failure(side, f"{label}.smiles_column selected an empty SMILES value.")
    chem = rdkit["Chem"]
    try:
        molecule = chem.MolFromSmiles(smiles, sanitize=True)
    except Exception as error:
        side_failure(side, f"{label} SMILES could not be parsed: {error}.")
    if molecule is None:
        side_failure(side, f"{label} SMILES could not be parsed with sanitization.")
    return molecule
410
+
411
+
412
def molecule_from_mol(root, source, label, side):
    """Build an RDKit molecule from a MOL block file.

    The file named by ``source["path"]`` is read as UTF-8 text and parsed
    with sanitization on and hydrogens kept. Read or parse problems are
    routed through ``side_failure`` for ``side``.
    """
    rdkit = load_rdkit(label)
    path_label = f"{label}.path"
    mol_path = resolve_member(root, source.get("path"), path_label)
    loaded, mol_block, failure = read_text(mol_path, path_label)
    if not loaded:
        side_failure(side, failure)
    chem = rdkit["Chem"]
    try:
        molecule = chem.MolFromMolBlock(mol_block, sanitize=True, removeHs=False)
    except Exception as error:
        side_failure(side, f"{label}.path MOL block could not be parsed: {error}.")
    if molecule is None:
        side_failure(side, f"{label}.path MOL block could not be parsed with sanitization.")
    return molecule
429
+
430
+
431
def molecule_from_sdf(root, source, label, side):
    """Build an RDKit molecule from one record of an SDF file.

    ``source["record_index"]`` is a zero-based record number that must be
    below ``MOLECULE_TABLE_SCAN_MAX_ROWS``. The file is opened with
    sanitization on and hydrogens kept. Missing files, missing records and
    parse failures are routed through ``side_failure`` for ``side``; a
    malformed ``record_index`` goes to ``fail_runtime``.
    """
    rdkit = load_rdkit(label)
    sdf_path = resolve_member(root, source.get("path"), f"{label}.path")
    record_index = require_nonnegative_int(
        source.get("record_index"), f"{label}.record_index"
    )
    if record_index >= MOLECULE_TABLE_SCAN_MAX_ROWS:
        fail_runtime(
            f"{label}.record_index must be less than {MOLECULE_TABLE_SCAN_MAX_ROWS}."
        )
    chem = rdkit["Chem"]
    try:
        records = chem.SDMolSupplier(str(sdf_path), sanitize=True, removeHs=False)
        molecule = records[record_index]
    except FileNotFoundError:
        side_failure(side, f"{label}.path is missing.")
    except IndexError:
        side_failure(side, f"{label}.record_index selected missing record {record_index}.")
    except Exception as error:
        side_failure(side, f"{label}.path SDF could not be parsed: {error}.")
    if molecule is None:
        side_failure(side, f"{label}.record_index could not be parsed with sanitization.")
    return molecule
458
+
459
+
460
def load_molecule(root, source, label, side):
    """Load a molecule using the loader named by ``source["kind"]``.

    Supported kinds are ``smiles_csv``, ``mol`` and ``sdf``. A non-object
    ``source`` or an unsupported kind is reported through ``fail_runtime``.
    """
    molecule_source = require_object(source, label)
    source_kind = require_string(molecule_source.get("kind"), f"{label}.kind")
    loaders = (
        ("smiles_csv", molecule_from_smiles),
        ("mol", molecule_from_mol),
        ("sdf", molecule_from_sdf),
    )
    for kind_name, loader in loaders:
        if source_kind == kind_name:
            return loader(root, molecule_source, label, side)
    fail_runtime(f"{label}.kind={source_kind} is unsupported.")
470
+
471
+
472
def canonical_smiles(molecule, label):
    """Return the canonical SMILES string RDKit produces for ``molecule``.

    Any exception from RDKit is reported through ``fail_runtime``.
    """
    chem = load_rdkit(label)["Chem"]
    try:
        rendered = chem.MolToSmiles(molecule, canonical=True)
    except Exception as error:
        fail_runtime(f"{label} canonical SMILES failed: {error}.")
    else:
        return rendered
478
+
479
+
480
def descriptor_value(molecule, descriptor, label):
    """Compute the named RDKit descriptor for ``molecule`` as a float.

    Supported names: ``mol_wt``, ``exact_mol_wt``, ``logp``, ``tpsa``,
    ``h_donors``, ``h_acceptors``, ``rotatable_bonds`` and
    ``heavy_atom_count``. An unsupported name or an exception during the
    calculation is reported through ``fail_runtime``.
    """
    rdkit = load_rdkit(label)
    # Each entry defers the RDKit call so only the requested one runs.
    calculators = (
        ("mol_wt", lambda: rdkit["Descriptors"].MolWt(molecule)),
        ("exact_mol_wt", lambda: rdkit["Descriptors"].ExactMolWt(molecule)),
        ("logp", lambda: rdkit["Descriptors"].MolLogP(molecule)),
        ("tpsa", lambda: rdkit["rdMolDescriptors"].CalcTPSA(molecule)),
        ("h_donors", lambda: rdkit["Lipinski"].NumHDonors(molecule)),
        ("h_acceptors", lambda: rdkit["Lipinski"].NumHAcceptors(molecule)),
        ("rotatable_bonds", lambda: rdkit["Lipinski"].NumRotatableBonds(molecule)),
        ("heavy_atom_count", lambda: molecule.GetNumHeavyAtoms()),
    )
    try:
        for name, calculate in calculators:
            if descriptor == name:
                return float(calculate())
    except Exception as error:
        fail_runtime(f"{label} descriptor {descriptor} failed: {error}.")
    fail_runtime(f"{label}.descriptor={descriptor} is unsupported.")
502
+
503
+
504
def morgan_fingerprint(molecule, label):
    """Return a Morgan fingerprint bit vector for ``molecule``.

    Uses ``MORGAN_RADIUS`` and ``MORGAN_BITS``. The generator API from
    ``rdFingerprintGenerator`` is tried first. On ``AttributeError`` the
    function falls back to ``rdMolDescriptors.GetMorganFingerprintAsBitVect``
    (presumably for RDKit builds without the generator API — confirm against
    the supported runtime profiles).

    Any other failure, including one raised by the fallback call, is
    reported through ``fail_runtime``.
    """
    rdkit = load_rdkit(label)
    try:
        try:
            generator = rdkit["rdFingerprintGenerator"].GetMorganGenerator(
                radius=MORGAN_RADIUS,
                fpSize=MORGAN_BITS,
            )
            return generator.GetFingerprint(molecule)
        except AttributeError:
            # The fallback sits inside the outer try so its own errors are
            # reported the same way as errors from the primary path.
            return rdkit["rdMolDescriptors"].GetMorganFingerprintAsBitVect(
                molecule,
                MORGAN_RADIUS,
                nBits=MORGAN_BITS,
            )
    except Exception as error:
        fail_runtime(f"{label} Morgan fingerprint failed: {error}.")
520
+
521
+
522
def tanimoto_similarity(candidate_molecule, reference_molecule, label):
    """Return the Tanimoto similarity of the two molecules' Morgan fingerprints.

    Fingerprints come from ``morgan_fingerprint``. Any exception raised while
    fingerprinting or comparing is reported through ``fail_runtime``.
    """
    data_structs = load_rdkit(label)["DataStructs"]
    try:
        fingerprints = [
            morgan_fingerprint(molecule, label)
            for molecule in (candidate_molecule, reference_molecule)
        ]
        similarity = data_structs.TanimotoSimilarity(*fingerprints)
        return float(similarity)
    except Exception as error:
        fail_runtime(f"{label} Tanimoto similarity failed: {error}.")
535
+
536
+
537
def require_reference_root(reference_root, label):
    """Return ``reference_root`` when one is available.

    ``None`` is reported through ``fail_runtime`` as a missing
    ``compiled_config.reference_role``.
    """
    if reference_root is not None:
        return reference_root
    fail_runtime(f"{label} requires compiled_config.reference_role.")
    return reference_root
541
+
542
+
543
def normalize_assertion(value, index):
    """Validate the common fields of one assertion entry.

    Checks that the entry is an object with a non-empty ``id`` and ``kind``
    and a positive ``weight``. Returns
    ``(assertion_id, assertion_kind, weight, assertion)``.
    """
    base_label = f"assertion_set.assertions[{index}]"
    assertion = require_object(value, base_label)
    assertion_id = require_string(assertion.get("id"), f"{base_label}.id")
    assertion_kind = require_string(assertion.get("kind"), f"{base_label}.kind")
    weight = require_positive_number(assertion.get("weight"), f"{base_label}.weight")
    return assertion_id, assertion_kind, weight, assertion
558
+
559
+
560
def evaluate_assertion(candidate_root, reference_root, assertion, index):
    """Evaluate one assertion against the extracted candidate archive.

    Args:
        candidate_root: Directory holding the extracted submission.
        reference_root: Directory holding the extracted reference artifact,
            or ``None`` when no reference role is configured. Assertions that
            compare against a reference require it.
        assertion: Raw assertion object from the compiled config.
        index: Position of the assertion, used in labels for messages.

    Returns:
        A dict with ``id``, ``kind``, ``weight``, ``passed`` and ``score``
        (1.0 or 0.0), plus ``reason`` when a failure explanation exists.

    Malformed assertion settings are reported through ``fail_runtime``.
    Invalid candidate molecule sources are handled inside ``load_molecule``
    via ``side_failure`` rather than being scored as a failed assertion.
    """
    assertion_id, assertion_kind, weight, raw = normalize_assertion(
        assertion,
        index,
    )
    label = f"assertion_set.assertions[{index}]"

    if assertion_kind == "file_exists":
        path = resolve_member(candidate_root, raw.get("path"), f"{label}.path")
        passed = path.is_file()
        reason = None if passed else "file is missing."
    elif assertion_kind == "file_absent":
        path = resolve_member(candidate_root, raw.get("path"), f"{label}.path")
        passed = not path.exists()
        reason = None if passed else "file is present."
    elif assertion_kind == "text_equals":
        path = resolve_member(candidate_root, raw.get("path"), f"{label}.path")
        expected = raw.get("expected")
        if not isinstance(expected, str):
            fail_runtime(f"{label}.expected must be a string.")
        ok, text, reason = read_text(path, f"{label}.path")
        passed = ok and text == expected
        if ok and not passed:
            # A readable file with different content previously produced a
            # failed result without any reason, unlike the other kinds.
            reason = "text did not equal expected value."
    elif assertion_kind == "text_regex":
        path = resolve_member(candidate_root, raw.get("path"), f"{label}.path")
        pattern = require_string(raw.get("pattern"), f"{label}.pattern")
        ok, text, reason = read_text(path, f"{label}.path")
        if ok:
            try:
                passed = re.search(pattern, text) is not None
            except re.error as error:
                fail_runtime(f"{label}.pattern is not a valid regex: {error}.")
            reason = None if passed else "regex did not match."
        else:
            passed = False
    elif assertion_kind == "json_path_equals":
        path = resolve_member(candidate_root, raw.get("path"), f"{label}.path")
        expected = require_json_scalar(raw.get("expected"), f"{label}.expected")
        ok, data, reason = load_json_member(path, f"{label}.path")
        if ok:
            tokens = parse_json_path(raw.get("json_path"), f"{label}.json_path")
            ok, actual, reason = extract_json_path(data, tokens)
        # `ok and ...` short-circuits, so `actual` is only read when set.
        passed = ok and json_scalar_equal(actual, expected)
        if ok and not passed:
            reason = "JSON scalar did not equal expected value."
    elif assertion_kind == "json_path_number_compare":
        path = resolve_member(candidate_root, raw.get("path"), f"{label}.path")
        comparator = require_string(raw.get("comparator"), f"{label}.comparator")
        expected = require_number(raw.get("value"), f"{label}.value")
        ok, data, reason = load_json_member(path, f"{label}.path")
        if ok:
            tokens = parse_json_path(raw.get("json_path"), f"{label}.json_path")
            ok, actual, reason = extract_json_path(data, tokens)
        if ok:
            ok, actual_number, reason = to_finite_number(actual, label)
        # Short-circuit keeps `actual_number` unread when extraction failed.
        passed = ok and compare_number(actual_number, comparator, expected)
        if ok and not passed:
            reason = f"number comparison {comparator} failed."
    elif assertion_kind == "csv_cell_equals":
        path = resolve_member(candidate_root, raw.get("path"), f"{label}.path")
        row = require_positive_int(raw.get("row"), f"{label}.row")
        column = require_string(raw.get("column"), f"{label}.column")
        expected = raw.get("expected")
        if not isinstance(expected, str):
            fail_runtime(f"{label}.expected must be a string.")
        ok, value, reason = read_csv_cell(path, row, column, f"{label}.path")
        passed = ok and value == expected
        if ok and not passed:
            reason = "CSV cell did not equal expected value."
    elif assertion_kind == "numeric_tolerance":
        extractor = require_object(raw.get("extractor"), f"{label}.extractor")
        expected = require_number(raw.get("expected"), f"{label}.expected")
        tolerance = require_nonnegative_number(
            raw.get("absolute_tolerance"),
            f"{label}.absolute_tolerance",
        )
        ok, actual_number, reason = extract_numeric(candidate_root, extractor, label)
        passed = ok and abs(actual_number - expected) <= tolerance
        if ok and not passed:
            reason = "numeric value exceeded absolute tolerance."
    elif assertion_kind == "numeric_interval":
        extractor = require_object(raw.get("extractor"), f"{label}.extractor")
        minimum = require_number(raw.get("min"), f"{label}.min")
        maximum = require_number(raw.get("max"), f"{label}.max")
        if minimum > maximum:
            fail_runtime(f"{label}.min must be less than or equal to max.")
        # Both bounds are inclusive unless the config says otherwise.
        inclusive_min = bool(raw.get("inclusive_min", True))
        inclusive_max = bool(raw.get("inclusive_max", True))
        ok, actual_number, reason = extract_numeric(candidate_root, extractor, label)
        if ok:
            above_min = (
                actual_number >= minimum
                if inclusive_min
                else actual_number > minimum
            )
            below_max = (
                actual_number <= maximum
                if inclusive_max
                else actual_number < maximum
            )
            passed = above_min and below_max
        else:
            passed = False
        if ok and not passed:
            reason = "numeric value was outside the expected interval."
    elif assertion_kind == "molecule_valid":
        # Loading is the whole check: reaching the next line means it parsed.
        load_molecule(
            candidate_root,
            raw.get("source"),
            f"{label}.source",
            "candidate",
        )
        passed = True
        reason = None
    elif assertion_kind == "molecule_canonical_smiles_equals":
        candidate_molecule = load_molecule(
            candidate_root,
            raw.get("candidate"),
            f"{label}.candidate",
            "candidate",
        )
        reference_molecule = load_molecule(
            require_reference_root(reference_root, label),
            raw.get("reference"),
            f"{label}.reference",
            "reference",
        )
        candidate_smiles = canonical_smiles(candidate_molecule, f"{label}.candidate")
        reference_smiles = canonical_smiles(reference_molecule, f"{label}.reference")
        passed = candidate_smiles == reference_smiles
        reason = (
            None
            if passed
            else f"canonical SMILES differed: candidate={candidate_smiles!r} reference={reference_smiles!r}."
        )
    elif assertion_kind == "molecule_descriptor_tolerance":
        descriptor = require_string(raw.get("descriptor"), f"{label}.descriptor")
        tolerance = require_nonnegative_number(
            raw.get("absolute_tolerance"),
            f"{label}.absolute_tolerance",
        )
        candidate_molecule = load_molecule(
            candidate_root,
            raw.get("candidate"),
            f"{label}.candidate",
            "candidate",
        )
        reference_molecule = load_molecule(
            require_reference_root(reference_root, label),
            raw.get("reference"),
            f"{label}.reference",
            "reference",
        )
        candidate_value = descriptor_value(
            candidate_molecule,
            descriptor,
            f"{label}.candidate",
        )
        reference_value = descriptor_value(
            reference_molecule,
            descriptor,
            f"{label}.reference",
        )
        difference = abs(candidate_value - reference_value)
        passed = difference <= tolerance
        reason = (
            None
            if passed
            else f"descriptor {descriptor} difference {difference} exceeded absolute tolerance {tolerance}."
        )
    elif assertion_kind == "molecule_fingerprint_similarity":
        fingerprint = require_string(raw.get("fingerprint"), f"{label}.fingerprint")
        metric = require_string(raw.get("metric"), f"{label}.metric")
        if fingerprint != "morgan_r2_2048":
            fail_runtime(f"{label}.fingerprint={fingerprint} is unsupported.")
        if metric != "tanimoto":
            fail_runtime(f"{label}.metric={metric} is unsupported.")
        min_similarity = require_unit_interval(
            raw.get("min_similarity"),
            f"{label}.min_similarity",
        )
        candidate_molecule = load_molecule(
            candidate_root,
            raw.get("candidate"),
            f"{label}.candidate",
            "candidate",
        )
        reference_molecule = load_molecule(
            require_reference_root(reference_root, label),
            raw.get("reference"),
            f"{label}.reference",
            "reference",
        )
        similarity = tanimoto_similarity(
            candidate_molecule,
            reference_molecule,
            label,
        )
        passed = similarity >= min_similarity
        reason = (
            None
            if passed
            else f"fingerprint similarity {similarity} was below min_similarity {min_similarity}."
        )
    else:
        fail_runtime(f"{label}.kind={assertion_kind} is unsupported.")

    return {
        "id": assertion_id,
        "kind": assertion_kind,
        "weight": weight,
        "passed": bool(passed),
        "score": 1.0 if passed else 0.0,
        # `reason` is only included when there is something to report.
        **({} if reason is None else {"reason": reason}),
    }
774
+
775
+
776
def normalize_assertions(assertion_set):
    """Validate the assertion list and return it unchanged.

    The list must be a non-empty array, each entry must pass
    ``normalize_assertion``, and assertion ids must be unique. Violations
    are reported through ``fail_runtime``.
    """
    assertions = require_list(
        assertion_set.get("assertions"), "assertion_set.assertions"
    )
    if len(assertions) == 0:
        fail_runtime("assertion_set.assertions must be non-empty.")
    known_ids = set()
    for position, entry in enumerate(assertions):
        entry_id = normalize_assertion(entry, position)[0]
        if entry_id in known_ids:
            fail_runtime(
                f"assertion_set.assertions id {entry_id} is duplicated."
            )
        known_ids.add(entry_id)
    return assertions
792
+
793
+
794
def main():
    """Score a submission archive against the compiled assertion set.

    Loads the runtime context and compiled config, validates the roles and
    score key, extracts the submission (and the reference artifact when a
    reference role is configured) into a temporary directory, evaluates
    every assertion, and writes the weighted pass fraction with per-assertion
    details through ``write_score``.
    """
    context = load_runtime_context()
    config_path = resolve_scoring_asset(context, "compiled_config", kind="config")
    config = require_object(load_compiled_config(config_path), "compiled_config")

    submission_role = require_string(
        config.get("submission_role"), "compiled_config.submission_role"
    )
    reference_role = config.get("reference_role")
    if reference_role is not None:
        reference_role = require_string(
            reference_role, "compiled_config.reference_role"
        )

    # The score key declared in the config must agree with the runtime's.
    config_score_key = require_string(
        config.get("final_score_key"), "compiled_config.final_score_key"
    )
    final_score_key = require_string(
        context.get("final_score_key"), "runtime_context.final_score_key"
    )
    if config_score_key != final_score_key:
        fail_runtime("compiled_config.final_score_key must match runtime context.")

    assertion_set = require_object(
        config.get("assertion_set"), "compiled_config.assertion_set"
    )
    assertions = normalize_assertions(assertion_set)

    submission_path = resolve_submission_artifact(context, submission_role)
    reference_path = None
    if reference_role is not None:
        reference_path = resolve_evaluation_artifact(context, reference_role)

    with tempfile.TemporaryDirectory(prefix="agora-artifact-assertion-") as temp_dir:
        workspace = Path(temp_dir)
        extracted_root = workspace / "candidate"
        safe_extract_zip(
            submission_path,
            extracted_root,
            label=f"submission artifact {submission_role}",
            invalid_handler=reject_submission,
        )
        reference_root = None
        if reference_path is not None:
            reference_root = workspace / "reference"
            safe_extract_zip(
                reference_path,
                reference_root,
                label=f"evaluation artifact {reference_role}",
            )
        # Assertions read extracted files, so evaluate before cleanup.
        results = []
        for index, assertion in enumerate(assertions):
            results.append(
                evaluate_assertion(extracted_root, reference_root, assertion, index)
            )

    total_weight = sum(entry["weight"] for entry in results)
    if not math.isfinite(total_weight) or total_weight <= 0:
        fail_runtime("assertion_set.assertions must declare finite positive total weight.")
    earned_weight = sum(entry["weight"] for entry in results if entry["passed"])
    score = earned_weight / total_weight
    if not math.isfinite(score):
        fail_runtime("assertion_set score must be finite.")
    passed_count = len([entry for entry in results if entry["passed"]])

    details = {
        final_score_key: score,
        "passed_assertions": passed_count,
        "total_assertions": len(results),
        "earned_weight": earned_weight,
        "total_weight": total_weight,
        "assertion_results": results,
    }
    write_score(score=score, details=details)
876
+
877
+
878
# Run the scorer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()