@moleculeagora/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,246 @@
1
+ from agora_runtime import (
2
+ fail_runtime,
3
+ load_json_file,
4
+ load_runtime_context,
5
+ reject_submission,
6
+ resolve_evaluation_artifact,
7
+ resolve_scoring_asset,
8
+ resolve_submission_artifact,
9
+ write_score,
10
+ )
11
+
12
+
13
def require_string(value, label):
    """Return ``value`` with surrounding whitespace removed.

    ``value`` must be a ``str`` containing at least one non-whitespace
    character; anything else is reported through ``fail_runtime`` using
    ``label`` to name the offending field. The code assumes ``fail_runtime``
    does not return.
    """
    is_usable = isinstance(value, str) and bool(value.strip())
    if not is_usable:
        fail_runtime(f"{label} must be a non-empty string.")
    return value.strip()
17
+
18
+
19
def require_string_list(value, label):
    """Return a list of stripped, non-empty strings built from ``value``.

    ``None`` is treated as "not provided" and yields an empty list. Any other
    non-list value, or any element that is not a non-blank string, is reported
    through ``fail_runtime`` (element errors include the element index).
    """
    if value is None:
        return []
    if not isinstance(value, list):
        fail_runtime(f"{label} must be an array of strings.")

    cleaned = []
    for index, entry in enumerate(value):
        entry_is_text = isinstance(entry, str) and bool(entry.strip())
        if not entry_is_text:
            fail_runtime(f"{label}[{index}] must be a non-empty string.")
        cleaned.append(entry.strip())
    return cleaned
30
+
31
+
32
def require_number(value, label):
    """Return ``value`` as a finite ``float``.

    Accepted inputs are ``int`` and ``float`` values. Booleans are rejected
    explicitly because ``bool`` is a subclass of ``int``.

    Non-finite values are rejected as well: ``json.loads`` accepts the
    non-standard literals ``NaN``, ``Infinity`` and ``-Infinity``, and a NaN
    would slip through a later ``<= 0`` comparison and turn the weighted score
    into NaN. Integers too large to be represented as a float are reported the
    same way instead of surfacing as an unhandled ``OverflowError``.

    Every rejection goes through ``fail_runtime``, which the code assumes does
    not return.
    """
    import math  # local import: only this validator needs it

    if isinstance(value, bool) or not isinstance(value, (int, float)):
        fail_runtime(f"{label} must be a number.")
    try:
        number = float(value)
    except OverflowError:
        # Arbitrary-precision JSON integers can exceed the float range.
        fail_runtime(f"{label} must be a finite number.")
    if not math.isfinite(number):
        fail_runtime(f"{label} must be a finite number.")
    return number
36
+
37
+
38
def require_positive_number(value, label):
    """Return ``value`` as a float after checking that it is greater than zero.

    Type validation is delegated to ``require_number``; zero and negative
    values are reported through ``fail_runtime``.
    """
    parsed = require_number(value, label)
    is_non_positive = parsed <= 0
    if is_non_positive:
        fail_runtime(f"{label} must be positive.")
    return parsed
43
+
44
+
45
def require_positive_int(value, label):
    """Return ``value`` unchanged once it is known to be an integer above zero.

    Booleans are not accepted even though ``bool`` is a subclass of ``int``.
    Both the type failure and the range failure are reported through
    ``fail_runtime`` with the same message.
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int:
        fail_runtime(f"{label} must be a positive integer.")
    if value <= 0:
        fail_runtime(f"{label} must be a positive integer.")
    return value
51
+
52
+
53
def require_object(value, label):
    """Return ``value`` unchanged when it is a ``dict`` (a JSON object).

    Any other type is reported through ``fail_runtime`` with ``label`` naming
    the offending field.
    """
    if isinstance(value, dict):
        return value
    fail_runtime(f"{label} must be a JSON object.")
    return value
57
+
58
+
59
def normalize_check(value, index):
    """Validate one raw rubric check and return its normalized form.

    Every check needs a non-empty ``id``, a positive ``weight`` and a ``rule``.
    The remaining keys depend on the rule:

    * ``required_path``: ``path``
    * ``string_min_length`` / ``array_min_length``: ``path`` and a positive
      integer ``min``
    * ``forbidden_terms_absent``: non-empty ``paths`` and ``terms`` lists
    * ``enum``: ``path`` and a non-empty ``allowed`` list

    ``index`` is the position of the check inside ``rubric.checks`` and is only
    used to build error labels. Problems are reported through ``fail_runtime``.
    """
    check = require_object(value, f"rubric.checks[{index}]")
    # The rule is validated first, then id and weight, matching the order in
    # which errors have always been reported.
    rule = require_string(check.get("rule"), f"rubric.checks[{index}].rule")
    check_id = require_string(check.get("id"), f"rubric.checks[{index}].id")
    weight = require_positive_number(check.get("weight"), f"rubric.checks[{index}].weight")
    normalized = {"id": check_id, "weight": weight, "rule": rule}

    if rule == "required_path":
        normalized["path"] = require_string(check.get("path"), f"rubric.checks[{index}].path")
    elif rule in ("string_min_length", "array_min_length"):
        # Both length rules share the same shape: a path plus a minimum.
        normalized["path"] = require_string(check.get("path"), f"rubric.checks[{index}].path")
        normalized["min"] = require_positive_int(check.get("min"), f"rubric.checks[{index}].min")
    elif rule == "forbidden_terms_absent":
        paths = require_string_list(check.get("paths"), f"rubric.checks[{index}].paths")
        normalized["paths"] = paths
        terms = require_string_list(check.get("terms"), f"rubric.checks[{index}].terms")
        normalized["terms"] = terms
        if not (paths and terms):
            fail_runtime(f"rubric.checks[{index}] must declare non-empty paths and terms.")
    elif rule == "enum":
        normalized["path"] = require_string(check.get("path"), f"rubric.checks[{index}].path")
        allowed = require_string_list(check.get("allowed"), f"rubric.checks[{index}].allowed")
        normalized["allowed"] = allowed
        if not allowed:
            fail_runtime(f"rubric.checks[{index}].allowed must be non-empty.")
    else:
        fail_runtime(
            f"rubric.checks[{index}].rule={rule} is not supported. Supported rules are required_path, string_min_length, array_min_length, forbidden_terms_absent, enum."
        )
        return None
    return normalized
102
+
103
+
104
def normalize_aggregation(rubric):
    """Return the rubric's aggregation strategy after validating it.

    ``rubric["aggregation"]`` must be an object whose ``strategy`` is the
    string ``weighted_average``, the only strategy handled here. Anything else
    is reported through ``fail_runtime``.
    """
    aggregation = require_object(rubric.get("aggregation"), "rubric.aggregation")
    strategy = require_string(aggregation.get("strategy"), "rubric.aggregation.strategy")
    if strategy == "weighted_average":
        return strategy
    fail_runtime(
        f"rubric.aggregation.strategy={strategy} is not supported. Supported strategy is weighted_average."
    )
    return strategy
113
+
114
+
115
def normalize_checks(rubric):
    """Return the rubric's checks as a list of normalized check dicts.

    ``rubric["checks"]`` must be a non-empty list. Each entry is validated by
    ``normalize_check``; afterwards the ids are verified to be unique. Problems
    are reported through ``fail_runtime``.
    """
    raw_checks = rubric.get("checks")
    if not (isinstance(raw_checks, list) and raw_checks):
        fail_runtime("rubric.checks must be a non-empty array.")

    checks = []
    for position, raw_check in enumerate(raw_checks):
        checks.append(normalize_check(raw_check, position))

    # Uniqueness is checked only after every entry has been validated.
    known_ids = set()
    for check in checks:
        if check["id"] in known_ids:
            fail_runtime(f"rubric.checks id {check['id']} is duplicated.")
        known_ids.add(check["id"])
    return checks
126
+
127
+
128
def has_required_content(value):
    """Tell whether ``value`` counts as present for a ``required_path`` check.

    ``None``, blank strings, empty lists and empty dicts count as missing.
    Every other value, including ``0`` and ``False``, counts as present.
    """
    if isinstance(value, str):
        return value.strip() != ""
    if isinstance(value, (list, dict)):
        return bool(value)
    return value is not None
136
+
137
+
138
def evaluate_check(check, record):
    """Return ``True`` when ``record`` satisfies the normalized ``check``.

    Paths are looked up as top-level keys of ``record``. Rule semantics:

    * ``required_path``: the key exists and ``has_required_content`` accepts
      its value.
    * ``string_min_length``: the value is a string whose stripped length is at
      least ``min``.
    * ``array_min_length``: the value is a list with at least ``min`` items.
    * ``forbidden_terms_absent``: none of the terms occurs, compared in lower
      case, in any string value found under ``paths``; non-string values are
      ignored.
    * ``enum``: the value is a string listed in ``allowed``.

    An unknown rule is reported through ``fail_runtime``.
    """
    rule = check["rule"]

    if rule == "required_path":
        key = check["path"]
        if key not in record:
            return False
        return has_required_content(record.get(key))

    if rule == "string_min_length":
        candidate = record.get(check["path"])
        if not isinstance(candidate, str):
            return False
        return len(candidate.strip()) >= check["min"]

    if rule == "array_min_length":
        candidate = record.get(check["path"])
        if not isinstance(candidate, list):
            return False
        return len(candidate) >= check["min"]

    if rule == "forbidden_terms_absent":
        needles = [term.lower() for term in check["terms"]]
        haystacks = [
            text.lower()
            for text in (record.get(path) for path in check["paths"])
            if isinstance(text, str)
        ]
        return not any(needle in haystack for haystack in haystacks for needle in needles)

    if rule == "enum":
        candidate = record.get(check["path"])
        return isinstance(candidate, str) and candidate in check["allowed"]

    fail_runtime(f"rubric.checks rule {rule} is not supported.")
162
+
163
+
164
def load_json_object(path, label, *, reject_invalid):
    """Load the JSON file at ``path`` and return it, requiring a JSON object.

    ``reject_invalid`` selects how problems are reported. When true, the
    message is passed to ``reject_submission`` first; ``fail_runtime`` is the
    fallback in every case. A ``RuntimeError`` raised by ``load_json_file`` is
    reported with its own message; a non-dict payload is reported as
    ``"<label> must be a JSON object."``.
    """

    def report(message):
        # Submission-side problems are rejections; everything else is a
        # runtime failure. Both helpers are assumed not to return.
        if reject_invalid:
            reject_submission(message)
        fail_runtime(message)

    try:
        data = load_json_file(path, label=label)
    except RuntimeError as error:
        report(str(error))

    if isinstance(data, dict):
        return data
    report(f"{label} must be a JSON object.")
    return data
177
+
178
+
179
def main():
    """Score a JSON submission against a rubric of weighted pass/fail checks.

    Steps:

    1. Load the runtime context and the ``compiled_config`` scoring asset,
       which names the evaluation (rubric) and submission artifact roles.
    2. Load the rubric and the submitted record. A malformed submission is
       reported through ``reject_submission``; every other problem goes
       through ``fail_runtime``.
    3. Validate the rubric, evaluate every check against the record and write
       ``earned_weight / total_weight`` plus per-check details via
       ``write_score``.
    """
    runtime_context = load_runtime_context()
    config_path = resolve_scoring_asset(
        runtime_context,
        "compiled_config",
        kind="config",
    )
    # Load through load_json_object so that a config that parses but is not a
    # JSON object is reported via fail_runtime instead of crashing with an
    # AttributeError on the first ``config.get`` below.
    config = load_json_object(config_path, "compiled_config", reject_invalid=False)
    rubric_role = require_string(config.get("evaluation_role"), "compiled_config.evaluation_role")
    submission_role = require_string(config.get("submission_role"), "compiled_config.submission_role")
    final_score_key = require_string(
        runtime_context.get("final_score_key"),
        "runtime_context.final_score_key",
    )
    rubric_path = resolve_evaluation_artifact(runtime_context, rubric_role)
    submission_path = resolve_submission_artifact(runtime_context, submission_role)
    # The rubric is scorer-side input: problems with it are runtime failures.
    rubric = load_json_object(
        rubric_path,
        f"evaluation artifact {rubric_role}",
        reject_invalid=False,
    )
    # The record comes from the submitter: problems with it are rejections.
    record = load_json_object(
        submission_path,
        f"submission artifact {submission_role}",
        reject_invalid=True,
    )
    aggregation_strategy = normalize_aggregation(rubric)
    checks = normalize_checks(rubric)
    total_weight = sum(check["weight"] for check in checks)
    # Defensive guard for the division below; validated weights are positive.
    if total_weight <= 0:
        fail_runtime("rubric.checks must declare a positive total weight.")
    check_results = []
    earned_weight = 0.0
    for check in checks:
        passed = evaluate_check(check, record)
        # Checks are all-or-nothing: a pass earns the full weight.
        rule_score = 1.0 if passed else 0.0
        earned_weight += check["weight"] * rule_score
        check_results.append(
            {
                "id": check["id"],
                "rule": check["rule"],
                "weight": check["weight"],
                "score": rule_score,
                "passed": passed,
            }
        )
    score = earned_weight / total_weight
    passed_checks = sum(1 for result in check_results if result["passed"])
    write_score(
        score=score,
        details={
            final_score_key: score,
            # Both keys report the same count; both are kept so the emitted
            # details stay unchanged for existing consumers.
            "validated_fields": passed_checks,
            "passed_checks": passed_checks,
            "total_checks": len(checks),
            "earned_weight": earned_weight,
            "total_weight": total_weight,
            "aggregation_strategy": aggregation_strategy,
            "check_results": check_results,
        },
    )
243
+
244
+
245
# Run the rubric scorer only when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,160 @@
1
+ import subprocess
2
+ import sys
3
+ import tempfile
4
+ from pathlib import Path
5
+
6
+ from agora_runtime import (
7
+ fail_runtime,
8
+ load_json_file,
9
+ load_runtime_context,
10
+ resolve_evaluation_artifact,
11
+ resolve_scoring_asset,
12
+ resolve_submission_artifact,
13
+ safe_extract_zip,
14
+ write_score,
15
+ )
16
+
17
+
18
def require_string(value, label):
    """Return the stripped form of ``value``, which must be a non-blank string.

    Non-strings and strings that are empty after stripping are reported
    through ``fail_runtime`` with ``label`` naming the field. The code assumes
    ``fail_runtime`` does not return.
    """
    blank_or_wrong_type = not (isinstance(value, str) and value.strip())
    if blank_or_wrong_type:
        fail_runtime(f"{label} must be a non-empty string.")
    return value.strip()
22
+
23
+
24
def require_int(value, label):
    """Return ``value`` unchanged once it is known to be a positive integer.

    Booleans are rejected explicitly: ``bool`` is a subclass of ``int``, so
    without this guard a manifest value of ``true`` would be accepted as the
    integer 1 (for example a 1 ms timeout). Rejections are reported through
    ``fail_runtime``, which the code assumes does not return.
    """
    if isinstance(value, bool) or not isinstance(value, int) or value <= 0:
        fail_runtime(f"{label} must be a positive integer.")
    return value
28
+
29
+
30
def load_json_object(path, label):
    """Load the JSON file at ``path`` and return it, requiring a JSON object.

    A ``RuntimeError`` raised by ``load_json_file`` is reported through
    ``fail_runtime`` with its own message; a payload that is not a ``dict`` is
    reported as ``"<label> must be a JSON object."``.
    """
    try:
        parsed = load_json_file(path, label=label)
    except RuntimeError as error:
        fail_runtime(str(error))

    if isinstance(parsed, dict):
        return parsed
    fail_runtime(f"{label} must be a JSON object.")
    return parsed
38
+
39
+
40
def read_required_text(path, label):
    """Return the UTF-8 text content of ``path``.

    ``path`` is expected to offer ``read_text`` (the callers in this file pass
    ``pathlib.Path`` objects). Every failure is reported through
    ``fail_runtime`` with ``label`` describing the file:

    * the file does not exist,
    * the file is not valid UTF-8 (``UnicodeDecodeError`` is a ``ValueError``,
      not an ``OSError``, so it needs its own handler),
    * any other ``OSError`` while reading.
    """
    try:
        return path.read_text(encoding="utf-8")
    except FileNotFoundError:
        fail_runtime(f"Missing {label} at {path}.")
    except UnicodeDecodeError as error:
        fail_runtime(f"Unable to decode {label} as UTF-8: {error}.")
    except OSError as error:
        fail_runtime(f"Unable to read {label}: {error}.")
47
+
48
+
49
def normalize_output(text, strip_trailing_whitespace):
    """Return ``text`` ready for comparison.

    When ``strip_trailing_whitespace`` is truthy, trailing whitespace
    (including newlines) is removed; otherwise ``text`` is returned as is.
    """
    return text.rstrip() if strip_trailing_whitespace else text
53
+
54
+
55
def run_python_solution(solution_path, stdin_text, timeout_ms, working_dir):
    """Run the submitted Python file once and return the completed process.

    The solution is executed with the current interpreter, receives
    ``stdin_text`` on standard input and runs with ``working_dir`` as its
    working directory. ``timeout_ms`` is the time limit in milliseconds.

    Returns the ``subprocess.CompletedProcess`` (text mode, output captured),
    or ``None`` when the run cannot produce a comparable result:

    * the time limit expired, or
    * the captured output could not be decoded as text. In text mode
      ``subprocess.run`` raises ``UnicodeDecodeError`` for undecodable bytes;
      treating that as a failed run keeps a submission that prints arbitrary
      bytes from crashing the scorer.

    A non-zero exit status is not an error here (``check=False``); callers
    inspect ``returncode`` themselves.

    NOTE(review): ``main`` in this file passes the extracted harness directory
    as ``working_dir``, so the solution can read the harness files, including
    expected outputs. Confirm this is intended.
    """
    command = [sys.executable, str(solution_path)]
    try:
        return subprocess.run(
            command,
            input=stdin_text,
            capture_output=True,
            text=True,
            timeout=timeout_ms / 1000.0,
            cwd=str(working_dir),
            check=False,
        )
    except subprocess.TimeoutExpired:
        return None
    except UnicodeDecodeError:
        # Undecodable output can never match the expected text.
        return None
68
+
69
+
70
def main():
    """Score a Python submission by running it against a zipped test harness.

    Steps:

    1. Load the runtime context and the ``compiled_config`` scoring asset,
       which names the harness (evaluation) and solution (submission) roles.
    2. Extract the harness archive into a temporary directory and validate its
       ``agora-harness.json`` manifest (version ``v1``, language ``python``,
       optional ``timeout_ms`` and ``strip_trailing_whitespace``, a non-empty
       ``tests`` list).
    3. For every test, feed the stdin file to the solution and compare its
       stdout with the expected stdout file. A timeout, a non-zero exit status
       or a mismatch counts as a failed test.
    4. Write ``passed / total`` as the score via ``write_score``.

    Harness or configuration problems are reported through ``fail_runtime``.
    """
    runtime_context = load_runtime_context()
    config = load_json_object(
        resolve_scoring_asset(runtime_context, "compiled_config", kind="config"),
        "compiled_config",
    )
    harness_role = require_string(config.get("evaluation_role"), "compiled_config.evaluation_role")
    solution_role = require_string(config.get("submission_role"), "compiled_config.submission_role")
    final_score_key = require_string(
        runtime_context.get("final_score_key"),
        "runtime_context.final_score_key",
    )
    harness_path = resolve_evaluation_artifact(runtime_context, harness_role)
    solution_path = resolve_submission_artifact(runtime_context, solution_role)

    with tempfile.TemporaryDirectory(prefix="agora-harness-") as working_root:
        working_dir = Path(working_root)
        safe_extract_zip(
            harness_path,
            working_dir,
            label=f"evaluation artifact {harness_role}",
        )
        harness_manifest = load_json_object(
            working_dir / "agora-harness.json",
            "agora-harness.json",
        )
        version = require_string(harness_manifest.get("version"), "agora-harness.version")
        language = require_string(harness_manifest.get("language"), "agora-harness.language")
        if version != "v1":
            fail_runtime(f"Unsupported harness version {version}.")
        if language != "python":
            fail_runtime(f"Unsupported harness language {language}.")

        # The time limit defaults to 30000 ms when the manifest omits it.
        timeout_ms = require_int(
            harness_manifest.get("timeout_ms", 30000),
            "agora-harness.timeout_ms",
        )
        strip_trailing_whitespace = bool(
            harness_manifest.get("strip_trailing_whitespace", False)
        )

        tests = harness_manifest.get("tests")
        if not (isinstance(tests, list) and len(tests) != 0):
            fail_runtime("agora-harness.tests must declare at least one test case.")

        passed_tests = 0
        for index, test_case in enumerate(tests):
            if not isinstance(test_case, dict):
                fail_runtime(f"agora-harness.tests[{index}] must be an object.")
            # Both manifest entries are validated before either file is read.
            stdin_name = require_string(
                test_case.get("stdin_path"),
                f"agora-harness.tests[{index}].stdin_path",
            )
            stdin_path = working_dir / stdin_name
            expected_name = require_string(
                test_case.get("expected_stdout_path"),
                f"agora-harness.tests[{index}].expected_stdout_path",
            )
            expected_stdout_path = working_dir / expected_name

            stdin_text = read_required_text(
                stdin_path,
                f"test stdin {stdin_path.name}",
            )
            expected_text = read_required_text(
                expected_stdout_path,
                f"test expected stdout {expected_stdout_path.name}",
            )
            expected_stdout = normalize_output(expected_text, strip_trailing_whitespace)

            run = run_python_solution(solution_path, stdin_text, timeout_ms, working_dir)
            # ``None`` means the run produced no result; a non-zero exit status
            # also fails the test.
            if run is None or run.returncode != 0:
                continue
            if normalize_output(run.stdout, strip_trailing_whitespace) == expected_stdout:
                passed_tests += 1

    total_tests = len(tests)
    score = passed_tests / total_tests
    write_score(
        score=score,
        details={
            final_score_key: score,
            "tests_passed": passed_tests,
            "total_tests": total_tests,
        },
    )
157
+
158
+
159
# Run the harness scorer only when this file is executed as a script.
if __name__ == "__main__":
    main()
+ main()