npm - @moleculeagora/cli - Versions diffs - 0.1.0 - Mend

@moleculeagora/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/README.md +31 -0
package/dist/index.js +30368 -0
package/dist/index.js.map +1 -0
package/dist/python-v1/agora_runtime.py +282 -0
package/dist/python-v1/answer-set-metric.py +264 -0
package/dist/python-v1/assertion-set-evaluation.py +879 -0
package/dist/python-v1/exact-match.py +60 -0
package/dist/python-v1/l4-composition.py +435 -0
package/dist/python-v1/multi-output-tabular-metric.py +392 -0
package/dist/python-v1/panel-ranking-metric.py +622 -0
package/dist/python-v1/project-test.py +256 -0
package/dist/python-v1/protein-binder-assay-metric.py +600 -0
package/dist/python-v1/public-tool-metric.py +161 -0
package/dist/python-v1/ranking-metric.py +426 -0
package/dist/python-v1/reference-artifact-assertion.py +532 -0
package/dist/python-v1/rubric-validation.py +246 -0
package/dist/python-v1/solver-python-stdio-test.py +160 -0
package/dist/python-v1/statistical-endpoint-test-v2.py +629 -0
package/dist/python-v1/statistical-endpoint-test.py +442 -0
package/dist/python-v1/table-metric.py +1291 -0
package/dist/release-metadata.json +7 -0
package/package.json +67 -0

package/dist/python-v1/statistical-endpoint-test.py ADDED Viewed

@@ -0,0 +1,442 @@
+import csv
+import math
+from agora_runtime import (
+    fail_runtime,
+    load_json_file,
+    load_runtime_context,
+    reject_submission,
+    resolve_scoring_asset,
+    resolve_submission_artifact,
+    write_score,
+)
+# Values accepted for endpoint_test.expected_direction (checked in
+# load_endpoint_test_config, interpreted in direction_matches).
+SUPPORTED_DIRECTIONS = ("treatment_lt_control", "treatment_gt_control")
+# Upper bound on loop iterations in beta_continued_fraction before it reports
+# non-convergence through fail_runtime.
+BETA_CONTINUED_FRACTION_MAX_ITERATIONS = 200
+# Convergence tolerance: beta_continued_fraction stops once |delta - 1| falls
+# below this value.
+BETA_CONTINUED_FRACTION_EPSILON = 3e-14
+# Replacement for intermediate terms whose magnitude is smaller than this, so
+# the divisions and reciprocals in beta_continued_fraction stay defined.
+BETA_CONTINUED_FRACTION_MIN_FLOAT = 1e-300
+def require_string(value, label, invalid_handler=fail_runtime):
+    """Validate that ``value`` is non-blank text and return it stripped.
+
+    Args:
+        value: Candidate value of any type.
+        label: Name of the field, used as the prefix of the error message.
+        invalid_handler: Callable invoked with the error message when the
+            value is not a string or contains only whitespace. Presumably it
+            does not return -- TODO confirm against agora_runtime.
+
+    Returns:
+        ``value`` with leading and trailing whitespace removed.
+    """
+    is_non_blank_text = isinstance(value, str) and bool(value.strip())
+    if not is_non_blank_text:
+        invalid_handler(f"{label} must be a non-empty string.")
+    return value.strip()
+def require_config_dict(config, key):
+    """Return ``config[key]`` after checking that it is a dict.
+
+    A missing key or a non-dict value is reported through ``fail_runtime``
+    with a message naming ``compiled_config.<key>``.
+    """
+    section = config.get(key)
+    if isinstance(section, dict):
+        return section
+    fail_runtime(f"compiled_config.{key} must be an object.")
+    return section
+def require_alpha(value):
+    """Validate the significance level and return it as a float.
+
+    Booleans and non-numeric values are refused first; the converted float
+    must then be finite and lie strictly between 0 and 1. Violations are
+    reported through ``fail_runtime``.
+    """
+    is_real_number = isinstance(value, (int, float)) and not isinstance(value, bool)
+    if not is_real_number:
+        fail_runtime("endpoint_test.alpha must be a finite number.")
+    alpha = float(value)
+    inside_open_unit_interval = math.isfinite(alpha) and 0.0 < alpha < 1.0
+    if not inside_open_unit_interval:
+        fail_runtime("endpoint_test.alpha must be finite, greater than 0, and less than 1.")
+    return alpha
+def require_minimum_n(value):
+    """Validate the per-group minimum sample size and return it unchanged.
+
+    The value must be an ``int`` (booleans are refused) and at least 2;
+    violations are reported through ``fail_runtime``.
+    """
+    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
+    if not is_plain_int:
+        fail_runtime("endpoint_test.minimum_n_per_group must be an integer.")
+    too_small = value < 2
+    if too_small:
+        fail_runtime("endpoint_test.minimum_n_per_group must be at least 2.")
+    return value
+def load_endpoint_test_config(config):
+    """Extract and validate the ``endpoint_test`` section of the compiled config.
+
+    Validation runs in a fixed order: group/value column names (which must
+    differ), treatment/control group labels (which must differ), the expected
+    direction (one of SUPPORTED_DIRECTIONS), then alpha and the minimum group
+    size. Any violation is reported through ``fail_runtime``.
+
+    Returns:
+        Dict with keys ``group_column``, ``value_column``, ``treatment_group``,
+        ``control_group``, ``expected_direction``, ``alpha`` and
+        ``minimum_n_per_group``.
+    """
+    section = require_config_dict(config, "endpoint_test")
+
+    def text_field(name):
+        # Every textual setting shares the same "endpoint_test.<name>" label.
+        return require_string(section.get(name), f"endpoint_test.{name}")
+
+    group_column = text_field("group_column")
+    value_column = text_field("value_column")
+    if group_column == value_column:
+        fail_runtime("endpoint_test.group_column and endpoint_test.value_column must be different.")
+    treatment_group = text_field("treatment_group")
+    control_group = text_field("control_group")
+    if treatment_group == control_group:
+        fail_runtime("endpoint_test.treatment_group and endpoint_test.control_group must be different.")
+    expected_direction = text_field("expected_direction")
+    if expected_direction not in SUPPORTED_DIRECTIONS:
+        allowed = ", ".join(SUPPORTED_DIRECTIONS)
+        fail_runtime(f"endpoint_test.expected_direction must be one of {allowed}.")
+    alpha = require_alpha(section.get("alpha"))
+    minimum_n = require_minimum_n(section.get("minimum_n_per_group"))
+    return {
+        "group_column": group_column,
+        "value_column": value_column,
+        "treatment_group": treatment_group,
+        "control_group": control_group,
+        "expected_direction": expected_direction,
+        "alpha": alpha,
+        "minimum_n_per_group": minimum_n,
+    }
+def require_observations_slot(runtime_context, role, params):
+    """Check that the artifact contract declares a usable slot for ``role``.
+
+    The first dict entry of ``artifact_contract.submission`` whose ``role``
+    equals ``role`` must use a ``csv_columns`` validator whose ``required``
+    list names ``observation_id`` plus the configured group and value columns.
+    Any violation, including a missing slot, is reported through
+    ``fail_runtime``. Returns ``None`` when the slot is acceptable.
+    """
+    contract = runtime_context.get("artifact_contract")
+    if not isinstance(contract, dict):
+        fail_runtime("Runtime context is missing artifact_contract.")
+    submission_slots = contract.get("submission")
+    if not isinstance(submission_slots, list):
+        fail_runtime("Runtime context is missing artifact_contract.submission.")
+    # Only the first slot carrying the requested role is inspected.
+    slot = next(
+        (
+            candidate
+            for candidate in submission_slots
+            if isinstance(candidate, dict) and candidate.get("role") == role
+        ),
+        None,
+    )
+    if slot is None:
+        fail_runtime(f"Runtime context is missing submission slot for role {role}.")
+        return
+    validator = slot.get("validator")
+    uses_csv_columns = isinstance(validator, dict) and validator.get("kind") == "csv_columns"
+    if not uses_csv_columns:
+        fail_runtime(
+            f"submission role {role} must use validator.kind=csv_columns for two_group_endpoint_test@1."
+        )
+    required = validator.get("required")
+    if not isinstance(required, list):
+        fail_runtime(
+            f"submission role {role} validator.required must be an array for two_group_endpoint_test@1."
+        )
+    # Non-string and blank entries are ignored; names are compared stripped.
+    declared = {
+        name.strip()
+        for name in required
+        if isinstance(name, str) and name.strip()
+    }
+    needed = {"observation_id", params["group_column"], params["value_column"]}
+    missing = sorted(name for name in needed if name not in declared)
+    if missing:
+        fail_runtime(
+            f"submission role {role} validator.required must include {', '.join(missing)} for two_group_endpoint_test@1."
+        )
+    return
+def read_csv_rows(path, label):
+    """Read a CSV file into stripped column names and per-row dicts.
+
+    Args:
+        path: Object with an ``open(mode, encoding=..., newline=...)`` method
+            (``pathlib.Path``-like).
+        label: Human-readable artifact name used in rejection messages.
+
+    Returns:
+        ``(columns, rows)``: ``columns`` lists the header names in file order
+        with surrounding whitespace removed; ``rows`` is a list of dicts keyed
+        by those names. Cell values are passed through as ``csv.DictReader``
+        produced them, so cells missing from a short row are ``None``.
+
+    Every problem is reported through ``reject_submission``: a missing header,
+    blank or duplicate column names, rows with more cells than the header, a
+    missing or unreadable file, bytes that are not valid UTF-8, and input the
+    ``csv`` parser refuses (for example a field above its size limit).
+    """
+    try:
+        with path.open("r", encoding="utf-8", newline="") as handle:
+            reader = csv.DictReader(handle)
+            fieldnames = reader.fieldnames
+            if not fieldnames:
+                reject_submission(f"{label} must include a CSV header row.")
+            normalized = []
+            seen = set()
+            for fieldname in fieldnames:
+                if not isinstance(fieldname, str) or not fieldname.strip():
+                    reject_submission(f"{label} contains an empty CSV column name.")
+                column = fieldname.strip()
+                if column in seen:
+                    reject_submission(f"{label} contains duplicate CSV column {column!r}.")
+                seen.add(column)
+                normalized.append((fieldname, column))
+            rows = []
+            # Data rows are numbered from 2 because row 1 is the header.
+            for row_index, row in enumerate(reader, start=2):
+                # DictReader files surplus cells under the key None.
+                if None in row:
+                    reject_submission(f"{label} row {row_index} has too many columns.")
+                rows.append(
+                    {
+                        normalized_name: row.get(raw_name, "")
+                        for raw_name, normalized_name in normalized
+                    }
+                )
+    except FileNotFoundError:
+        reject_submission(f"Missing {label} at {path}.")
+    except OSError as error:
+        reject_submission(f"Unable to read {label}: {error}.")
+    except UnicodeDecodeError as error:
+        # Decoding errors are ValueError subclasses, not OSError; without this
+        # clause a non-UTF-8 submission would crash instead of being rejected.
+        reject_submission(f"{label} must be UTF-8 encoded text: {error}.")
+    except csv.Error as error:
+        # Raised by the parser itself, e.g. for an oversized field.
+        reject_submission(f"{label} is not valid CSV: {error}.")
+    return [column for _, column in normalized], rows
+def require_columns(fieldnames, required_columns, label):
+    """Reject the submission when any required column is absent.
+
+    Missing names are listed in the order given by ``required_columns``.
+    """
+    absent = []
+    for name in required_columns:
+        if name not in fieldnames:
+            absent.append(name)
+    if absent:
+        reject_submission(f"{label} is missing required columns: {', '.join(absent)}.")
+def parse_finite_number(raw_value, label):
+    """Convert a CSV cell to a finite float or reject the submission.
+
+    ``None`` and blank cells are reported as missing, text that ``float``
+    cannot parse as non-numeric, and NaN or infinities as non-finite. All
+    reports go through ``reject_submission``.
+    """
+    if raw_value is None:
+        text = ""
+    else:
+        text = str(raw_value).strip()
+    if not text:
+        reject_submission(f"{label} must be present.")
+    try:
+        number = float(text)
+    except ValueError:
+        reject_submission(f"{label} must be numeric, received {text!r}.")
+    if math.isfinite(number):
+        return number
+    reject_submission(f"{label} must be finite.")
+    return number
+def load_observations(path, role, params):
+    """Load the observations CSV and split its values by group.
+
+    Each row must carry a unique non-blank ``observation_id``, a non-blank
+    group label and a finite numeric value; violations are reported through
+    ``reject_submission``. Rows whose group is neither the configured
+    treatment nor control group are validated but not collected.
+
+    Returns:
+        ``(treatment_values, control_values)`` as lists of floats in file order.
+    """
+    artifact_label = f"submission artifact {role}"
+    group_column = params["group_column"]
+    value_column = params["value_column"]
+    fieldnames, rows = read_csv_rows(path, artifact_label)
+    require_columns(
+        fieldnames,
+        ["observation_id", group_column, value_column],
+        artifact_label,
+    )
+    known_ids = set()
+    treatment_values, control_values = [], []
+    # Row numbers start at 2 because row 1 of the file is the header.
+    for row_number, row in enumerate(rows, start=2):
+        row_label = f"{artifact_label} row {row_number}"
+        observation_id = require_string(
+            row.get("observation_id"),
+            f"{row_label} observation_id",
+            reject_submission,
+        )
+        if observation_id in known_ids:
+            reject_submission(
+                f"{artifact_label} contains duplicate observation_id {observation_id!r}."
+            )
+        known_ids.add(observation_id)
+        group = require_string(
+            row.get(group_column),
+            f"{row_label} {group_column}",
+            reject_submission,
+        )
+        value = parse_finite_number(
+            row.get(value_column),
+            f"{row_label} {value_column}",
+        )
+        if group == params["treatment_group"]:
+            treatment_values.append(value)
+        elif group == params["control_group"]:
+            control_values.append(value)
+    return treatment_values, control_values
+def require_group_size(values, minimum_n, group_label, group_kind):
+    """Reject the submission when a group has fewer than ``minimum_n`` values.
+
+    The rejection carries a ``details`` dict with the observed count under
+    the key ``n_<group_kind>``.
+    """
+    observed = len(values)
+    if observed >= minimum_n:
+        return
+    reject_submission(
+        f"two_group_endpoint_test@1 requires at least {minimum_n} {group_kind} observations for group {group_label!r}; received {observed}.",
+        details={f"n_{group_kind}": observed},
+    )
+def sample_mean(values):
+    """Return the arithmetic mean of a non-empty sequence of numbers."""
+    total = sum(values)
+    count = len(values)
+    return total / count
+def sample_variance(values, mean):
+    """Return the unbiased (n - 1 denominator) variance of ``values``.
+
+    ``mean`` is the precomputed mean of ``values``; at least two values are
+    needed, otherwise the division by ``len(values) - 1`` fails.
+    """
+    squared_deviations = ((value - mean) ** 2 for value in values)
+    degrees_of_freedom = len(values) - 1
+    return sum(squared_deviations) / degrees_of_freedom
+def beta_continued_fraction(a, b, x):
+    """Evaluate the continued fraction consumed by regularized_incomplete_beta.
+
+    Each loop pass applies two update steps (one with a positive-numerator
+    coefficient built from ``b - iteration``, one with a negated coefficient)
+    and multiplies the running product ``h`` by the resulting factors.
+    NOTE(review): the structure looks like the modified Lentz evaluation used
+    by the classic ``betacf`` routine -- confirm the intended reference.
+
+    Args:
+        a: First shape parameter.
+        b: Second shape parameter.
+        x: Evaluation point; regularized_incomplete_beta, the only caller in
+            view, range-checks its inputs before calling.
+
+    Returns:
+        The accumulated product ``h`` once the last factor is within
+        BETA_CONTINUED_FRACTION_EPSILON of 1.
+
+    If that never happens within BETA_CONTINUED_FRACTION_MAX_ITERATIONS
+    passes, ``fail_runtime`` is called.
+    """
+    qab = a + b
+    qap = a + 1.0
+    qam = a - 1.0
+    c = 1.0
+    d = 1.0 - (qab * x / qap)
+    # Guard against a (near-)zero term before taking its reciprocal.
+    if abs(d) < BETA_CONTINUED_FRACTION_MIN_FLOAT:
+        d = BETA_CONTINUED_FRACTION_MIN_FLOAT
+    d = 1.0 / d
+    h = d
+    for iteration in range(1, BETA_CONTINUED_FRACTION_MAX_ITERATIONS + 1):
+        m2 = 2 * iteration
+        # First update of this pass.
+        aa = iteration * (b - iteration) * x / ((qam + m2) * (a + m2))
+        d = 1.0 + aa * d
+        if abs(d) < BETA_CONTINUED_FRACTION_MIN_FLOAT:
+            d = BETA_CONTINUED_FRACTION_MIN_FLOAT
+        c = 1.0 + aa / c
+        if abs(c) < BETA_CONTINUED_FRACTION_MIN_FLOAT:
+            c = BETA_CONTINUED_FRACTION_MIN_FLOAT
+        d = 1.0 / d
+        h *= d * c
+        # Second update of this pass, with a negated coefficient.
+        aa = -((a + iteration) * (qab + iteration) * x) / (
+            (a + m2) * (qap + m2)
+        )
+        d = 1.0 + aa * d
+        if abs(d) < BETA_CONTINUED_FRACTION_MIN_FLOAT:
+            d = BETA_CONTINUED_FRACTION_MIN_FLOAT
+        c = 1.0 + aa / c
+        if abs(c) < BETA_CONTINUED_FRACTION_MIN_FLOAT:
+            c = BETA_CONTINUED_FRACTION_MIN_FLOAT
+        d = 1.0 / d
+        delta = d * c
+        h *= delta
+        # Converged once the latest multiplicative factor is essentially 1.
+        if abs(delta - 1.0) < BETA_CONTINUED_FRACTION_EPSILON:
+            return h
+    fail_runtime("regularized incomplete beta calculation did not converge.")
+def regularized_incomplete_beta(x, a, b):
+    """Compute the regularized incomplete beta function I_x(a, b).
+
+    Args:
+        x: Evaluation point, required to lie in [0, 1].
+        a: First shape parameter, required to be positive.
+        b: Second shape parameter, required to be positive.
+
+    Returns:
+        0.0 for ``x == 0``, 1.0 for ``x == 1``, otherwise the value obtained
+        from the continued fraction scaled by the prefactor
+        ``exp(lgamma(a + b) - lgamma(a) - lgamma(b) + a*log(x) + b*log1p(-x))``.
+
+    Invalid arguments, including NaN, are reported through ``fail_runtime``.
+    """
+    # The checks are phrased positively so that NaN, for which every ordering
+    # comparison is False, is refused here instead of slipping through and
+    # surfacing later as a misleading "did not converge" failure.
+    if not (a > 0.0 and b > 0.0):
+        fail_runtime("regularized incomplete beta parameters must be positive.")
+    if not (0.0 <= x <= 1.0):
+        fail_runtime("regularized incomplete beta x must be in [0, 1].")
+    if x == 0.0:
+        return 0.0
+    if x == 1.0:
+        return 1.0
+    # Build the prefactor in log space before exponentiating.
+    log_front = (
+        math.lgamma(a + b)
+        - math.lgamma(a)
+        - math.lgamma(b)
+        + (a * math.log(x))
+        + (b * math.log1p(-x))
+    )
+    front = math.exp(log_front)
+    # Below this threshold the fraction is evaluated directly; above it the
+    # arguments are swapped (a <-> b, x -> 1 - x) and the result is taken as
+    # the complement.
+    if x < (a + 1.0) / (a + b + 2.0):
+        return front * beta_continued_fraction(a, b, x) / a
+    return 1.0 - (front * beta_continued_fraction(b, a, 1.0 - x) / b)
+def clamp_probability(value):
+    """Return ``value`` as a probability, absorbing tiny rounding overshoot.
+
+    Values already in [0, 1] are returned unchanged. Values less than 1e-15
+    outside either end are snapped to 0.0 or 1.0. Non-finite values and
+    anything further outside the interval are reported through
+    ``fail_runtime``.
+    """
+    if not math.isfinite(value):
+        fail_runtime("Student t p-value calculation produced a non-finite result.")
+    if 0.0 <= value <= 1.0:
+        return value
+    if -1e-15 < value < 0.0:
+        return 0.0
+    if 1.0 < value < 1.0 + 1e-15:
+        return 1.0
+    if value < 0.0 or value > 1.0:
+        fail_runtime("Student t p-value calculation produced a value outside [0, 1].")
+    return value
+def two_sided_student_t_p_value(test_statistic, degrees_of_freedom):
+    """Return the two-sided p-value of a Student t statistic.
+
+    The p-value is computed as ``I_x(df / 2, 1 / 2)`` with
+    ``x = df / (df + t**2)`` and then passed through ``clamp_probability``.
+    A statistic of exactly zero short-circuits to 1.0. Degrees of freedom
+    that are not positive and finite are reported through ``fail_runtime``.
+    """
+    df_is_usable = math.isfinite(degrees_of_freedom) and degrees_of_freedom > 0.0
+    if not df_is_usable:
+        fail_runtime("Welch-Satterthwaite degrees of freedom must be positive and finite.")
+    magnitude = abs(test_statistic)
+    if magnitude == 0.0:
+        return 1.0
+    # Plain multiplication is kept so a huge statistic overflows to inf
+    # (x becomes 0) rather than raising as ``**`` would.
+    squared = magnitude * magnitude
+    x = degrees_of_freedom / (degrees_of_freedom + squared)
+    tail = regularized_incomplete_beta(x, degrees_of_freedom / 2.0, 0.5)
+    return clamp_probability(tail)
+def compute_welch_test(treatment_values, control_values):
+    """Run Welch's unequal-variance t-test on two samples.
+
+    Args:
+        treatment_values: Treatment-group observations (at least two).
+        control_values: Control-group observations (at least two).
+
+    Returns:
+        Dict with ``treatment_mean``, ``control_mean``, ``effect`` (treatment
+        mean minus control mean), ``test_statistic``, ``degrees_of_freedom``
+        (Welch-Satterthwaite) and the two-sided ``p_value``.
+
+    A squared standard error that is zero or non-finite, or a degrees-of-
+    freedom denominator that is zero or non-finite, is reported through
+    ``reject_submission``.
+    """
+    n_treatment = len(treatment_values)
+    n_control = len(control_values)
+    treatment_mean = sample_mean(treatment_values)
+    control_mean = sample_mean(control_values)
+    # Per-group contribution to the squared standard error: s^2 / n.
+    treatment_term = sample_variance(treatment_values, treatment_mean) / n_treatment
+    control_term = sample_variance(control_values, control_mean) / n_control
+    standard_error_squared = treatment_term + control_term
+    se_is_usable = math.isfinite(standard_error_squared) and standard_error_squared > 0.0
+    if not se_is_usable:
+        reject_submission(
+            "Welch t-test requires positive within-group variance; observed zero pooled standard error."
+        )
+    effect = treatment_mean - control_mean
+    test_statistic = effect / math.sqrt(standard_error_squared)
+    # Welch-Satterthwaite denominator; groups with zero variance add nothing.
+    denominator = 0.0
+    for term, size in ((treatment_term, n_treatment), (control_term, n_control)):
+        if term > 0.0:
+            denominator += (term * term) / (size - 1)
+    if denominator <= 0.0 or not math.isfinite(denominator):
+        reject_submission(
+            "Welch t-test requires positive variance in at least one group."
+        )
+    degrees_of_freedom = (standard_error_squared * standard_error_squared) / denominator
+    return {
+        "treatment_mean": treatment_mean,
+        "control_mean": control_mean,
+        "effect": effect,
+        "test_statistic": test_statistic,
+        "degrees_of_freedom": degrees_of_freedom,
+        "p_value": two_sided_student_t_p_value(test_statistic, degrees_of_freedom),
+    }
+def direction_matches(effect, expected_direction):
+    """Tell whether the sign of ``effect`` agrees with ``expected_direction``.
+
+    ``"treatment_lt_control"`` needs a strictly negative effect; any other
+    value is treated as ``"treatment_gt_control"`` and needs a strictly
+    positive effect. A zero effect never matches.
+    """
+    if expected_direction != "treatment_lt_control":
+        return effect > 0.0
+    return effect < 0.0
+def main():
+    """Score one submission with the two_group_endpoint_test@1 procedure.
+
+    Loads the runtime context and compiled config, validates the config and
+    the submission slot, reads the observations CSV, runs Welch's t-test and
+    writes a score of 1.0 when the p-value is below alpha and the effect has
+    the expected sign, otherwise 0.0. The score is written together with a
+    details dict of the test results and the parameters used.
+    """
+    context = load_runtime_context()
+    compiled_config_path = resolve_scoring_asset(
+        context,
+        "compiled_config",
+        kind="config",
+    )
+    try:
+        config = load_json_file(compiled_config_path, label="compiled_config")
+    except RuntimeError as error:
+        fail_runtime(str(error))
+    if not isinstance(config, dict):
+        fail_runtime("compiled_config must be a JSON object.")
+    role = require_string(
+        config.get("submission_role"),
+        "compiled_config.submission_role",
+    )
+    score_key = require_string(
+        config.get("final_score_key"),
+        "compiled_config.final_score_key",
+    )
+    objective = require_string(
+        context.get("objective"),
+        "runtime_context.objective",
+    )
+    if objective != "maximize":
+        fail_runtime("two_group_endpoint_test@1 requires objective=maximize.")
+    params = load_endpoint_test_config(config)
+    require_observations_slot(context, role, params)
+    artifact_path = resolve_submission_artifact(context, role)
+    treatment_values, control_values = load_observations(artifact_path, role, params)
+    # The treatment group is checked before the control group.
+    group_checks = (
+        (treatment_values, "treatment_group", "treatment"),
+        (control_values, "control_group", "control"),
+    )
+    for values, label_key, kind in group_checks:
+        require_group_size(values, params["minimum_n_per_group"], params[label_key], kind)
+    result = compute_welch_test(treatment_values, control_values)
+    direction_ok = direction_matches(result["effect"], params["expected_direction"])
+    significant = result["p_value"] < params["alpha"]
+    score = 1.0 if significant and direction_ok else 0.0
+    # Keys are inserted in a fixed order: the score, the test results, the
+    # group sizes and direction flag, then the echoed parameters.
+    details = {score_key: score, "score": score}
+    for key in (
+        "p_value",
+        "effect",
+        "test_statistic",
+        "degrees_of_freedom",
+        "treatment_mean",
+        "control_mean",
+    ):
+        details[key] = result[key]
+    details["n_treatment"] = len(treatment_values)
+    details["n_control"] = len(control_values)
+    details["direction_matched"] = direction_ok
+    for key in (
+        "expected_direction",
+        "alpha",
+        "minimum_n_per_group",
+        "treatment_group",
+        "control_group",
+        "group_column",
+        "value_column",
+    ):
+        details[key] = params[key]
+    write_score(score=score, details=details)
+# Run the scorer only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()