npm - @routerlab/core - Versions diffs - 0.0.1 - Mend

@routerlab/core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/LICENSE +201 -0
package/README.md +92 -0
package/dist/candidates.json +46 -0
package/dist/cost.d.ts +120 -0
package/dist/cost.d.ts.map +1 -0
package/dist/cost.js +496 -0
package/dist/cost.js.map +1 -0
package/dist/index.d.ts +8 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +21 -0
package/dist/index.js.map +1 -0
package/dist/quality_predictor.d.ts +94 -0
package/dist/quality_predictor.d.ts.map +1 -0
package/dist/quality_predictor.js +338 -0
package/dist/quality_predictor.js.map +1 -0
package/dist/quality_prior.d.ts +24 -0
package/dist/quality_prior.d.ts.map +1 -0
package/dist/quality_prior.js +109 -0
package/dist/quality_prior.js.map +1 -0
package/dist/router.d.ts +27 -0
package/dist/router.d.ts.map +1 -0
package/dist/router.js +201 -0
package/dist/router.js.map +1 -0
package/dist/types.d.ts +95 -0
package/dist/types.d.ts.map +1 -0
package/dist/types.js +8 -0
package/dist/types.js.map +1 -0
package/package.json +27 -0

package/dist/quality_predictor.js ADDED Viewed

@@ -0,0 +1,338 @@
+// quality_predictor.ts — calibrated quality predictor for routerlab.
+//
+// --------------------------------------------------------------------------
+// WHY THIS MODULE EXISTS
+// --------------------------------------------------------------------------
+// Phase 2 shipped a hardcoded per-(taskClass, model) quality prior in
+// `quality_prior.ts`. That prior was a deliberate placeholder: routing
+// decisions were "shaped right" but uncalibrated. Phase 3 replaces it with
+// a real, data-driven predictor whose mean and 95% confidence interval are
+// computed from the eval-harness measurements written to
+// `eval/results/quality_table.json` by the `router-frontier` agent.
+//
+// Differentiation versus prior art:
+// a calibrated pre-call quality estimator is not novel on its own —
+// RouteLLM's matrix factorization, BEST-Route's difficulty heads, and
+// cross-attention routers all do this. Our differentiation is reporting
+// **explicit confidence intervals** (Wilson score, n-aware) so that a
+// caller can route on a *lower-bound* quality estimate at high confidence
+// rather than a point estimate, and pair that with atlas-grounded cost
+// (see `cost.ts`) for a fully accountable routing trace.
+//
+// --------------------------------------------------------------------------
+// DATA SOURCE PRECEDENCE
+// --------------------------------------------------------------------------
+//   1. `eval/results/quality_table.json` on disk if present (read once,
+//      memoized). Each cell carries `{ trials, successes }` from the eval
+//      harness. Mean is `successes/trials`; CI is Wilson score 95%.
+//      `n` is `trials`.
+//   2. Otherwise the seeded prior table is used. The prior is
+//      treated as if it were a measurement with `PRIOR_N` synthetic
+//      trials so that the CI still has a defined shape. Callers can
+//      detect this from the `n` field — it equals `PRIOR_N` when the
+//      data is the fallback rather than a real measurement.
+//
+// --------------------------------------------------------------------------
+// PURITY & DETERMINISM
+// --------------------------------------------------------------------------
+// All math is pure and deterministic. The only side effect is a one-time
+// disk read performed lazily on the first lookup (not at module init) and
+// memoized in `qualityTableCache`. Tests can reset that cache via
+// `__resetQualityCacheForTest`.
+import { readFileSync } from "node:fs";
+import { dirname, resolve as resolvePath } from "node:path";
+import { fileURLToPath } from "node:url";
+import { __QUALITY_PRIOR_TABLE } from "./quality_prior.js";
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+/**
+ * Z-score for a two-sided 95% confidence interval under the normal
+ * approximation. Hardcoded to avoid pulling in a stats dependency; the
+ * Wilson interval uses this value verbatim.
+ */
+const Z_95 = 1.959963984540054;
+/**
+ * Default quality when neither the measured table nor the prior carries
+ * an entry for (taskClass, model). Matches `quality_prior.ts`'s default
+ * so callers see consistent behaviour across the two paths.
+ */
+const DEFAULT_QUALITY = 0.5;
+/**
+ * Synthetic trial count assigned to a fallback-prior cell so that the CI
+ * is well-defined. With 10 trials the Wilson 95% interval at p=0.5 is
+ * roughly [0.24, 0.76] (about +/-0.26) — deliberately wide: the prior
+ * should be treated as weak evidence, and a caller routing on `lo95` will
+ * be appropriately conservative until real measurements land.
+ */
+export const PRIOR_N = 10;
+/**
+ * The default location of the eval-harness quality table on disk.
+ * Resolved relative to this module's URL so the predictor works on any
+ * machine without hardcoded absolute paths: four `..` segments up from
+ * this module's directory, then `eval/results/quality_table.json`.
+ * Tests override via the env var below.
+ *
+ * NOTE(review): if the source lives at
+ * `packages/core/src/quality_predictor.ts`, three `..` segments reach the
+ * directory that contains `packages/`; the fourth lands one level above
+ * it. Confirm that is where `eval/` actually lives.
+ * NOTE(review): the published build ships this module under `dist/`, so
+ * the default resolves relative to the install location — presumably
+ * consumers are expected to set the env var; verify.
+ */
+const DEFAULT_QUALITY_TABLE_PATH = resolvePath(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "..", "eval", "results", "quality_table.json");
+/**
+ * Name of the env var that overrides `DEFAULT_QUALITY_TABLE_PATH`. Tests
+ * set this to a tmp file; production callers can use it to ship custom
+ * calibrations.
+ */
+const QUALITY_TABLE_PATH_ENV_VAR = "ROUTERLAB_QUALITY_TABLE_PATH";
+// ---------------------------------------------------------------------------
+// Wilson score interval (pure math)
+// ---------------------------------------------------------------------------
+/**
+ * Wilson score 95% confidence interval for a binomial proportion.
+ *
+ * Reference: Wilson, E. B. (1927). "Probable inference, the law of
+ * succession, and statistical inference." JASA 22(158): 209-212.
+ *
+ * Why Wilson over Wald: Wald (the textbook normal approximation, p̂ ±
+ * z*sqrt(p̂(1-p̂)/n)) collapses to the empty interval when p̂ ∈ {0, 1}
+ * and underestimates uncertainty for small n. Wilson is well-defined at
+ * the boundary, has better coverage at small n, and is the recommended
+ * default for binomial CIs in modern stats texts (e.g. Agresti & Coull,
+ * 1998 — "Approximate is better than 'exact' for interval estimation of
+ * binomial proportions").
+ *
+ * Inputs:
+ *   - `successes`: integer in [0, trials].
+ *   - `trials`: positive integer.
+ *
+ * Returns the (lo, hi) tuple clamped to [0, 1]. Pure function.
+ */
+export function wilsonScore95(successes, trials) {
+    if (!Number.isFinite(successes) || !Number.isFinite(trials)) {
+        throw new Error("wilsonScore95: successes and trials must be finite numbers");
+    }
+    if (trials <= 0) {
+        throw new Error("wilsonScore95: trials must be a positive integer");
+    }
+    if (successes < 0 || successes > trials) {
+        throw new Error("wilsonScore95: successes must be in [0, trials]");
+    }
+    const n = trials;
+    const p = successes / n;
+    const z = Z_95;
+    const z2 = z * z;
+    const denom = 1 + z2 / n;
+    const center = (p + z2 / (2 * n)) / denom;
+    const radius = (z * Math.sqrt((p * (1 - p)) / n + z2 / (4 * n * n))) / denom;
+    // Boundary fix-ups. At p=1 or p=0 the analytical Wilson bounds touch
+    // the unit interval exactly, but floating-point evaluation drifts by a
+    // few ulps. Snap those cases so callers get exact 0/1 values and so
+    // that downstream comparisons (e.g. `hi >= mean` when mean = 1) hold.
+    let lo = Math.max(0, center - radius);
+    let hi = Math.min(1, center + radius);
+    if (successes === trials)
+        hi = 1;
+    if (successes === 0)
+        lo = 0;
+    return { lo, hi };
+}
+// The task-class keys this module accepts. Used both to filter keys read
+// from the quality table file and as the iteration order when building
+// the prior table.
+const TASK_CLASSES = new Set([
+    "qa",
+    "codegen",
+    "summarization",
+    "classification",
+    "reasoning",
+]);
+// True when `s` is one of the task-class keys listed in TASK_CLASSES.
+const isTaskClass = (s) => TASK_CLASSES.has(s);
+const parseQualityFile = (path, raw) => {
+    let parsed;
+    try {
+        parsed = JSON.parse(raw);
+    }
+    catch (cause) {
+        throw new Error(`quality_table.json at "${path}" is not valid JSON: ${cause instanceof Error ? cause.message : String(cause)}`);
+    }
+    if (parsed === null || typeof parsed !== "object") {
+        throw new Error(`quality_table.json at "${path}" root must be an object`);
+    }
+    const file = parsed;
+    const cellsIn = file.cells;
+    if (cellsIn === undefined || typeof cellsIn !== "object" || cellsIn === null) {
+        // Schema-compatible but empty: treat as no measurements present and
+        // fall back to the prior, mirroring cost.ts's tolerance of a partly
+        // populated atlas file. The caller will see `source: "prior"` cells.
+        return buildPriorTable();
+    }
+    const cellsOut = {};
+    for (const [modelId, perTask] of Object.entries(cellsIn)) {
+        if (typeof perTask !== "object" || perTask === null)
+            continue;
+        const inner = {};
+        for (const [taskKey, cell] of Object.entries(perTask)) {
+            if (!isTaskClass(taskKey))
+                continue;
+            if (typeof cell !== "object" || cell === null)
+                continue;
+            const successes = cell.successes;
+            const trials = cell.trials;
+            if (typeof successes !== "number" ||
+                !Number.isFinite(successes) ||
+                successes < 0) {
+                continue;
+            }
+            if (typeof trials !== "number" || !Number.isFinite(trials) || trials <= 0) {
+                continue;
+            }
+            if (successes > trials)
+                continue;
+            inner[taskKey] = {
+                successes: Math.floor(successes),
+                trials: Math.floor(trials),
+            };
+        }
+        if (Object.keys(inner).length > 0) {
+            cellsOut[modelId] = inner;
+        }
+    }
+    if (Object.keys(cellsOut).length === 0) {
+        // File parsed but produced no usable cells — same fallback as missing.
+        return buildPriorTable();
+    }
+    return {
+        source: "measured",
+        cells: cellsOut,
+        loadedFrom: path,
+        ...(typeof file.generated_at === "string" ? { generatedAt: file.generated_at } : {}),
+    };
+};
+/**
+ * Build a `QualityTable` from the hardcoded prior. The prior values
+ * are interpreted as probabilities; we synthesize `(successes, trials)`
+ * with `trials = PRIOR_N` so the Wilson CI has a defined shape.
+ *
+ * The synthesized successes are clamped to `[1, PRIOR_N - 1]`. Intuition:
+ * the prior is **weak evidence**, never a certainty. A cell that says
+ * "0.95 quality" should not back the implausibly strong claim "perfect
+ * on 10/10 synthetic trials" — that would convince a strict caller
+ * (`qualityBar = 1.0`) to route to the model with no real measurements
+ * to support it. Capping at `PRIOR_N - 1` keeps the prior conservative
+ * and ensures a `qualityBar = 1.0` request always falls through to real
+ * data or errors out — both of which are the correct behaviour.
+ */
+const buildPriorTable = () => {
+    const cells = {};
+    for (const [modelId, row] of Object.entries(__QUALITY_PRIOR_TABLE)) {
+        const inner = {};
+        for (const taskKey of TASK_CLASSES) {
+            const p = row[taskKey];
+            // Round half-up, then clamp to the open interval (0, PRIOR_N).
+            const rounded = Math.round(p * PRIOR_N);
+            const successes = Math.min(PRIOR_N - 1, Math.max(1, rounded));
+            inner[taskKey] = { successes, trials: PRIOR_N };
+        }
+        cells[modelId] = inner;
+    }
+    return { source: "prior", cells };
+};
+// ---------------------------------------------------------------------------
+// Memoization
+// ---------------------------------------------------------------------------
+let qualityTableCache;
+const resolveQualityTablePath = () => {
+    const fromEnv = process.env[QUALITY_TABLE_PATH_ENV_VAR];
+    if (fromEnv !== undefined && fromEnv.length > 0)
+        return fromEnv;
+    return DEFAULT_QUALITY_TABLE_PATH;
+};
+const loadQualityTable = () => {
+    const path = resolveQualityTablePath();
+    let raw;
+    try {
+        raw = readFileSync(path, "utf8");
+    }
+    catch {
+        // File not present — the expected state before the eval harness runs.
+        // Fall back to the seeded prior so routing stays usable.
+        return buildPriorTable();
+    }
+    return parseQualityFile(path, raw);
+};
+const getQualityTable = () => {
+    if (qualityTableCache !== undefined)
+        return qualityTableCache;
+    qualityTableCache = loadQualityTable();
+    return qualityTableCache;
+};
+/**
+ * Test-only hook: clears the memoized quality table so the next lookup
+ * reloads it (re-reading the env var override and the file on disk).
+ * Mirrors `__resetCalibrationCacheForTest` in `cost.ts`.
+ * Not part of the public API surface but intentionally exported with the
+ * `__`-prefix convention so tests can reach for it explicitly.
+ */
+export const __resetQualityCacheForTest = () => {
+    // Setting the cache back to `undefined` is what triggers the reload.
+    qualityTableCache = undefined;
+};
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+/**
+ * Return the expected quality for `(taskClass, modelId)` as a point estimate
+ * in [0, 1].
+ *
+ * Backward-compatible signature: matches `quality_prior.predictQuality` so
+ * the router doesn't need to change. The mean comes from
+ * `predictQualityWithCI` — callers wanting uncertainty should use that
+ * function directly.
+ *
+ * Unknown models receive `DEFAULT_QUALITY = 0.5`.
+ */
+export function predictQuality(taskClass, modelId) {
+    return predictQualityWithCI(taskClass, modelId).mean;
+}
+/**
+ * Return the expected quality for `(taskClass, modelId)` with a 95%
+ * Wilson-score confidence interval.
+ *
+ * - `mean` is `successes / trials`.
+ * - `lo95` / `hi95` is the Wilson score interval at the 95% level.
+ * - `n` is the trial count (real for measured cells, `PRIOR_N` for the
+ *   seeded prior fallback).
+ *
+ * Unknown (taskClass, modelId) pairs return a uniform-prior estimate:
+ *   `{ mean: 0.5, lo95, hi95, n: PRIOR_N }`
+ * where the CI is the Wilson 95% interval for 5 successes in 10 trials —
+ * intentionally wide to reflect that no data backs the estimate.
+ */
+export function predictQualityWithCI(taskClass, modelId) {
+    const table = getQualityTable();
+    const row = table.cells[modelId];
+    const cell = row !== undefined ? row[taskClass] : undefined;
+    if (cell === undefined) {
+        // Unknown model or unknown task class for a known model: return a
+        // uniform prior with PRIOR_N synthetic trials so the CI is defined.
+        const successes = Math.round(DEFAULT_QUALITY * PRIOR_N);
+        const { lo, hi } = wilsonScore95(successes, PRIOR_N);
+        return {
+            mean: DEFAULT_QUALITY,
+            lo95: lo,
+            hi95: hi,
+            n: PRIOR_N,
+        };
+    }
+    const mean = cell.successes / cell.trials;
+    const { lo, hi } = wilsonScore95(cell.successes, cell.trials);
+    return { mean, lo95: lo, hi95: hi, n: cell.trials };
+}
+/**
+ * Introspection helper. Returns whether the predictor is currently serving
+ * measurements or the fallback prior, and the metadata of the loaded file.
+ * Useful for the CLI's `route --debug` output and for the paper's
+ * reproducibility appendix.
+ */
+export function getQualitySourceInfo() {
+    const table = getQualityTable();
+    const info = {
+        source: table.source,
+    };
+    if (table.loadedFrom !== undefined)
+        info.loadedFrom = table.loadedFrom;
+    if (table.generatedAt !== undefined)
+        info.generatedAt = table.generatedAt;
+    return info;
+}
+//# sourceMappingURL=quality_predictor.js.map

package/dist/quality_predictor.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"quality_predictor.js","sourceRoot":"","sources":["../src/quality_predictor.ts"],"names":[],"mappings":"AAAA,qEAAqE;AACrE,EAAE;AACF,6EAA6E;AAC7E,yBAAyB;AACzB,6EAA6E;AAC7E,sEAAsE;AACtE,uEAAuE;AACvE,2EAA2E;AAC3E,2EAA2E;AAC3E,yDAAyD;AACzD,oEAAoE;AACpE,EAAE;AACF,oCAAoC;AACpC,oEAAoE;AACpE,sEAAsE;AACtE,wEAAwE;AACxE,sEAAsE;AACtE,0EAA0E;AAC1E,uEAAuE;AACvE,yDAAyD;AACzD,EAAE;AACF,6EAA6E;AAC7E,yBAAyB;AACzB,6EAA6E;AAC7E,wEAAwE;AACxE,0EAA0E;AAC1E,oEAAoE;AACpE,wBAAwB;AACxB,8DAA8D;AAC9D,oEAAoE;AACpE,oEAAoE;AACpE,qEAAqE;AACrE,4DAA4D;AAC5D,EAAE;AACF,6EAA6E;AAC7E,uBAAuB;AACvB,6EAA6E;AAC7E,2EAA2E;AAC3E,uEAAuE;AACvE,qDAAqD;AAErD,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,WAAW,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAuB3D,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,IAAI,GAAG,iBAAiB,CAAC;AAE/B;;;;GAIG;AACH,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,EAAE,CAAC;AAE1B;;;;;;;GAOG;AACH,MAAM,0BAA0B,GAAG,WAAW,CAC5C,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EACvC,IAAI,EACJ,IAAI,EACJ,IAAI,EACJ,IAAI,EACJ,MAAM,EACN,SAAS,EACT,oBAAoB,CACrB,CAAC;AAEF;;;GAGG;AACH,MAAM,0BAA0B,GAAG,8BAA8B,CAAC;AAElE,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,aAAa,CAAC,SAAiB,EAAE,MAAc;IAC7D,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;IAChF,CAAC;IACD,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CAAC,kDAAkD,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,SAAS,GAAG,CAAC,IAAI,SAAS,GAAG,MAAM,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;IACrE,CAAC;IAED,MAAM,CAAC,GAAG,MAAM,CAAC;IACjB,MAAM,CAAC,GAAG,SAAS,GAAG,CAAC,CAAC;IACxB,MAAM,CAAC,GAAG,IAAI,CAAC;IACf,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;IAEjB,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IACzB,MAAM,MAAM,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC;IAC1C,MAAM,MAAM,GAA
G,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC;IAE7E,qEAAqE;IACrE,uEAAuE;IACvE,oEAAoE;IACpE,sEAAsE;IACtE,IAAI,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC,CAAC;IACtC,IAAI,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC,CAAC;IACtC,IAAI,SAAS,KAAK,MAAM;QAAE,EAAE,GAAG,CAAC,CAAC;IACjC,IAAI,SAAS,KAAK,CAAC;QAAE,EAAE,GAAG,CAAC,CAAC;IAC5B,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC;AACpB,CAAC;AAkDD,MAAM,YAAY,GAA2B,IAAI,GAAG,CAAY;IAC9D,IAAI;IACJ,SAAS;IACT,eAAe;IACf,gBAAgB;IAChB,WAAW;CACZ,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG,CAAC,CAAS,EAAkB,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAc,CAAC,CAAC;AAEpF,MAAM,gBAAgB,GAAG,CAAC,IAAY,EAAE,GAAW,EAAgB,EAAE;IACnE,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CACb,0BAA0B,IAAI,wBAC5B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CACvD,EAAE,CACH,CAAC;IACJ,CAAC;IACD,IAAI,MAAM,KAAK,IAAI,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAClD,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,0BAA0B,CAAC,CAAC;IAC5E,CAAC;IACD,MAAM,IAAI,GAAG,MAAwB,CAAC;IACtC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC;IAC3B,IAAI,OAAO,KAAK,SAAS,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;QAC7E,oEAAoE;QACpE,oEAAoE;QACpE,qEAAqE;QACrE,OAAO,eAAe,EAAE,CAAC;IAC3B,CAAC;IAED,MAAM,QAAQ,GAA4D,EAAE,CAAC;IAC7E,KAAK,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACzD,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,OAAO,KAAK,IAAI;YAAE,SAAS;QAC9D,MAAM,KAAK,GAA4C,EAAE,CAAC;QAC1D,KAAK,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;YACtD,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC;gBAAE,SAAS;YACpC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI;gBAAE,SAAS;YACxD,MAAM,SAAS,GAAI,IAAgC,CAAC,SAAS,CAAC;YAC9D,MAAM,MAAM,GAAI,IAA6B,CAAC,MAAM,CAAC;YACrD,IACE,OAAO,SAAS,KAAK,QAAQ;gBAC7B,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAC3B,SAAS,GAAG,CAAC,EACb,CAAC;gBACD,SAAS;Y
ACX,CAAC;YACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC1E,SAAS;YACX,CAAC;YACD,IAAI,SAAS,GAAG,MAAM;gBAAE,SAAS;YACjC,KAAK,CAAC,OAAO,CAAC,GAAG;gBACf,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC;gBAChC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;aAC3B,CAAC;QACJ,CAAC;QACD,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClC,QAAQ,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvC,uEAAuE;QACvE,OAAO,eAAe,EAAE,CAAC;IAC3B,CAAC;IAED,OAAO;QACL,MAAM,EAAE,UAAU;QAClB,KAAK,EAAE,QAAQ;QACf,UAAU,EAAE,IAAI;QAChB,GAAG,CAAC,OAAO,IAAI,CAAC,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACrF,CAAC;AACJ,CAAC,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,MAAM,eAAe,GAAG,GAAiB,EAAE;IACzC,MAAM,KAAK,GAA4D,EAAE,CAAC;IAC1E,KAAK,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,qBAAqB,CAAC,EAAE,CAAC;QACnE,MAAM,KAAK,GAA4C,EAAE,CAAC;QAC1D,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;YACnC,MAAM,CAAC,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;YACvB,+DAA+D;YAC/D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC;YACxC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;YAC9D,KAAK,CAAC,OAAO,CAAC,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;QAClD,CAAC;QACD,KAAK,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC;IACzB,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AACpC,CAAC,CAAC;AAEF,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E,IAAI,iBAA2C,CAAC;AAEhD,MAAM,uBAAuB,GAAG,GAAW,EAAE;IAC3C,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;IACxD,IAAI,OAAO,KAAK,SAAS,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,OAAO,CAAC;IAChE,OAAO,0BAA0B,CAAC;AACpC,CAAC,CAAC;AAEF,MAAM,gBAAgB,GAAG,GAAiB,EAAE;IAC1C,MAAM,IAAI,GAAG,uBAAuB,EAAE,CAAC;IACvC,IAAI,GAAW,CAAC;IAChB,IAAI,CAAC;QACH,GAAG,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,sEAAsE;QACtE,yDAAyD;QACzD,OAAO,eAAe,EAAE,CAAC;IAC3B,CAAC;IACD,OAAO,gBAAgB,CAAC,IAAI,
EAAE,GAAG,CAAC,CAAC;AACrC,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,GAAiB,EAAE;IACzC,IAAI,iBAAiB,KAAK,SAAS;QAAE,OAAO,iBAAiB,CAAC;IAC9D,iBAAiB,GAAG,gBAAgB,EAAE,CAAC;IACvC,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAC;AAEF;;;;;GAKG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG,GAAS,EAAE;IACnD,iBAAiB,GAAG,SAAS,CAAC;AAChC,CAAC,CAAC;AAEF,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;;;;;;GAUG;AACH,MAAM,UAAU,cAAc,CAAC,SAAoB,EAAE,OAAe;IAClE,OAAO,oBAAoB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;AACvD,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,oBAAoB,CAClC,SAAoB,EACpB,OAAe;IAEf,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;IAChC,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5D,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,kEAAkE;QAClE,oEAAoE;QACpE,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,OAAO,CAAC,CAAC;QACxD,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,aAAa,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QACrD,OAAO;YACL,IAAI,EAAE,eAAe;YACrB,IAAI,EAAE,EAAE;YACR,IAAI,EAAE,EAAE;YACR,CAAC,EAAE,OAAO;SACX,CAAC;IACJ,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC;IAC1C,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9D,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC;AACtD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,oBAAoB;IAKlC,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;IAChC,MAAM,IAAI,GAAgF;QACxF,MAAM,EAAE,KAAK,CAAC,MAAM;KACrB,CAAC;IACF,IAAI,KAAK,CAAC,UAAU,KAAK,SAAS;QAAE,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,UAAU,CAAC;IACvE,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS;QAAE,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC,WAAW,CAAC;IAC1E,OAAO,IAAI,CAAC;AACd,CAAC"}

package/dist/quality_prior.d.ts ADDED Viewed

@@ -0,0 +1,24 @@
+import type { TaskClass } from "./types.ts";
+/**
+ * Internal type: a quality estimate per task class for a single model id.
+ * Missing entries fall back to `DEFAULT_QUALITY`.
+ */
+type QualityRow = Record<TaskClass, number>;
+/**
+ * Look up the expected quality for `(taskClass, modelId)` from the seeded
+ * prior table. Kept exported for backward-compat with older callers and
+ * for tests that need to inspect the raw prior. New code should prefer
+ * `predictQuality` / `predictQualityWithCI` from `quality_predictor.ts`,
+ * which serves measured data when available and falls back to this
+ * table otherwise.
+ *
+ * Models not in the table fall back to `DEFAULT_QUALITY = 0.5`.
+ */
+export declare function predictQuality(taskClass: TaskClass, modelId: string): number;
+/**
+ * Exposed for tests and for introspection by downstream tooling (e.g. the
+ * Phase 3 calibration harness, which uses this as a starting prior).
+ */
+export declare const __QUALITY_PRIOR_TABLE: Readonly<Record<string, QualityRow>>;
+export {};
+//# sourceMappingURL=quality_prior.d.ts.map

package/dist/quality_prior.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"quality_prior.d.ts","sourceRoot":"","sources":["../src/quality_prior.ts"],"names":[],"mappings":"AAiCA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAE5C;;;GAGG;AACH,KAAK,UAAU,GAAG,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;AA0D5C;;;;;;;;;GASG;AACH,wBAAgB,cAAc,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAM5E;AAED;;;GAGG;AACH,eAAO,MAAM,qBAAqB,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CACxD,CAAC"}

package/dist/quality_prior.js ADDED Viewed

@@ -0,0 +1,109 @@
+// quality_prior.ts — seeded fallback prior for the routing engine.
+//
+// PHASE 2 -> PHASE 3 ROLE SHIFT: This file used to be the source of truth
+// for `predictQuality`. In Phase 3 it has been demoted to a fallback prior
+// consumed by `quality_predictor.ts`. The real, calibrated quality
+// estimates now come from measured eval data at
+// `eval/results/quality_table.json`. When that file is missing or
+// produces no usable cells (e.g. project bootstrap, fresh checkout
+// before the eval harness has run), `quality_predictor.ts` synthesizes
+// (successes, trials) counts from this prior table so the Wilson CI is
+// well-defined. The `predictQuality()` function below is kept for
+// backward-compat — `index.ts` now re-exports the predictor's version
+// in preference — but the table data is the load-bearing artifact.
+//
+// Prior-art note: a calibrated
+// pre-call quality predictor on its own is not novel — RouteLLM's matrix
+// factorization, BEST-Route's difficulty heads, and cross-attention
+// routers all do this. Our differentiation comes from pairing the
+// predictor with atlas-grounded empirical token costs PLUS reporting
+// explicit Wilson 95% CIs (so callers can route on a confident lower
+// bound rather than a point estimate), NOT from the predictor's
+// algorithmic structure.
+//
+// Design choices for the prior values:
+//   - Bigger / newer models score higher on harder tasks (codegen,
+//     reasoning) where parameter count and training recency dominate.
+//   - Smaller / cheaper models score competitively on easier tasks
+//     (classification, summarization, simple QA) where capability
+//     headroom is wasted.
+//   - Values are eyeballed from public eval reports (MT-Bench, HumanEval,
+//     MMLU, etc.) circa late-2025 / early-2026 — they are NOT measured
+//     here. Phase 3 replaces them with measurements.
+const DEFAULT_QUALITY = 0.5;
+/**
+ * Hardcoded per-(model, task) quality prior. The key is the model id from
+ * the candidate pool (see `candidates.json`). Values are expected quality
+ * in [0, 1] interpreted as "fraction of tasks of this class that this
+ * model gets right at the rubric's threshold."
+ *
+ * Superseded as the primary source in Phase 3: `quality_predictor.ts` now
+ * serves measured data when available and uses this table only as the
+ * fallback prior.
+ */
+const QUALITY_PRIOR = {
+    "claude-opus-4-7": {
+        qa: 0.95,
+        codegen: 0.93,
+        summarization: 0.94,
+        classification: 0.96,
+        reasoning: 0.95,
+    },
+    "claude-sonnet-4-6": {
+        qa: 0.91,
+        codegen: 0.88,
+        summarization: 0.92,
+        classification: 0.93,
+        reasoning: 0.89,
+    },
+    "claude-haiku-4-5": {
+        qa: 0.84,
+        codegen: 0.78,
+        summarization: 0.86,
+        classification: 0.89,
+        reasoning: 0.78,
+    },
+    "llama-3.3-70b": {
+        qa: 0.82,
+        codegen: 0.79,
+        summarization: 0.83,
+        classification: 0.87,
+        reasoning: 0.76,
+    },
+    "llama-3.1-8b": {
+        qa: 0.68,
+        codegen: 0.58,
+        summarization: 0.72,
+        classification: 0.78,
+        reasoning: 0.55,
+    },
+    "mixtral-8x7b": {
+        qa: 0.74,
+        codegen: 0.69,
+        summarization: 0.77,
+        classification: 0.81,
+        reasoning: 0.65,
+    },
+};
+/**
+ * Look up the expected quality for `(taskClass, modelId)` from the seeded
+ * prior table. Kept exported for backward-compat with older callers and
+ * for tests that need to inspect the raw prior. New code should prefer
+ * `predictQuality` / `predictQualityWithCI` from `quality_predictor.ts`,
+ * which serves measured data when available and falls back to this
+ * table otherwise.
+ *
+ * Models not in the table fall back to `DEFAULT_QUALITY = 0.5`.
+ */
+export function predictQuality(taskClass, modelId) {
+    const row = QUALITY_PRIOR[modelId];
+    if (row === undefined) {
+        return DEFAULT_QUALITY;
+    }
+    return row[taskClass];
+}
+/**
+ * Exposed for tests and for introspection by downstream tooling (e.g. the
+ * Phase 3 calibration harness, which uses this as a starting prior).
+ *
+ * This is the same object as the internal `QUALITY_PRIOR` table, not a
+ * copy, and nothing here freezes it — treat it as read-only.
+ */
+export const __QUALITY_PRIOR_TABLE = QUALITY_PRIOR;
+//# sourceMappingURL=quality_prior.js.map

package/dist/quality_prior.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"quality_prior.js","sourceRoot":"","sources":["../src/quality_prior.ts"],"names":[],"mappings":"AAAA,mEAAmE;AACnE,EAAE;AACF,0EAA0E;AAC1E,2EAA2E;AAC3E,mEAAmE;AACnE,gDAAgD;AAChD,kEAAkE;AAClE,mEAAmE;AACnE,uEAAuE;AACvE,uEAAuE;AACvE,kEAAkE;AAClE,sEAAsE;AACtE,mEAAmE;AACnE,EAAE;AACF,+BAA+B;AAC/B,yEAAyE;AACzE,oEAAoE;AACpE,kEAAkE;AAClE,qEAAqE;AACrE,qEAAqE;AACrE,gEAAgE;AAChE,yBAAyB;AACzB,EAAE;AACF,uCAAuC;AACvC,mEAAmE;AACnE,sEAAsE;AACtE,mEAAmE;AACnE,kEAAkE;AAClE,0BAA0B;AAC1B,0EAA0E;AAC1E,uEAAuE;AACvE,qDAAqD;AAUrD,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B;;;;;;;;GAQG;AACH,MAAM,aAAa,GAA+B;IAChD,iBAAiB,EAAE;QACjB,EAAE,EAAE,IAAI;QACR,OAAO,EAAE,IAAI;QACb,aAAa,EAAE,IAAI;QACnB,cAAc,EAAE,IAAI;QACpB,SAAS,EAAE,IAAI;KAChB;IACD,mBAAmB,EAAE;QACnB,EAAE,EAAE,IAAI;QACR,OAAO,EAAE,IAAI;QACb,aAAa,EAAE,IAAI;QACnB,cAAc,EAAE,IAAI;QACpB,SAAS,EAAE,IAAI;KAChB;IACD,kBAAkB,EAAE;QAClB,EAAE,EAAE,IAAI;QACR,OAAO,EAAE,IAAI;QACb,aAAa,EAAE,IAAI;QACnB,cAAc,EAAE,IAAI;QACpB,SAAS,EAAE,IAAI;KAChB;IACD,eAAe,EAAE;QACf,EAAE,EAAE,IAAI;QACR,OAAO,EAAE,IAAI;QACb,aAAa,EAAE,IAAI;QACnB,cAAc,EAAE,IAAI;QACpB,SAAS,EAAE,IAAI;KAChB;IACD,cAAc,EAAE;QACd,EAAE,EAAE,IAAI;QACR,OAAO,EAAE,IAAI;QACb,aAAa,EAAE,IAAI;QACnB,cAAc,EAAE,IAAI;QACpB,SAAS,EAAE,IAAI;KAChB;IACD,cAAc,EAAE;QACd,EAAE,EAAE,IAAI;QACR,OAAO,EAAE,IAAI;QACb,aAAa,EAAE,IAAI;QACnB,cAAc,EAAE,IAAI;QACpB,SAAS,EAAE,IAAI;KAChB;CACF,CAAC;AAEF;;;;;;;;;GASG;AACH,MAAM,UAAU,cAAc,CAAC,SAAoB,EAAE,OAAe;IAClE,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IACnC,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;QACtB,OAAO,eAAe,CAAC;IACzB,CAAC;IACD,OAAO,GAAG,CAAC,SAAS,CAAC,CAAC;AACxB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAChC,aAAa,CAAC"}

package/dist/router.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import type { ModelCandidate, RouteDecision, RouteRequest } from "./types.ts";
+/**
+ * Return the default candidate pool. Useful for callers that want to
+ * inspect the pool, filter it, or extend it before routing.
+ */
+export declare function getDefaultCandidates(): readonly ModelCandidate[];
+/**
+ * The public routing entrypoint.
+ *
+ * Pipeline:
+ *   1. Validate the request.
+ *   2. Resolve the candidate pool (caller override > shipped default).
+ *   3. Filter out candidates that fail the quality bar, the cost budget,
+ *      or the model's context window. Record reasons.
+ *   4. Sort survivors by expected cost ascending (quality breaks ties).
+ *   5. Pick the cheapest survivor as `chosen`; the next three as
+ *      `fallbacks`. Anything below that is in `skipped` only if it failed
+ *      a constraint — extra cheap-survivors past the fallback list are
+ *      simply not returned.
+ *   6. If no candidate survives, throw with the full skipped list so the
+ *      caller can see exactly what went wrong.
+ *
+ * Sync return: this function does no I/O. Returning a promise would be
+ * misleading.
+ */
+export declare function route(request: RouteRequest): RouteDecision;
+//# sourceMappingURL=router.d.ts.map

package/dist/router.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"router.d.ts","sourceRoot":"","sources":["../src/router.ts"],"names":[],"mappings":"AA0CA,OAAO,KAAK,EACV,cAAc,EACd,aAAa,EAGb,YAAY,EAEb,MAAM,YAAY,CAAC;AAcpB;;;GAGG;AACH,wBAAgB,oBAAoB,IAAI,SAAS,cAAc,EAAE,CAEhE;AA4JD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,KAAK,CAAC,OAAO,EAAE,YAAY,GAAG,aAAa,CAsB1D"}