npm - open-classify - Versions diffs - 0.1.1 → 0.1.2 - Mend

open-classify 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/README.md +30 -24
package/dist/src/aggregator.d.ts +4 -1
package/dist/src/aggregator.js +25 -15
package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/manifest.json +3 -1
package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/prompt.md +1 -1
package/dist/src/classifiers/custom/memory_retrieval_queries/manifest.json +2 -0
package/dist/src/classifiers/stock/model_specialization/manifest.json +4 -1
package/dist/src/classifiers/stock/preflight/manifest.json +4 -1
package/dist/src/classifiers/stock/prompt_injection/manifest.json +12 -0
package/dist/src/classifiers/stock/prompts/confidence.md +3 -3
package/dist/src/classifiers/stock/prompts/custom-output.md +7 -1
package/dist/src/classifiers/stock/prompts/preflight.md +7 -7
package/dist/src/classifiers/stock/prompts/prompt-injection-output.md +5 -0
package/dist/src/classifiers/stock/prompts/prompt_injection.md +24 -0
package/dist/src/classifiers/stock/prompts/reason.md +1 -1
package/dist/src/classifiers/stock/prompts/specialty.md +8 -6
package/dist/src/classifiers/stock/prompts/tier.md +1 -1
package/dist/src/classifiers/stock/routing/manifest.json +4 -1
package/dist/src/classifiers/stock/tools/manifest.json +2 -0
package/dist/src/config.d.ts +2 -0
package/dist/src/config.js +33 -1
package/dist/src/enums.d.ts +3 -7
package/dist/src/enums.js +7 -30
package/dist/src/index.js +1 -1
package/dist/src/input.js +1 -1
package/dist/src/manifest.d.ts +31 -23
package/dist/src/manifest.js +5 -1
package/dist/src/ollama.d.ts +2 -1
package/dist/src/ollama.js +1 -0
package/dist/src/pipeline.d.ts +1 -0
package/dist/src/pipeline.js +78 -48
package/dist/src/stock-prompt.js +1 -1
package/dist/src/stock-validation.d.ts +1 -2
package/dist/src/stock-validation.js +23 -40
package/dist/src/stock.d.ts +12 -11
package/dist/src/stock.js +21 -1
package/dist/src/ui-server.js +12 -5
package/dist/src/validation.d.ts +0 -1
package/dist/src/validation.js +0 -37
package/docs/adding-a-classifier.md +131 -0
package/docs/manifests.md +127 -0
package/docs/resolver.md +104 -0
package/docs/signals.md +102 -0
package/downstream-models.json +124 -0
package/open-classify.config.example.json +5 -1
package/package.json +3 -1
package/dist/src/classifiers/stock/prompts/security-output.md +0 -8
package/dist/src/classifiers/stock/prompts/security.md +0 -26
package/dist/src/classifiers/stock/security/manifest.json +0 -12

package/dist/src/ui-server.js CHANGED Viewed

@@ -21,17 +21,17 @@ import { createServer } from "node:http";
 import { extname, join, normalize } from "node:path";
 import { loadCatalog } from "./catalog.js";
 import { CLASSIFIER_NAMES, REGISTRY } from "./classifiers.js";
+import { DEFAULT_CERTAINTY_THRESHOLD, certaintyThreshold, } from "./aggregator.js";
 import { classifierModelsFromConfig, loadOpenClassifyConfig, } from "./config.js";
-import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, SECURITY_DECISION_VALUES, SECURITY_RISK_LEVEL_VALUES, SECURITY_SIGNAL_VALUES, } from "./enums.js";
+import { DEFAULT_CERTAINTY_GATE } from "./pipeline.js";
+import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, PROMPT_INJECTION_RISK_LEVEL_VALUES, } from "./enums.js";
 import { createOllamaClassifierRunner, OLLAMA_CONTEXT_LENGTH, OLLAMA_DEFAULT_CATALOG_PATH, OLLAMA_MIN_AVAILABLE_MEMORY_BYTES, OLLAMA_MIN_TOTAL_MEMORY_BYTES, OLLAMA_REQUIRED_PARALLELISM, } from "./ollama.js";
 import { classifyOpenClassifyInput } from "./pipeline.js";
 // Served at GET /api/enums so the UI never needs to duplicate shared enum values.
 const CLASSIFIER_ENUMS = {
     downstream_model_tier: [...DOWNSTREAM_MODEL_TIER_VALUES],
     model_specialization: [...MODEL_SPECIALIZATION_VALUES],
-    security_decision: [...SECURITY_DECISION_VALUES],
-    security_risk_level: [...SECURITY_RISK_LEVEL_VALUES],
-    security_signal: [...SECURITY_SIGNAL_VALUES],
+    prompt_injection_risk_level: [...PROMPT_INJECTION_RISK_LEVEL_VALUES],
 };
 const CLASSIFIER_METADATA = REGISTRY.map((classifier) => ({
     name: classifier.name,
@@ -77,7 +77,13 @@ async function route(request, response) {
             return;
         }
         if (request.method === "GET" && url.pathname === "/api/classifiers") {
-            sendJson(response, { classifiers: CLASSIFIER_METADATA });
+            sendJson(response, {
+                classifiers: CLASSIFIER_METADATA,
+                aggregator: {
+                    certaintyGate: OPEN_CLASSIFY_CONFIG?.aggregator?.certaintyGate ?? DEFAULT_CERTAINTY_GATE,
+                    certaintyThreshold: certaintyThreshold(OPEN_CLASSIFY_CONFIG?.aggregator) ?? DEFAULT_CERTAINTY_THRESHOLD,
+                },
+            });
             return;
         }
         if (request.method === "GET") {
@@ -181,6 +187,7 @@ async function classifyStream(request, response) {
         const result = await classifyOpenClassifyInput(input, {
             runClassifier,
             catalog: loadCatalog(CATALOG_PATH),
+            aggregator: OPEN_CLASSIFY_CONFIG?.aggregator,
             signal: clientAbortController.signal,
         });
         send("pipeline_completed", result);

package/dist/src/validation.d.ts CHANGED Viewed

@@ -11,7 +11,6 @@ export declare function requireStringArray(value: unknown, classifier: string, m
 export declare function requireStringMaxLength(value: unknown, classifier: string, model: string, path: string, maxChars: number): string;
 export declare function requireNonEmptyStringMaxLength(value: unknown, classifier: string, model: string, path: string, maxChars: number): string;
 export declare function requireEnum<const Values extends readonly string[]>(value: unknown, values: Values, classifier: string, model: string, path: string): Values[number];
-export declare function requireConfidence(value: unknown, classifier: string, model: string, path?: string): number;
 export declare function ensureExactKeys(value: Record<string, unknown>, keys: readonly string[], classifier: string, model: string): void;
 export declare function ensureNoDuplicates(values: string[], classifier: string, model: string, path: string): void;
 export declare function isRecord(value: unknown): value is Record<string, unknown>;

package/dist/src/validation.js CHANGED Viewed

@@ -67,43 +67,6 @@ export function requireEnum(value, values, classifier, model, path) {
     }
     return value;
 }
-// `confidence` must be a finite number in [0, 1]. Required on every
-// classifier output (ClassifierResultBase); fallback shapes use 0.
-export function requireConfidence(value, classifier, model, path = "confidence") {
-    const confidence = normalizeConfidence(value);
-    if (typeof confidence !== "number" ||
-        !Number.isFinite(confidence) ||
-        confidence < 0 ||
-        confidence > 1) {
-        throwInvalid(classifier, model, `${path} must be a number between 0 and 1 inclusive`);
-    }
-    return confidence;
-}
-function normalizeConfidence(value) {
-    if (typeof value === "number") {
-        return value > 1 && value <= 100 ? value / 100 : value;
-    }
-    if (typeof value !== "string")
-        return value;
-    const text = value.trim().toLowerCase();
-    if (text === "")
-        return value;
-    if (text.endsWith("%")) {
-        const percent = Number(text.slice(0, -1).trim());
-        return Number.isFinite(percent) ? percent / 100 : value;
-    }
-    const numeric = Number(text);
-    if (Number.isFinite(numeric)) {
-        return numeric > 1 && numeric <= 100 ? numeric / 100 : numeric;
-    }
-    if (text === "high")
-        return 0.9;
-    if (text === "medium")
-        return 0.5;
-    if (text === "low")
-        return 0.2;
-    return value;
-}
 export function ensureExactKeys(value, keys, classifier, model) {
     const expected = new Set(keys);
     for (const key of Object.keys(value)) {

package/docs/adding-a-classifier.md ADDED Viewed

@@ -0,0 +1,131 @@
+# Adding a classifier
+Most additions are custom classifiers. You drop two files in a directory; the runtime picks them up. No TypeScript registry edits required.
+## 1. Pick a directory
+Custom classifier:
+```
+src/classifiers/custom/<name>/
+├── manifest.json
+└── prompt.md
+```
+Stock classifier names are closed (`preflight`, `routing`, `model_specialization`, `tools`, `prompt_injection`). You generally don't add new stock classifiers — extend behavior with a custom one instead.
+## 2. Write the manifest
+```json
+{
+  "kind": "custom",
+  "name": "topic_tags",
+  "version": "1.0.0",
+  "purpose": "Tag the message with a small set of topic labels for analytics.",
+  "order": 70,
+  "fallback": {
+    "reason": "Classifier failed; no tags generated.",
+    "certainty": "no_signal",
+    "output": { "tags": [] }
+  },
+  "output_schema": {
+    "type": "object",
+    "additionalProperties": false,
+    "required": ["tags"],
+    "properties": {
+      "tags": {
+        "type": "array", "maxItems": 5,
+        "items": { "type": "string", "minLength": 1, "maxLength": 40 }
+      }
+    }
+  }
+}
+```
+Rules:
+- `name` must match the directory name.
+- `name` must not collide with a stock classifier name.
+- `order` must not collide with any other classifier.
+- `fallback.output` must validate against your `output_schema`; the `fallback` object itself also carries `reason` and `certainty`.
+See [manifests.md](manifests.md) for the full field list.
+## 3. Write the prompt
+`prompt.md` is the classifier-specific instruction text. The runtime composes it with an auto-generated preamble that describes the JSON output envelope, so your prompt can focus on the classification rule:
+```markdown
+You are the topic_tags classifier.
+Tags are short single-word topic labels (lowercase, no spaces). Use at most five.
+Return an empty array when no clear topic applies.
+Do not invent tags for vague or ambiguous messages.
+```
+Keep it focused. Don't put aggregation or routing rules in prompts — those live in the runtime and catalog.
+## 4. Build and test
+```sh
+npm run build   # validates the manifest, sorts the registry, copies assets
+npm test
+```
+If the manifest is malformed, the loader throws `ClassifierManifestError` with the path and a specific reason.
+## 5. Consume the output
+```ts
+const result = await classifyWithOllama(input, { catalog });
+if (result.action === "route") {
+  const tags = result.classifier_outputs.topic_tags?.tags ?? [];
+}
+```
+`result.audit.custom_outputs[]` carries the same data with required `reason` and `certainty` metadata if you need to inspect them.
+## Choosing the classifier model
+For apps and OSS installs, prefer `open-classify.config.json`:
+```json
+{
+  "runner": {
+    "provider": "ollama",
+    "defaultModel": "gemma4:e4b-it-q4_K_M",
+    "models": {
+      "custom": {
+        "topic_tags": "qwen2.5:7b-instruct-q4_K_M"
+      }
+    }
+  }
+}
+```
+`runner.defaultModel` applies to every classifier without an override. `runner.models.stock` contains built-in classifier ids; `runner.models.custom` contains custom classifier ids.
+Classifier manifests may also carry an Ollama hint for packaged classifiers:
+```json
+{
+  "backend": { "ollama": { "base_model": "qwen2.5:7b-instruct-q4_K_M" } }
+}
+```
+Config file and function options take precedence over manifest hints.
+## Replacing the backend
+For full backend control, implement your own `RunClassifier` and pass it to `classifyOpenClassifyInput`:
+```ts
+import { classifyOpenClassifyInput, loadCatalog } from "open-classify";
+const runClassifier: RunClassifier = async (name, input, signal) => {
+  // call OpenAI, Anthropic, a remote service, etc.
+  // return a ClassifierOutput matching the classifier's contract.
+};
+await classifyOpenClassifyInput(input, { runClassifier, catalog: loadCatalog(...) });
+```

package/docs/manifests.md ADDED Viewed

@@ -0,0 +1,127 @@
+# Manifest reference
+Every classifier directory contains a `manifest.json`. Custom classifiers also contain a `prompt.md`. Stock prompt markdown lives in `src/classifiers/stock/prompts/` and is assembled at runtime.
+## Layout
+```
+src/classifiers/
+  stock/prompts/              # built-in prompt markdown
+    base.md
+    confidence.md
+    reason.md
+    tier.md
+    specialty.md
+    tools-output.md
+    tools.md
+  stock/<name>/                # built-in classifier
+    manifest.json
+  custom/<name>/               # caller-defined classifier
+    manifest.json
+    prompt.md
+```
+The `kind` field in the manifest must match the parent directory (`stock` or `custom`). Mismatches are rejected at load time.
+## Common fields
+| Field | Required | Description |
+|---|---|---|
+| `kind` | yes | `"stock"` or `"custom"` |
+| `name` | yes | Classifier id. Must match the directory name. |
+| `version` | yes | Contract version surfaced in `meta.classifiers[name].version`. |
+| `purpose` | yes | Human-readable description. |
+| `order` | yes | Integer sort key. Duplicate orders are rejected. |
+| `fallback` | yes | Output emitted when the classifier errors or times out. Must validate against the kind's output contract. |
+| `backend.ollama.base_model` | no | Packaged Ollama model hint for this classifier. User config and function options take precedence. |
+## Stock manifests
+Stock manifests use a closed set of names (`preflight`, `routing`, `model_specialization`, `tools`, `prompt_injection`). The runtime knows each name's signal type, so there's no `emits` field. Fallbacks must satisfy the signal contract for that name (see [signals.md](signals.md)).
+The `tools` classifier additionally takes:
+| Field | Required | Description |
+|---|---|---|
+| `tools` | no | Array of `{ id, description }`. Restricts which tool ids the classifier may emit. |
+Example (`src/classifiers/stock/prompt_injection/manifest.json`):
+```json
+{
+  "kind": "stock",
+  "name": "prompt_injection",
+  "version": "1.0.0",
+  "purpose": "Assess whether the target message contains prompt-injection attempts.",
+  "order": 50,
+  "fallback": {
+    "reason": "Classifier failed; prompt-injection risk is unknown.",
+    "certainty": "no_signal",
+    "risk_level": "unknown"
+  }
+}
+```
+## Custom manifests
+| Field | Required | Description |
+|---|---|---|
+| `output_schema` | yes | JSON Schema (Ajv-validated) for the `output` payload. |
+Custom classifier names must not collide with any stock classifier name.
+Example:
+```json
+{
+  "kind": "custom",
+  "name": "memory_retrieval_queries",
+  "version": "1.0.0",
+  "purpose": "Generate saved-memory query hints for caller-owned memory retrieval.",
+  "order": 60,
+  "fallback": {
+    "reason": "Classifier failed; no memory queries generated.",
+    "certainty": "no_signal",
+    "output": { "queries": [] }
+  },
+  "output_schema": {
+    "type": "object",
+    "additionalProperties": false,
+    "required": ["queries"],
+    "properties": {
+      "queries": {
+        "type": "array", "maxItems": 5,
+        "items": { "type": "string", "minLength": 1, "maxLength": 120 }
+      }
+    }
+  }
+}
+```
+## Prompt files
+Stock prompt files live together in `src/classifiers/stock/prompts/`. The runtime assembles shared markdown (`base.md`, `reason.md`, `confidence.md`, `classifier-header.md`) with focused stock sections such as `tier.md`, `specialty.md`, `tools-output.md`, and the stock classifier file (`preflight.md`, `routing.md`, `model_specialization.md`, `tools.md`, or `prompt_injection.md`).
+Dynamic prompt sections use small markdown slots. For example, `tools.md` contains `{{allowed_tools}}`, and the runtime renders the allowed tool list from the tools manifest.
+Custom `prompt.md` is the classifier-specific instruction text. The runtime composes it with the shared JSON output envelope, so prompts can stay focused on classifier behavior:
+- what the classifier decides
+- when to emit each declared field
+- when to omit optional fields
+- short examples only when they clarify a boundary
+Do not put aggregation or model-id rules in prompts — those live in the runtime and catalog.
+## Validation rejections
+The loader rejects manifests that:
+- declare unsupported fields
+- collide on `name` or `order`
+- have an empty custom `prompt.md`
+- declare a custom name that matches a stock classifier
+- declare `kind` that doesn't match the parent directory
+- have a `fallback` that doesn't satisfy the signal or `output_schema`
+- are missing `output_schema` on a custom classifier
+- declare `tools` on any classifier other than the `tools` stock classifier

package/docs/resolver.md ADDED Viewed

@@ -0,0 +1,104 @@
+# Aggregation and model resolution
+The aggregator merges classifier outputs into an `Envelope`, picks a concrete model from the catalog, and returns a `PipelineResult`.
+## Certainty threshold
+Default: `0.65`. Configurable via `aggregator.certaintyThreshold` on `classifyOpenClassifyInput`. `aggregator.confidenceThreshold` remains as a deprecated compatibility alias.
+Per-classifier signals are emitted with `certainty` tags. The aggregator maps those tags to scores:
+```ts
+{
+  no_signal: 0.00,
+  very_weak: 0.15,
+  weak: 0.30,
+  tentative: 0.45,
+  reasonable: 0.60,
+  strong: 0.75,
+  very_strong: 0.88,
+  near_certain: 0.97,
+}
+```
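+Signals with scores below the threshold are dropped from aggregation. For example, with the default threshold of `0.65`, a `reasonable` signal (0.60) is dropped and `strong` (0.75) is the lowest tag that survives. Missing certainty is invalid for validated classifier outputs. Dropped routing axes are reported on `audit.model_recommendation.resolution.constraints_dropped` with `reason: "low_confidence"`.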
+Custom classifier outputs are surfaced regardless of certainty (callers can decide what to do with them), but the value still goes through schema validation.
+## Whole-run certainty gate
+Before returning a normal `route`, the pipeline calculates mapped certainty scores for every classifier result, including custom classifiers. Fallback outputs use explicit `certainty: "no_signal"`, which counts as `0`.
+`aggregator.certaintyGate` controls whether low whole-run certainty becomes `action: "block"`:
+- `min_score` (default) — compare the lowest classifier score to `certaintyThreshold`.
+- `avg_score` — compare the arithmetic mean of all classifier scores to `certaintyThreshold`.
+- `off` — do not block based on whole-run certainty.
+When this gate fires, `fired_by` is `"certainty_gate"` and `reason` / `audit.certainty_gate` include `kind: "low_certainty"`, the mode, threshold, observed score, per-classifier scores, and low classifier names.
+## Routing axis merge
+`routing` emits the `model_tier` axis. `model_specialization` emits the `specialization` axis. The aggregator includes each axis only when its classifier's certainty score meets the configured threshold.
+## Short-circuits
+The pipeline aborts early when:
+1. `preflight.final_reply` is present with certainty score ≥ threshold → `{ action: "reply", reply: { text } }`.
+2. `prompt_injection.risk_level === "high_risk"` with certainty score ≥ threshold → `{ action: "block" }`.
+3. `prompt_injection.risk_level === "unknown"` with certainty score ≥ threshold → `{ action: "block" }`.
+Preflight is evaluated first (it's cheaper to gate). Only these two stock classifiers (`preflight` and `prompt_injection`) can short-circuit; custom classifiers cannot.
+## Model resolution
+Inputs:
+- `specialization` (soft) — must be in the model's `specializations[]`.
+- `model_tier` (soft) — must equal the model's `tier`.
+Resolution passes (first non-empty match wins):
+1. specialization + tier
+2. specialization only
+3. tier only
+4. no constraints
+Within a pass, candidates are ranked:
+1. lowest **price index** (`input_tokens_cpm + output_tokens_cpm`, or `0` if pricing is absent)
+2. larger `params_in_billions`
+3. larger `context_window`
+4. earlier catalog order
+If every pass returns no candidates, the resolver returns `catalog.default` with `fell_back_to_default: true`. (In practice the no-constraints pass always finds at least one model unless the catalog is empty, so the default-fallback path is defensive.)
+## Resolution audit
+Every `route` result carries a resolution report:
+```ts
+{
+  constraints_used: { specialization?: ..., tier?: ... },
+  constraints_dropped: Array<{
+    axis: "specialization" | "tier",
+    reason: "low_confidence" | "no_match_relaxed" | "default_fallback"
+  }>,
+  confidences: { routing?: number },
+  fell_back_to_default: boolean,
+}
+```
+Drop reasons:
+- `low_confidence` — the classifier emitted the axis but below threshold.
+- `no_match_relaxed` — the axis was requested but no model matched, so the resolver relaxed it.
+- `default_fallback` — every pass failed; the resolver used `catalog.default`.
+## Custom outputs
+After aggregation:
+- `result.classifier_outputs` is a flat `Record<name, unknown>` of validated custom outputs.
+- `result.audit.custom_outputs` is the same data with `reason` and `certainty` metadata attached.

package/docs/signals.md ADDED Viewed

@@ -0,0 +1,102 @@
+# Signal contracts
+Stock classifier outputs are typed signals. Every classifier output must include `reason` (≤120 chars) and `certainty`. The aggregator maps certainty tags to numeric scores and drops below-threshold signals (default threshold: `0.65`).
+```ts
+type Certainty =
+  | "no_signal"
+  | "very_weak"
+  | "weak"
+  | "tentative"
+  | "reasonable"
+  | "strong"
+  | "very_strong"
+  | "near_certain";
+```
+## `preflight` — `FinalReplySignal | AckReplySignal`
+```ts
+{
+  final_reply?: { reply: string };  // ≤200 chars; short-circuits to action=reply
+  ack_reply?:   { reply: string };  // ≤200 chars; passthrough to caller
+  reason: string;
+  certainty: Certainty;
+}
+```
+- Emit `final_reply` only for tiny terminal answers (greetings, thanks, simple arithmetic). Never for drafting, analysis, or generated work.
+- Emit `ack_reply` when downstream work should continue and a courtesy acknowledgement helps.
+- `final_reply` and `ack_reply` are mutually exclusive.
+- A confident `final_reply` aborts the pipeline and returns `{ action: "reply", reply: { text } }`.
+## `routing` — `RoutingSignal` (tier axis)
+```ts
+{
+  model_tier?: "local_fast" | "local_small" | "local_strong" | "local_coding"
+             | "frontier_fast" | "frontier_strong" | "frontier_coding";
+  reason: string;
+  certainty: Certainty;
+}
+```
+Tier feeds the catalog resolver as a soft constraint.
+## `model_specialization` — `RoutingSignal` (specialization axis)
+```ts
+{
+  specialization?: "chat" | "reasoning" | "planning" | "writing" | "summarization"
+                 | "coding" | "tool_use" | "computer_use" | "vision";
+  reason: string;
+  certainty: Certainty;
+}
+```
+`routing` and `model_specialization` both contribute to downstream model resolution, but each owns one axis: `routing` owns `model_tier`; `model_specialization` owns `specialization`.
+## `tools` — `ToolsSignal`
+```ts
+{
+  tools: string[];
+  reason: string;
+  certainty: Certainty;
+}
+```
+- An empty `tools` array means no downstream tools are required.
+- `tools` must not contain duplicates.
+- Allowed ids are declared per-manifest in `tools`. The built-in tools classifier ships with `workspace`, `web`, `communications`, `documents`, `spreadsheets`, `project_management`, `developer_platforms`.
+## `prompt_injection` — `PromptInjectionSignal`
+```ts
+{
+  risk_level: "normal" | "suspicious" | "high_risk" | "unknown";
+  reason: string;
+  certainty: Certainty;
+}
+```
+This classifier is strictly about prompt injection: attempts to override higher-priority instructions, reveal hidden prompts, or make the assistant obey untrusted text as instructions. Destructive or sensitive ordinary requests are not prompt injection by themselves.
+Short-circuit behavior:
+- Confident `risk_level: "high_risk"` → `{ action: "block", reason: { kind: "prompt_injection", risk_level } }`.
+- Confident `risk_level: "unknown"` → `{ action: "block", reason: { kind: "prompt_injection", risk_level } }`.
+## Custom classifier output
+Custom classifiers emit an opaque `output` value validated against `output_schema`:
+```ts
+{
+  output: unknown;        // matches manifest output_schema
+  reason: string;
+  certainty: Certainty;
+}
+```
+The aggregator never reads custom `output` when picking a route or model. It surfaces values on `result.classifier_outputs.<classifier_name>` and on `result.audit.custom_outputs[]`.

package/downstream-models.json ADDED Viewed

@@ -0,0 +1,124 @@
+{
+  "models": [
+    {
+      "id": "gpt-5.5",
+      "provider": "openai",
+      "runtime": "api",
+      "specializations": [
+        "chat",
+        "writing",
+        "reasoning",
+        "planning",
+        "coding",
+        "tool_use"
+      ],
+      "tier": "frontier_strong",
+      "params_in_billions": null,
+      "context_window": 1050000,
+      "max_output_tokens": 128000,
+      "input_tokens_cpm": 5,
+      "cached_tokens_cpm": 0.5,
+      "output_tokens_cpm": 30
+    },
+    {
+      "id": "gpt-5.4-mini",
+      "provider": "openai",
+      "runtime": "api",
+      "specializations": [
+        "chat",
+        "writing",
+        "reasoning",
+        "planning",
+        "coding",
+        "computer_use",
+        "tool_use"
+      ],
+      "tier": "frontier_fast",
+      "params_in_billions": null,
+      "context_window": 400000,
+      "max_output_tokens": 128000,
+      "input_tokens_cpm": 0.75,
+      "cached_tokens_cpm": 0.075,
+      "output_tokens_cpm": 4.5
+    },
+    {
+      "id": "gemini-3-flash-preview",
+      "provider": "google",
+      "runtime": "api",
+      "specializations": [
+        "chat",
+        "writing",
+        "reasoning",
+        "planning",
+        "coding",
+        "tool_use",
+        "computer_use",
+        "vision"
+      ],
+      "tier": "frontier_fast",
+      "params_in_billions": null,
+      "context_window": 1048576,
+      "max_output_tokens": 65536,
+      "input_tokens_cpm": 0.5,
+      "cached_tokens_cpm": 0.05,
+      "output_tokens_cpm": 3
+    },
+    {
+      "id": "gpt-5.3-codex",
+      "provider": "openai",
+      "runtime": "api",
+      "specializations": [
+        "coding"
+      ],
+      "tier": "frontier_coding",
+      "params_in_billions": null,
+      "context_window": 400000,
+      "max_output_tokens": 128000,
+      "input_tokens_cpm": 1.75,
+      "cached_tokens_cpm": 0.175,
+      "output_tokens_cpm": 14
+    },
+    {
+      "id": "qwen2.5-coder:14b-instruct-q4_K_M",
+      "provider": "ollama",
+      "runtime": "local",
+      "specializations": [
+        "coding",
+        "tool_use"
+      ],
+      "tier": "local_coding",
+      "params_in_billions": 14.7,
+      "context_window": 32768,
+      "upstream_max_context_window": 131072,
+      "input_tokens_cpm": 0,
+      "cached_tokens_cpm": 0,
+      "output_tokens_cpm": 0
+    },
+    {
+      "id": "gemma3:4b",
+      "provider": "ollama",
+      "runtime": "local",
+      "specializations": [
+        "chat",
+        "writing",
+        "summarization",
+        "reasoning",
+        "vision"
+      ],
+      "tier": "local_small",
+      "params_in_billions": 4,
+      "context_window": 128000,
+      "input_tokens_cpm": 0,
+      "cached_tokens_cpm": 0,
+      "output_tokens_cpm": 0
+    }
+  ],
+  "default": "gpt-5.4-mini",
+  "pricing_unit": "USD per 1M tokens",
+  "notes": [
+    "OpenAI and Google model parameter counts are not publicly disclosed, so params_in_billions is null for frontier API models.",
+    "Gemini 3 Flash Preview pricing uses Google AI's Standard paid text/image/video token rates; audio, batch, flex, priority, grounding, and cache storage rates differ.",
+    "Local Ollama models have no API token price, but they still have local compute, memory, electricity, and latency costs.",
+    "For qwen2.5-coder:14b, Ollama lists 32K context for the 14B instruct tags, while the upstream model card lists 131,072 tokens as the full supported context."
+  ]
+}