open-classify 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +30 -24
  2. package/dist/src/aggregator.d.ts +4 -1
  3. package/dist/src/aggregator.js +25 -15
  4. package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/manifest.json +3 -1
  5. package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/prompt.md +1 -1
  6. package/dist/src/classifiers/custom/memory_retrieval_queries/manifest.json +2 -0
  7. package/dist/src/classifiers/stock/model_specialization/manifest.json +4 -1
  8. package/dist/src/classifiers/stock/preflight/manifest.json +4 -1
  9. package/dist/src/classifiers/stock/prompt_injection/manifest.json +12 -0
  10. package/dist/src/classifiers/stock/prompts/confidence.md +3 -3
  11. package/dist/src/classifiers/stock/prompts/custom-output.md +7 -1
  12. package/dist/src/classifiers/stock/prompts/preflight.md +7 -7
  13. package/dist/src/classifiers/stock/prompts/prompt-injection-output.md +5 -0
  14. package/dist/src/classifiers/stock/prompts/prompt_injection.md +24 -0
  15. package/dist/src/classifiers/stock/prompts/reason.md +1 -1
  16. package/dist/src/classifiers/stock/prompts/specialty.md +8 -6
  17. package/dist/src/classifiers/stock/prompts/tier.md +1 -1
  18. package/dist/src/classifiers/stock/routing/manifest.json +4 -1
  19. package/dist/src/classifiers/stock/tools/manifest.json +2 -0
  20. package/dist/src/config.d.ts +2 -0
  21. package/dist/src/config.js +33 -1
  22. package/dist/src/enums.d.ts +3 -7
  23. package/dist/src/enums.js +7 -30
  24. package/dist/src/index.js +1 -1
  25. package/dist/src/input.js +1 -1
  26. package/dist/src/manifest.d.ts +31 -23
  27. package/dist/src/manifest.js +5 -1
  28. package/dist/src/ollama.d.ts +2 -1
  29. package/dist/src/ollama.js +1 -0
  30. package/dist/src/pipeline.d.ts +1 -0
  31. package/dist/src/pipeline.js +78 -48
  32. package/dist/src/stock-prompt.js +1 -1
  33. package/dist/src/stock-validation.d.ts +1 -2
  34. package/dist/src/stock-validation.js +23 -40
  35. package/dist/src/stock.d.ts +12 -11
  36. package/dist/src/stock.js +21 -1
  37. package/dist/src/ui-server.js +12 -5
  38. package/dist/src/validation.d.ts +0 -1
  39. package/dist/src/validation.js +0 -37
  40. package/docs/adding-a-classifier.md +131 -0
  41. package/docs/manifests.md +127 -0
  42. package/docs/resolver.md +104 -0
  43. package/docs/signals.md +102 -0
  44. package/downstream-models.json +124 -0
  45. package/open-classify.config.example.json +5 -1
  46. package/package.json +3 -1
  47. package/dist/src/classifiers/stock/prompts/security-output.md +0 -8
  48. package/dist/src/classifiers/stock/prompts/security.md +0 -26
  49. package/dist/src/classifiers/stock/security/manifest.json +0 -12
@@ -21,17 +21,17 @@ import { createServer } from "node:http";
21
21
  import { extname, join, normalize } from "node:path";
22
22
  import { loadCatalog } from "./catalog.js";
23
23
  import { CLASSIFIER_NAMES, REGISTRY } from "./classifiers.js";
24
+ import { DEFAULT_CERTAINTY_THRESHOLD, certaintyThreshold, } from "./aggregator.js";
24
25
  import { classifierModelsFromConfig, loadOpenClassifyConfig, } from "./config.js";
25
- import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, SECURITY_DECISION_VALUES, SECURITY_RISK_LEVEL_VALUES, SECURITY_SIGNAL_VALUES, } from "./enums.js";
26
+ import { DEFAULT_CERTAINTY_GATE } from "./pipeline.js";
27
+ import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, PROMPT_INJECTION_RISK_LEVEL_VALUES, } from "./enums.js";
26
28
  import { createOllamaClassifierRunner, OLLAMA_CONTEXT_LENGTH, OLLAMA_DEFAULT_CATALOG_PATH, OLLAMA_MIN_AVAILABLE_MEMORY_BYTES, OLLAMA_MIN_TOTAL_MEMORY_BYTES, OLLAMA_REQUIRED_PARALLELISM, } from "./ollama.js";
27
29
  import { classifyOpenClassifyInput } from "./pipeline.js";
28
30
  // Served at GET /api/enums so the UI never needs to duplicate shared enum values.
29
31
  const CLASSIFIER_ENUMS = {
30
32
  downstream_model_tier: [...DOWNSTREAM_MODEL_TIER_VALUES],
31
33
  model_specialization: [...MODEL_SPECIALIZATION_VALUES],
32
- security_decision: [...SECURITY_DECISION_VALUES],
33
- security_risk_level: [...SECURITY_RISK_LEVEL_VALUES],
34
- security_signal: [...SECURITY_SIGNAL_VALUES],
34
+ prompt_injection_risk_level: [...PROMPT_INJECTION_RISK_LEVEL_VALUES],
35
35
  };
36
36
  const CLASSIFIER_METADATA = REGISTRY.map((classifier) => ({
37
37
  name: classifier.name,
@@ -77,7 +77,13 @@ async function route(request, response) {
77
77
  return;
78
78
  }
79
79
  if (request.method === "GET" && url.pathname === "/api/classifiers") {
80
- sendJson(response, { classifiers: CLASSIFIER_METADATA });
80
+ sendJson(response, {
81
+ classifiers: CLASSIFIER_METADATA,
82
+ aggregator: {
83
+ certaintyGate: OPEN_CLASSIFY_CONFIG?.aggregator?.certaintyGate ?? DEFAULT_CERTAINTY_GATE,
84
+ certaintyThreshold: certaintyThreshold(OPEN_CLASSIFY_CONFIG?.aggregator) ?? DEFAULT_CERTAINTY_THRESHOLD,
85
+ },
86
+ });
81
87
  return;
82
88
  }
83
89
  if (request.method === "GET") {
@@ -181,6 +187,7 @@ async function classifyStream(request, response) {
181
187
  const result = await classifyOpenClassifyInput(input, {
182
188
  runClassifier,
183
189
  catalog: loadCatalog(CATALOG_PATH),
190
+ aggregator: OPEN_CLASSIFY_CONFIG?.aggregator,
184
191
  signal: clientAbortController.signal,
185
192
  });
186
193
  send("pipeline_completed", result);
@@ -11,7 +11,6 @@ export declare function requireStringArray(value: unknown, classifier: string, m
11
11
  export declare function requireStringMaxLength(value: unknown, classifier: string, model: string, path: string, maxChars: number): string;
12
12
  export declare function requireNonEmptyStringMaxLength(value: unknown, classifier: string, model: string, path: string, maxChars: number): string;
13
13
  export declare function requireEnum<const Values extends readonly string[]>(value: unknown, values: Values, classifier: string, model: string, path: string): Values[number];
14
- export declare function requireConfidence(value: unknown, classifier: string, model: string, path?: string): number;
15
14
  export declare function ensureExactKeys(value: Record<string, unknown>, keys: readonly string[], classifier: string, model: string): void;
16
15
  export declare function ensureNoDuplicates(values: string[], classifier: string, model: string, path: string): void;
17
16
  export declare function isRecord(value: unknown): value is Record<string, unknown>;
@@ -67,43 +67,6 @@ export function requireEnum(value, values, classifier, model, path) {
67
67
  }
68
68
  return value;
69
69
  }
70
- // `confidence` must be a finite number in [0, 1]. Required on every
71
- // classifier output (ClassifierResultBase); fallback shapes use 0.
72
- export function requireConfidence(value, classifier, model, path = "confidence") {
73
- const confidence = normalizeConfidence(value);
74
- if (typeof confidence !== "number" ||
75
- !Number.isFinite(confidence) ||
76
- confidence < 0 ||
77
- confidence > 1) {
78
- throwInvalid(classifier, model, `${path} must be a number between 0 and 1 inclusive`);
79
- }
80
- return confidence;
81
- }
82
- function normalizeConfidence(value) {
83
- if (typeof value === "number") {
84
- return value > 1 && value <= 100 ? value / 100 : value;
85
- }
86
- if (typeof value !== "string")
87
- return value;
88
- const text = value.trim().toLowerCase();
89
- if (text === "")
90
- return value;
91
- if (text.endsWith("%")) {
92
- const percent = Number(text.slice(0, -1).trim());
93
- return Number.isFinite(percent) ? percent / 100 : value;
94
- }
95
- const numeric = Number(text);
96
- if (Number.isFinite(numeric)) {
97
- return numeric > 1 && numeric <= 100 ? numeric / 100 : numeric;
98
- }
99
- if (text === "high")
100
- return 0.9;
101
- if (text === "medium")
102
- return 0.5;
103
- if (text === "low")
104
- return 0.2;
105
- return value;
106
- }
107
70
  export function ensureExactKeys(value, keys, classifier, model) {
108
71
  const expected = new Set(keys);
109
72
  for (const key of Object.keys(value)) {
@@ -0,0 +1,131 @@
1
+ # Adding a classifier
2
+
3
+ Most additions are custom classifiers. You drop two files in a directory; the runtime picks them up. No TypeScript registry edits required.
4
+
5
+ ## 1. Pick a directory
6
+
7
+ Custom classifier:
8
+
9
+ ```
10
+ src/classifiers/custom/<name>/
11
+ ├── manifest.json
12
+ └── prompt.md
13
+ ```
14
+
15
+ Stock classifier names are closed (`preflight`, `routing`, `model_specialization`, `tools`, `prompt_injection`). You generally don't add new stock classifiers — extend behavior with a custom one instead.
16
+
17
+ ## 2. Write the manifest
18
+
19
+ ```json
20
+ {
21
+ "kind": "custom",
22
+ "name": "topic_tags",
23
+ "version": "1.0.0",
24
+ "purpose": "Tag the message with a small set of topic labels for analytics.",
25
+ "order": 70,
26
+ "fallback": {
27
+ "reason": "Classifier failed; no tags generated.",
28
+ "certainty": "no_signal",
29
+ "output": { "tags": [] }
30
+ },
31
+ "output_schema": {
32
+ "type": "object",
33
+ "additionalProperties": false,
34
+ "required": ["tags"],
35
+ "properties": {
36
+ "tags": {
37
+ "type": "array", "maxItems": 5,
38
+ "items": { "type": "string", "minLength": 1, "maxLength": 40 }
39
+ }
40
+ }
41
+ }
42
+ }
43
+ ```
44
+
45
+ Rules:
46
+
47
+ - `name` must match the directory name.
48
+ - `name` must not collide with a stock classifier name.
49
+ - `order` must not collide with any other classifier.
50
+ - `fallback.output` must validate against your `output_schema`.
51
+
52
+ See [manifests.md](manifests.md) for the full field list.
53
+
54
+ ## 3. Write the prompt
55
+
56
+ `prompt.md` is the classifier-specific instruction text. The runtime composes it with an auto-generated preamble that describes the JSON output envelope, so your prompt can focus on the classification rule:
57
+
58
+ ```markdown
59
+ You are the topic_tags classifier.
60
+
61
+ Tags are short single-word topic labels (lowercase, no spaces). Use at most five.
62
+ Return an empty array when no clear topic applies.
63
+ Do not invent tags for vague or ambiguous messages.
64
+ ```
65
+
66
+ Keep it focused. Don't put aggregation or routing rules in prompts — those live in the runtime and catalog.
67
+
68
+ ## 4. Build and test
69
+
70
+ ```sh
71
+ npm run build # validates the manifest, sorts the registry, copies assets
72
+ npm test
73
+ ```
74
+
75
+ If the manifest is malformed, the loader throws `ClassifierManifestError` with the path and a specific reason.
76
+
77
+ ## 5. Consume the output
78
+
79
+ ```ts
80
+ const result = await classifyWithOllama(input, { catalog });
81
+ if (result.action === "route") {
82
+ const tags = result.classifier_outputs.topic_tags?.tags ?? [];
83
+ }
84
+ ```
85
+
86
+ `result.audit.custom_outputs[]` carries the same data with required `reason` and `certainty` metadata if you need to inspect them.
87
+
88
+ ## Choosing the classifier model
89
+
90
+ For apps and OSS installs, prefer `open-classify.config.json`:
91
+
92
+ ```json
93
+ {
94
+ "runner": {
95
+ "provider": "ollama",
96
+ "defaultModel": "gemma4:e4b-it-q4_K_M",
97
+ "models": {
98
+ "custom": {
99
+ "topic_tags": "qwen2.5:7b-instruct-q4_K_M"
100
+ }
101
+ }
102
+ }
103
+ }
104
+ ```
105
+
106
+ `runner.defaultModel` applies to every classifier without an override. `runner.models.stock` maps built-in classifier ids to model names; `runner.models.custom` does the same for custom classifier ids.
107
+
108
+ Classifier manifests may also carry an Ollama hint for packaged classifiers:
109
+
110
+ ```json
111
+ {
112
+ "backend": { "ollama": { "base_model": "qwen2.5:7b-instruct-q4_K_M" } }
113
+ }
114
+ ```
115
+
116
+ Config file and function options take precedence over manifest hints.
117
+
118
+ ## Replacing the backend
119
+
120
+ For full backend control, implement your own `RunClassifier` and pass it to `classifyOpenClassifyInput`:
121
+
122
+ ```ts
123
+ import { classifyOpenClassifyInput, loadCatalog } from "open-classify";
124
+
125
+ const runClassifier: RunClassifier = async (name, input, signal) => {
126
+ // call OpenAI, Anthropic, a remote service, etc.
127
+ // return a ClassifierOutput matching the classifier's contract.
128
+ };
129
+
130
+ await classifyOpenClassifyInput(input, { runClassifier, catalog: loadCatalog(...) });
131
+ ```
@@ -0,0 +1,127 @@
1
+ # Manifest reference
2
+
3
+ Every classifier directory contains a `manifest.json`. Custom classifiers also contain a `prompt.md`. Stock prompt markdown lives in `src/classifiers/stock/prompts/` and is assembled at runtime.
4
+
5
+ ## Layout
6
+
7
+ ```
8
+ src/classifiers/
9
+ stock/prompts/ # built-in prompt markdown
10
+ base.md
11
+ confidence.md
12
+ reason.md
13
+ tier.md
14
+ specialty.md
15
+ tools-output.md
16
+ tools.md
17
+ stock/<name>/ # built-in classifier
18
+ manifest.json
19
+ custom/<name>/ # caller-defined classifier
20
+ manifest.json
21
+ prompt.md
22
+ ```
23
+
24
+ The `kind` field in the manifest must match the parent directory (`stock` or `custom`). Mismatches are rejected at load time.
25
+
26
+ ## Common fields
27
+
28
+ | Field | Required | Description |
29
+ |---|---|---|
30
+ | `kind` | yes | `"stock"` or `"custom"` |
31
+ | `name` | yes | Classifier id. Must match the directory name. |
32
+ | `version` | yes | Contract version surfaced in `meta.classifiers[name].version`. |
33
+ | `purpose` | yes | Human-readable description. |
34
+ | `order` | yes | Integer sort key. Duplicate orders are rejected. |
35
+ | `fallback` | yes | Output emitted when the classifier errors or times out. Must validate against the kind's output contract. |
36
+ | `backend.ollama.base_model` | no | Packaged Ollama model hint for this classifier. User config and function options take precedence. |
37
+
38
+ ## Stock manifests
39
+
40
+ Stock manifests use a closed set of names (`preflight`, `routing`, `model_specialization`, `tools`, `prompt_injection`). The runtime knows each name's signal type, so there's no `emits` field. Fallbacks must satisfy the signal contract for that name (see [signals.md](signals.md)).
41
+
42
+ The `tools` classifier additionally takes:
43
+
44
+ | Field | Required | Description |
45
+ |---|---|---|
46
+ | `tools` | no | Array of `{ id, description }`. Restricts which tool ids the classifier may emit. |
47
+
48
+ Example (`src/classifiers/stock/prompt_injection/manifest.json`):
49
+
50
+ ```json
51
+ {
52
+ "kind": "stock",
53
+ "name": "prompt_injection",
54
+ "version": "1.0.0",
55
+ "purpose": "Assess whether the target message contains prompt-injection attempts.",
56
+ "order": 50,
57
+ "fallback": {
58
+ "reason": "Classifier failed; prompt-injection risk is unknown.",
59
+ "certainty": "no_signal",
60
+ "risk_level": "unknown"
61
+ }
62
+ }
63
+ ```
64
+
65
+ ## Custom manifests
66
+
67
+ | Field | Required | Description |
68
+ |---|---|---|
69
+ | `output_schema` | yes | JSON Schema (Ajv-validated) for the `output` payload. |
70
+
71
+ Custom classifier names must not collide with any stock classifier name.
72
+
73
+ Example:
74
+
75
+ ```json
76
+ {
77
+ "kind": "custom",
78
+ "name": "memory_retrieval_queries",
79
+ "version": "1.0.0",
80
+ "purpose": "Generate saved-memory query hints for caller-owned memory retrieval.",
81
+ "order": 60,
82
+ "fallback": {
83
+ "reason": "Classifier failed; no memory queries generated.",
84
+ "certainty": "no_signal",
85
+ "output": { "queries": [] }
86
+ },
87
+ "output_schema": {
88
+ "type": "object",
89
+ "additionalProperties": false,
90
+ "required": ["queries"],
91
+ "properties": {
92
+ "queries": {
93
+ "type": "array", "maxItems": 5,
94
+ "items": { "type": "string", "minLength": 1, "maxLength": 120 }
95
+ }
96
+ }
97
+ }
98
+ }
99
+ ```
100
+
101
+ ## Prompt files
102
+
103
+ Stock prompt files live together in `src/classifiers/stock/prompts/`. The runtime assembles shared markdown (`base.md`, `reason.md`, `confidence.md`, `classifier-header.md`) with focused stock sections such as `tier.md`, `specialty.md`, `tools-output.md`, and the stock classifier file (`preflight.md`, `routing.md`, `model_specialization.md`, `tools.md`, or `prompt_injection.md`).
104
+
105
+ Dynamic prompt sections use small markdown slots. For example, `tools.md` contains `{{allowed_tools}}`, and the runtime renders the allowed tool list from the tools manifest.
106
+
107
+ Custom `prompt.md` is the classifier-specific instruction text. The runtime composes it with the shared JSON output envelope, so prompts can stay focused on classifier behavior:
108
+
109
+ - what the classifier decides
110
+ - when to emit each declared field
111
+ - when to omit optional fields
112
+ - short examples only when they clarify a boundary
113
+
114
+ Do not put aggregation or model-id rules in prompts — those live in the runtime and catalog.
115
+
116
+ ## Validation rejections
117
+
118
+ The loader rejects manifests that:
119
+
120
+ - declare unsupported fields
121
+ - collide on `name` or `order`
122
+ - have an empty custom `prompt.md`
123
+ - declare a custom name that matches a stock classifier
124
+ - declare `kind` that doesn't match the parent directory
125
+ - have a `fallback` that doesn't satisfy the signal or `output_schema`
126
+ - are missing `output_schema` on a custom classifier
127
+ - declare `tools` on any classifier other than the `tools` stock classifier
@@ -0,0 +1,104 @@
1
+ # Aggregation and model resolution
2
+
3
+ The aggregator merges classifier outputs into an `Envelope`, picks a concrete model from the catalog, and returns a `PipelineResult`.
4
+
5
+ ## Certainty threshold
6
+
7
+ Default: `0.65`. Configurable via `aggregator.certaintyThreshold` on `classifyOpenClassifyInput`. `aggregator.confidenceThreshold` remains as a deprecated compatibility alias.
8
+
9
+ Per-classifier signals are emitted with `certainty` tags. The aggregator maps those tags to scores:
10
+
11
+ ```ts
12
+ {
13
+ no_signal: 0.00,
14
+ very_weak: 0.15,
15
+ weak: 0.30,
16
+ tentative: 0.45,
17
+ reasonable: 0.60,
18
+ strong: 0.75,
19
+ very_strong: 0.88,
20
+ near_certain: 0.97,
21
+ }
22
+ ```
23
+
24
+ Signals with scores below the threshold are dropped from aggregation. Every validated classifier output must carry a `certainty` tag; an output without one fails validation. Dropped routing axes are reported on `audit.model_recommendation.resolution.constraints_dropped` with `reason: "low_confidence"`.
25
+
26
+ Custom classifier outputs are surfaced regardless of certainty (callers can decide what to do with them), but the value still goes through schema validation.
27
+
28
+ ## Whole-run certainty gate
29
+
30
+ Before returning a normal `route`, the pipeline calculates mapped certainty scores for every classifier result, including custom classifiers. Fallback outputs use explicit `certainty: "no_signal"`, which counts as `0`.
31
+
32
+ `aggregator.certaintyGate` controls whether low whole-run certainty becomes `action: "block"`:
33
+
34
+ - `min_score` (default) — compare the lowest classifier score to `certaintyThreshold`.
35
+ - `avg_score` — compare the arithmetic mean of all classifier scores to `certaintyThreshold`.
36
+ - `off` — do not block based on whole-run certainty.
37
+
38
+ When this gate fires, `fired_by` is `"certainty_gate"` and `reason` / `audit.certainty_gate` include `kind: "low_certainty"`, the mode, threshold, observed score, per-classifier scores, and low classifier names.
39
+
40
+ ## Routing axis merge
41
+
42
+ `routing` emits the `model_tier` axis. `model_specialization` emits the `specialization` axis. The aggregator includes each axis only when its classifier's certainty score meets the configured threshold.
43
+
44
+ ## Short-circuits
45
+
46
+ The pipeline aborts early when:
47
+
48
+ 1. `preflight.final_reply` is present with certainty score ≥ threshold → `{ action: "reply", reply: { text } }`.
49
+ 2. `prompt_injection.risk_level === "high_risk"` with certainty score ≥ threshold → `{ action: "block" }`.
50
+ 3. `prompt_injection.risk_level === "unknown"` with certainty score ≥ threshold → `{ action: "block" }`.
51
+
52
+ Preflight is evaluated first (it's cheaper to gate). Only these two stock classifiers (`preflight` and `prompt_injection`) can short-circuit; custom classifiers cannot.
53
+
54
+ ## Model resolution
55
+
56
+ Inputs:
57
+
58
+ - `specialization` (soft) — must be in the model's `specializations[]`.
59
+ - `model_tier` (soft) — must equal the model's `tier`.
60
+
61
+ Resolution passes (first non-empty match wins):
62
+
63
+ 1. specialization + tier
64
+ 2. specialization only
65
+ 3. tier only
66
+ 4. no constraints
67
+
68
+ Within a pass, candidates are ranked:
69
+
70
+ 1. lowest **price index** (`input_tokens_cpm + output_tokens_cpm`, or `0` if pricing is absent)
71
+ 2. larger `params_in_billions`
72
+ 3. larger `context_window`
73
+ 4. earlier catalog order
74
+
75
+ If every pass returns no candidates, the resolver returns `catalog.default` with `fell_back_to_default: true`. (In practice the no-constraints pass always finds at least one model unless the catalog is empty, so the default-fallback path is defensive.)
76
+
77
+ ## Resolution audit
78
+
79
+ Every `route` result carries a resolution report:
80
+
81
+ ```ts
82
+ {
83
+ constraints_used: { specialization?: ..., tier?: ... },
84
+ constraints_dropped: Array<{
85
+ axis: "specialization" | "tier",
86
+ reason: "low_confidence" | "no_match_relaxed" | "default_fallback"
87
+ }>,
88
+ confidences: { routing?: number },
89
+ fell_back_to_default: boolean,
90
+ }
91
+ ```
92
+
93
+ Drop reasons:
94
+
95
+ - `low_confidence` — the classifier emitted the axis but below threshold.
96
+ - `no_match_relaxed` — the axis was requested but no model matched, so the resolver relaxed it.
97
+ - `default_fallback` — every pass failed; the resolver used `catalog.default`.
98
+
99
+ ## Custom outputs
100
+
101
+ After aggregation:
102
+
103
+ - `result.classifier_outputs` is a flat `Record<name, unknown>` of validated custom outputs.
104
+ - `result.audit.custom_outputs` is the same data with `reason` and `certainty` metadata attached.
@@ -0,0 +1,102 @@
1
+ # Signal contracts
2
+
3
+ Stock classifier outputs are typed signals. Every classifier output must include `reason` (≤120 chars) and `certainty`. The aggregator maps certainty tags to numeric scores and drops below-threshold signals (default threshold: `0.65`).
4
+
5
+ ```ts
6
+ type Certainty =
7
+ | "no_signal"
8
+ | "very_weak"
9
+ | "weak"
10
+ | "tentative"
11
+ | "reasonable"
12
+ | "strong"
13
+ | "very_strong"
14
+ | "near_certain";
15
+ ```
16
+
17
+ ## `preflight` — `FinalReplySignal | AckReplySignal`
18
+
19
+ ```ts
20
+ {
21
+ final_reply?: { reply: string }; // ≤200 chars; short-circuits to action=reply
22
+ ack_reply?: { reply: string }; // ≤200 chars; passthrough to caller
23
+ reason: string;
24
+ certainty: Certainty;
25
+ }
26
+ ```
27
+
28
+ - Emit `final_reply` only for tiny terminal answers (greetings, thanks, simple arithmetic). Never for drafting, analysis, or generated work.
29
+ - Emit `ack_reply` when downstream work should continue and a courtesy acknowledgement helps.
30
+ - `final_reply` and `ack_reply` are mutually exclusive.
31
+ - A confident `final_reply` aborts the pipeline and returns `{ action: "reply", reply: { text } }`.
32
+
33
+ ## `routing` — `RoutingSignal` (tier axis)
34
+
35
+ ```ts
36
+ {
37
+ model_tier?: "local_fast" | "local_small" | "local_strong" | "local_coding"
38
+ | "frontier_fast" | "frontier_strong" | "frontier_coding";
39
+ reason: string;
40
+ certainty: Certainty;
41
+ }
42
+ ```
43
+
44
+ Tier feeds the catalog resolver as a soft constraint.
45
+
46
+ ## `model_specialization` — `RoutingSignal` (specialization axis)
47
+
48
+ ```ts
49
+ {
50
+ specialization?: "chat" | "reasoning" | "planning" | "writing" | "summarization"
51
+ | "coding" | "tool_use" | "computer_use" | "vision";
52
+ reason: string;
53
+ certainty: Certainty;
54
+ }
55
+ ```
56
+
57
+ `routing` and `model_specialization` both contribute to downstream model resolution, but each owns one axis: `routing` owns `model_tier`; `model_specialization` owns `specialization`.
58
+
59
+ ## `tools` — `ToolsSignal`
60
+
61
+ ```ts
62
+ {
63
+ tools: string[];
64
+ reason: string;
65
+ certainty: Certainty;
66
+ }
67
+ ```
68
+
69
+ - An empty `tools` array means no downstream tools are required.
70
+ - `tools` must not contain duplicates.
71
+ - Allowed ids are declared per-manifest in `tools`. The built-in tools classifier ships with `workspace`, `web`, `communications`, `documents`, `spreadsheets`, `project_management`, `developer_platforms`.
72
+
73
+ ## `prompt_injection` — `PromptInjectionSignal`
74
+
75
+ ```ts
76
+ {
77
+ risk_level: "normal" | "suspicious" | "high_risk" | "unknown";
78
+ reason: string;
79
+ certainty: Certainty;
80
+ }
81
+ ```
82
+
83
+ This classifier is strictly about prompt injection: attempts to override higher-priority instructions, reveal hidden prompts, or make the assistant obey untrusted text as instructions. Destructive or sensitive ordinary requests are not prompt injection by themselves.
84
+
85
+ Short-circuit behavior:
86
+
87
+ - Confident `risk_level: "high_risk"` → `{ action: "block", reason: { kind: "prompt_injection", risk_level } }`.
88
+ - Confident `risk_level: "unknown"` → `{ action: "block", reason: { kind: "prompt_injection", risk_level } }`.
89
+
90
+ ## Custom classifier output
91
+
92
+ Custom classifiers emit an opaque `output` value validated against `output_schema`:
93
+
94
+ ```ts
95
+ {
96
+ output: unknown; // matches manifest output_schema
97
+ reason: string;
98
+ certainty: Certainty;
99
+ }
100
+ ```
101
+
102
+ The aggregator never reads custom `output` when picking a route or model. It surfaces values on `result.classifier_outputs.<classifier_name>` and on `result.audit.custom_outputs[]`.
@@ -0,0 +1,124 @@
1
+ {
2
+ "models": [
3
+ {
4
+ "id": "gpt-5.5",
5
+ "provider": "openai",
6
+ "runtime": "api",
7
+ "specializations": [
8
+ "chat",
9
+ "writing",
10
+ "reasoning",
11
+ "planning",
12
+ "coding",
13
+ "tool_use"
14
+ ],
15
+ "tier": "frontier_strong",
16
+ "params_in_billions": null,
17
+ "context_window": 1050000,
18
+ "max_output_tokens": 128000,
19
+ "input_tokens_cpm": 5,
20
+ "cached_tokens_cpm": 0.5,
21
+ "output_tokens_cpm": 30
22
+ },
23
+ {
24
+ "id": "gpt-5.4-mini",
25
+ "provider": "openai",
26
+ "runtime": "api",
27
+ "specializations": [
28
+ "chat",
29
+ "writing",
30
+ "reasoning",
31
+ "planning",
32
+ "coding",
33
+ "computer_use",
34
+ "tool_use"
35
+ ],
36
+ "tier": "frontier_fast",
37
+ "params_in_billions": null,
38
+ "context_window": 400000,
39
+ "max_output_tokens": 128000,
40
+ "input_tokens_cpm": 0.75,
41
+ "cached_tokens_cpm": 0.075,
42
+ "output_tokens_cpm": 4.5
43
+ },
44
+ {
45
+ "id": "gemini-3-flash-preview",
46
+ "provider": "google",
47
+ "runtime": "api",
48
+ "specializations": [
49
+ "chat",
50
+ "writing",
51
+ "reasoning",
52
+ "planning",
53
+ "coding",
54
+ "tool_use",
55
+ "computer_use",
56
+ "vision"
57
+ ],
58
+ "tier": "frontier_fast",
59
+ "params_in_billions": null,
60
+ "context_window": 1048576,
61
+ "max_output_tokens": 65536,
62
+ "input_tokens_cpm": 0.5,
63
+ "cached_tokens_cpm": 0.05,
64
+ "output_tokens_cpm": 3
65
+ },
66
+ {
67
+ "id": "gpt-5.3-codex",
68
+ "provider": "openai",
69
+ "runtime": "api",
70
+ "specializations": [
71
+ "coding"
72
+ ],
73
+ "tier": "frontier_coding",
74
+ "params_in_billions": null,
75
+ "context_window": 400000,
76
+ "max_output_tokens": 128000,
77
+ "input_tokens_cpm": 1.75,
78
+ "cached_tokens_cpm": 0.175,
79
+ "output_tokens_cpm": 14
80
+ },
81
+ {
82
+ "id": "qwen2.5-coder:14b-instruct-q4_K_M",
83
+ "provider": "ollama",
84
+ "runtime": "local",
85
+ "specializations": [
86
+ "coding",
87
+ "tool_use"
88
+ ],
89
+ "tier": "local_coding",
90
+ "params_in_billions": 14.7,
91
+ "context_window": 32768,
92
+ "upstream_max_context_window": 131072,
93
+ "input_tokens_cpm": 0,
94
+ "cached_tokens_cpm": 0,
95
+ "output_tokens_cpm": 0
96
+ },
97
+ {
98
+ "id": "gemma3:4b",
99
+ "provider": "ollama",
100
+ "runtime": "local",
101
+ "specializations": [
102
+ "chat",
103
+ "writing",
104
+ "summarization",
105
+ "reasoning",
106
+ "vision"
107
+ ],
108
+ "tier": "local_small",
109
+ "params_in_billions": 4,
110
+ "context_window": 128000,
111
+ "input_tokens_cpm": 0,
112
+ "cached_tokens_cpm": 0,
113
+ "output_tokens_cpm": 0
114
+ }
115
+ ],
116
+ "default": "gpt-5.4-mini",
117
+ "pricing_unit": "USD per 1M tokens",
118
+ "notes": [
119
+ "OpenAI and Google model parameter counts are not publicly disclosed, so params_in_billions is null for frontier API models.",
120
+ "Gemini 3 Flash Preview pricing uses Google AI's Standard paid text/image/video token rates; audio, batch, flex, priority, grounding, and cache storage rates differ.",
121
+ "Local Ollama models have no API token price, but they still have local compute, memory, electricity, and latency costs.",
122
+ "For qwen2.5-coder:14b, Ollama lists 32K context for the 14B instruct tags, while the upstream model card lists 131,072 tokens as the full supported context."
123
+ ]
124
+ }