open-classify 0.5.0 → 0.6.0
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- package/README.md +60 -63
- package/dist/src/aggregator.d.ts +7 -23
- package/dist/src/aggregator.js +108 -186
- package/dist/src/classifiers/{routing → model_tier}/manifest.json +2 -2
- package/dist/src/classifiers/{routing → model_tier}/prompt.md +1 -1
- package/dist/src/classifiers/preflight/manifest.json +9 -8
- package/dist/src/classifiers/preflight/prompt.md +12 -6
- package/dist/src/classifiers/prompt_injection/manifest.json +2 -3
- package/dist/src/classify.d.ts +1 -2
- package/dist/src/classify.js +0 -2
- package/dist/src/config.d.ts +0 -2
- package/dist/src/config.js +1 -23
- package/dist/src/index.js +2 -3
- package/dist/src/manifest.d.ts +25 -70
- package/dist/src/pipeline.d.ts +1 -2
- package/dist/src/pipeline.js +22 -89
- package/dist/src/stock-validation.js +8 -4
- package/docs/adding-a-classifier.md +5 -3
- package/docs/manifests.md +6 -6
- package/docs/resolver.md +20 -44
- package/docs/signals.md +18 -8
- package/open-classify.config.example.json +1 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
Decide what should happen to a user message <em>before</em> it reaches your downstream model.
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
Open Classify is a pre-routing layer for AI products. It runs a small set of fast classifiers in parallel against the latest user message, then returns a single
|
|
9
|
+
Open Classify is a pre-routing layer for AI products. It runs a small set of fast classifiers in parallel against the latest user message, then returns a single `PipelineResult` your app can act on: an action (`route`, `block`, or `reply`), a downstream model recommendation, a tool exposure list, an optional immediate reply, and any custom signals your own classifiers contribute.
|
|
10
10
|
|
|
11
11
|
Use it when your frontier model should not be the first thing every request touches. Open Classify can handle tiny terminal replies before they hit an expensive model, recommend the right downstream model for the actual task, suggest what tools or context the downstream model should receive, and add a focused prompt-injection pass.
|
|
12
12
|
|
|
@@ -17,7 +17,7 @@ message
|
|
|
17
17
|
normalize + trim classifier context
|
|
18
18
|
│
|
|
19
19
|
├─► preflight ─────────────► final_reply? / ack_reply?
|
|
20
|
-
├─►
|
|
20
|
+
├─► model_tier ────────────► model_tier?
|
|
21
21
|
├─► model_specialization ──► model_specialization?
|
|
22
22
|
├─► tools ─────────────────► tools?
|
|
23
23
|
├─► prompt_injection ─────► risk_level?
|
|
@@ -28,18 +28,18 @@ normalize + trim classifier context
|
|
|
28
28
|
aggregator + model catalog
|
|
29
29
|
│
|
|
30
30
|
▼
|
|
31
|
-
|
|
31
|
+
PipelineResult { action, model_id, tools, reply, ... }
|
|
32
32
|
```
|
|
33
33
|
|
|
34
34
|
Every classifier uses the same manifest shape and emits the same output envelope: `{ reason, certainty, ...payload }`. Some payload fields are **reserved** — like `model_tier`, `final_reply`, and `risk_level` — and the aggregator knows how to consume them into a routing decision. Everything else is your classifier's own data and passes through to the caller untouched.
|
|
35
35
|
|
|
36
36
|
## Why Open Classify
|
|
37
37
|
|
|
38
|
-
- **Spend frontier tokens only when they matter.** Simple greetings, thanks, spelling checks, and small arithmetic can be answered immediately
|
|
39
|
-
- **Keep the user interface responsive.** For complex work, preflight
|
|
38
|
+
- **Spend frontier tokens only when they matter.** Simple greetings, thanks, spelling checks, and small arithmetic can be answered immediately (`action: "reply"`) without sending the request downstream.
|
|
39
|
+
- **Keep the user interface responsive.** For complex work, preflight emits an `ack_reply` — a task-specific acknowledgement your UI can show while routing the real request.
|
|
40
40
|
- **Pick the right model per message.** Classifiers emit soft constraints like tier and specialization; your catalog turns those into a concrete model optimized for cost, capability, and fit.
|
|
41
41
|
- **Shape downstream context intentionally.** Built-in and custom classifiers can recommend tools, retrieval queries, summaries, or other context hints without passing the full conversation history back to the caller.
|
|
42
|
-
- **Add another defensive layer.** The `prompt_injection` classifier surfaces instruction-override attempts
|
|
42
|
+
- **Add another defensive layer.** The `prompt_injection` classifier surfaces instruction-override attempts. High-risk or unknown injection risk automatically sets `action: "block"`.
|
|
43
43
|
|
|
44
44
|
## Install
|
|
45
45
|
|
|
@@ -62,10 +62,18 @@ const result = await classify({
|
|
|
62
62
|
],
|
|
63
63
|
});
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
//
|
|
67
|
-
|
|
68
|
-
|
|
65
|
+
if (result.action === "block") {
|
|
66
|
+
// classification error or prompt injection — handle appropriately
|
|
67
|
+
console.error(result.block_reason, result.failed_classifiers);
|
|
68
|
+
} else if (result.action === "reply") {
|
|
69
|
+
// preflight can answer this immediately — skip the downstream model
|
|
70
|
+
respondToUser(result.reply.text);
|
|
71
|
+
} else {
|
|
72
|
+
// route to the downstream model
|
|
73
|
+
callDownstream(result.model_id, result.tools);
|
|
74
|
+
respondToUser(result.reply?.text); // show the ack while it works
|
|
75
|
+
}
|
|
76
|
+
|
|
69
77
|
const queries = result.classifier_outputs.memory_retrieval_queries?.queries;
|
|
70
78
|
```
|
|
71
79
|
|
|
@@ -73,36 +81,39 @@ const queries = result.classifier_outputs.memory_retrieval_queries?.queries;
|
|
|
73
81
|
|
|
74
82
|
### Classifying assistant output
|
|
75
83
|
|
|
76
|
-
`inspect()` is a lean second pass for the **assistant's reply**. It only runs classifiers tagged `applies_to: "both"` (or `"assistant"`) in their manifest, and returns
|
|
84
|
+
`inspect()` is a lean second pass for the **assistant's reply**. It only runs classifiers tagged `applies_to: "both"` (or `"assistant"`) in their manifest, and returns the per-classifier outputs plus the message that was inspected — no routing, no action, no block logic.
|
|
77
85
|
|
|
78
86
|
```ts
|
|
79
|
-
const
|
|
87
|
+
const result = await inspect({
|
|
80
88
|
messages: [
|
|
81
89
|
{ role: "user", text: "Summarize the contract." },
|
|
82
90
|
{ role: "assistant", text: "The contract has three notable risks…" },
|
|
83
91
|
],
|
|
84
92
|
});
|
|
85
93
|
|
|
86
|
-
|
|
94
|
+
// result.message is { role: "assistant", text: "..." }
|
|
95
|
+
const risk = result.classifier_outputs.prompt_injection?.risk_level;
|
|
87
96
|
```
|
|
88
97
|
|
|
89
|
-
Use it for things like prompt-injection checks on model output, summarized slugs, or any classifier you want to apply post-hoc. The built-in `prompt_injection` classifier ships tagged `"both"`, so it runs in both passes; everything else is `"user"` by default.
|
|
98
|
+
Use it for things like prompt-injection checks on model output, summarized slugs, or any classifier you want to apply post-hoc. The built-in `prompt_injection` classifier ships tagged `"both"`, so it runs in both passes; everything else is `"user"` by default.
|
|
90
99
|
|
|
91
100
|
## What you get back
|
|
92
101
|
|
|
93
|
-
Every call returns a `PipelineResult`:
|
|
102
|
+
Every `classify()` call returns a `PipelineResult`:
|
|
94
103
|
|
|
95
104
|
| Field | What it is |
|
|
96
105
|
|---|---|
|
|
97
|
-
| `action` |
|
|
106
|
+
| `action` | `"route"` \| `"block"` \| `"reply"` |
|
|
107
|
+
| `block_reason` | `"prompt_injection"` \| `"classification_error"` (only when `action === "block"`) |
|
|
98
108
|
| `target_message_hash` | Stable 8-hex fingerprint of the target message |
|
|
99
|
-
| `
|
|
100
|
-
| `
|
|
101
|
-
| `
|
|
102
|
-
| `
|
|
103
|
-
| `
|
|
104
|
-
|
|
105
|
-
|
|
109
|
+
| `model_id` | Concrete model id chosen from your catalog (or `null` if unresolvable) |
|
|
110
|
+
| `tools` | Recommended tool ids (always an array; empty if not emitted) |
|
|
111
|
+
| `reply` | `{ text }` — the `ack_reply` or `final_reply` text, if any |
|
|
112
|
+
| `prompt_injection` | `{ risk_level }` from the injection classifier, or `null` |
|
|
113
|
+
| `avg_certainty` | Arithmetic mean certainty score (float 0–1) across all classifiers |
|
|
114
|
+
| `min_certainty` | Minimum certainty score (float 0–1) across all classifiers |
|
|
115
|
+
| `failed_classifiers` | Names of classifiers that errored or timed out (always present; may be empty) |
|
|
116
|
+
| `classifier_outputs` | Each classifier's payload with `reason` (string) and `certainty` (float) |
|
|
106
117
|
|
|
107
118
|
Example result:
|
|
108
119
|
|
|
@@ -110,30 +121,19 @@ Example result:
|
|
|
110
121
|
{
|
|
111
122
|
"action": "route",
|
|
112
123
|
"target_message_hash": "b11d5268",
|
|
113
|
-
"
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
124
|
+
"model_id": "gpt-5.5",
|
|
125
|
+
"tools": ["workspace"],
|
|
126
|
+
"reply": { "text": "On it — I'll review the contract now." },
|
|
127
|
+
"prompt_injection": { "risk_level": "normal" },
|
|
128
|
+
"avg_certainty": 0.84,
|
|
129
|
+
"min_certainty": 0.75,
|
|
130
|
+
"failed_classifiers": [],
|
|
118
131
|
"classifier_outputs": {
|
|
119
|
-
"
|
|
120
|
-
"model_specialization": { "model_specialization": "coding" },
|
|
121
|
-
"tools": { "tools": ["workspace"] },
|
|
122
|
-
"prompt_injection": { "risk_level": "normal" },
|
|
123
|
-
"memory_retrieval_queries": { "queries": ["user code review preferences"] }
|
|
124
|
-
},
|
|
125
|
-
"audit": {
|
|
126
|
-
"ack_reply": { "text": "Let me check." },
|
|
127
|
-
"routing": { "model_tier": "frontier_strong", "model_specialization": "coding" },
|
|
128
|
-
"tools": { "tools": ["workspace"] },
|
|
129
|
-
"prompt_injection": { "risk_level": "normal" },
|
|
130
|
-
"classifier_outputs": [ /* every classifier's full output, with reason + certainty */ ],
|
|
131
|
-
"model_recommendation": {
|
|
132
|
-
"id": "gpt-5.5",
|
|
133
|
-
"context_window": 1050000,
|
|
134
|
-
"resolution": { "...": "..." }
|
|
135
|
-
},
|
|
136
|
-
"meta": { "classifiers": { "...": "..." } }
|
|
132
|
+
"model_tier": { "model_tier": "frontier_strong", "reason": "...", "certainty": 0.88 },
|
|
133
|
+
"model_specialization": { "model_specialization": "coding", "reason": "...", "certainty": 0.75 },
|
|
134
|
+
"tools": { "tools": ["workspace"], "reason": "...", "certainty": 0.88 },
|
|
135
|
+
"prompt_injection": { "risk_level": "normal", "reason": "...", "certainty": 0.97 },
|
|
136
|
+
"memory_retrieval_queries": { "queries": ["user code review preferences"], "reason": "...", "certainty": 0.75 }
|
|
137
137
|
}
|
|
138
138
|
}
|
|
139
139
|
```
|
|
@@ -142,16 +142,16 @@ Example result:
|
|
|
142
142
|
|
|
143
143
|
Open Classify ships with eight built-in classifiers; all use the same manifest shape. There is no distinction between "stock" and "custom" — the runtime only cares about which **reserved fields** a classifier declares.
|
|
144
144
|
|
|
145
|
-
| Name | Reserved fields | What the aggregator does with it |
|
|
146
|
-
|
|
147
|
-
| `preflight` | `final_reply`, `ack_reply` |
|
|
148
|
-
| `
|
|
149
|
-
| `model_specialization` | `model_specialization` | Feeds the catalog resolver as a soft constraint |
|
|
150
|
-
| `tools` | `tools` | Sets `
|
|
151
|
-
| `prompt_injection` | `risk_level` |
|
|
152
|
-
| `memory_retrieval_queries` | — | Passes through to `classifier_outputs
|
|
153
|
-
| `conversation_digest` | — | Passes through |
|
|
154
|
-
| `context_shift` | — | Passes through |
|
|
145
|
+
| Name | dispatch_order | Reserved fields | What the aggregator does with it |
|
|
146
|
+
|---|---|---|---|
|
|
147
|
+
| `preflight` | 10 | `final_reply`, `ack_reply` | Sets `action: "reply"` or populates `result.reply` |
|
|
148
|
+
| `model_tier` | 20 | `model_tier` | Feeds the catalog resolver as a soft constraint |
|
|
149
|
+
| `model_specialization` | 30 | `model_specialization` | Feeds the catalog resolver as a soft constraint |
|
|
150
|
+
| `tools` | 40 | `tools` | Sets `result.tools` |
|
|
151
|
+
| `prompt_injection` | 50 | `risk_level` | High-risk/unknown → `action: "block"`; suspicious → advisory |
|
|
152
|
+
| `memory_retrieval_queries` | 60 | — | Passes through to `classifier_outputs` |
|
|
153
|
+
| `conversation_digest` | 70 | — | Passes through |
|
|
154
|
+
| `context_shift` | 80 | — | Passes through |
|
|
155
155
|
|
|
156
156
|
Reserved fields are well-known output keys with canonical JSON Schemas and prompt fragments baked into the runtime. When you declare one in your manifest, you don't have to redeclare its enum values or shape — the runtime injects them.
|
|
157
157
|
|
|
@@ -198,7 +198,7 @@ Rules:
|
|
|
198
198
|
|
|
199
199
|
- `name` must match the directory name.
|
|
200
200
|
- Reserved field names cannot appear in `output_schema.properties` — declare them in `reserved_fields` instead.
|
|
201
|
-
- `fallback`
|
|
201
|
+
- `fallback` requires only `reason` and `certainty`; reserved and custom required fields are exempt from the fallback check.
|
|
202
202
|
- If you want hand-picked examples (preflight does this), add an `output_schema.examples` array. Each entry must validate against the composed schema at load time. Otherwise the runtime synthesizes a skeleton example for you.
|
|
203
203
|
|
|
204
204
|
Consume your output:
|
|
@@ -212,7 +212,7 @@ See [docs/adding-a-classifier.md](docs/adding-a-classifier.md) for a full walkth
|
|
|
212
212
|
|
|
213
213
|
## Using reserved fields in your own classifier
|
|
214
214
|
|
|
215
|
-
Any classifier can emit reserved fields. If you write your own `task_router` that emits `model_tier`, the aggregator will fold it into the model resolution alongside the built-in `
|
|
215
|
+
Any classifier can emit reserved fields. If you write your own `task_router` that emits `model_tier`, the aggregator will fold it into the model resolution alongside the built-in `model_tier` classifier — highest-certainty contributor wins, ties broken by manifest `dispatch_order` ascending.
|
|
216
216
|
|
|
217
217
|
```json
|
|
218
218
|
{
|
|
@@ -262,7 +262,7 @@ Classifiers never emit model ids. They emit constraints; your catalog maps const
|
|
|
262
262
|
}
|
|
263
263
|
```
|
|
264
264
|
|
|
265
|
-
The resolver picks the cheapest model matching `model_specialization` and `model_tier`, relaxing constraints in order when nothing fits
|
|
265
|
+
The resolver picks the cheapest model matching `model_specialization` and `model_tier`, relaxing constraints in order when nothing fits. See [docs/resolver.md](docs/resolver.md) for ranking details.
|
|
266
266
|
|
|
267
267
|
## Input contract
|
|
268
268
|
|
|
@@ -292,14 +292,11 @@ cp open-classify.config.example.json open-classify.config.json
|
|
|
292
292
|
"provider": "ollama",
|
|
293
293
|
"defaultModel": "gemma4:e4b-it-q4_K_M",
|
|
294
294
|
"models": {
|
|
295
|
-
"
|
|
295
|
+
"model_tier": "qwen2.5:7b-instruct-q4_K_M",
|
|
296
296
|
"prompt_injection": "llama-guard3:8b",
|
|
297
297
|
"memory_retrieval_queries": "qwen2.5:7b-instruct-q4_K_M"
|
|
298
298
|
}
|
|
299
299
|
},
|
|
300
|
-
"aggregator": {
|
|
301
|
-
"certaintyThreshold": 0.65
|
|
302
|
-
},
|
|
303
300
|
"catalog": "downstream-models.json"
|
|
304
301
|
}
|
|
305
302
|
```
|
package/dist/src/aggregator.d.ts
CHANGED
|
@@ -1,28 +1,12 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import type { AckReplySignal,
|
|
3
|
-
|
|
4
|
-
import type { ClassifierInput } from "./types.js";
|
|
5
|
-
export declare const DEFAULT_CERTAINTY_THRESHOLD = 0.65;
|
|
6
|
-
/** @deprecated Use DEFAULT_CERTAINTY_THRESHOLD. */
|
|
7
|
-
export declare const DEFAULT_CONFIDENCE_THRESHOLD = 0.65;
|
|
8
|
-
export interface ComposeEnvelopeArgs {
|
|
1
|
+
import type { Catalog, ClassifierPublicOutputs, ClassifierRegistry, ClassifierResults, PipelineResult } from "./manifest.js";
|
|
2
|
+
import type { AckReplySignal, FinalReplySignal, ToolsSignal } from "./stock.js";
|
|
3
|
+
export interface AssembleResultArgs {
|
|
9
4
|
readonly registry: ClassifierRegistry;
|
|
10
5
|
readonly results: ClassifierResults;
|
|
6
|
+
readonly failedClassifiers: ReadonlyArray<string>;
|
|
11
7
|
readonly catalog: Catalog;
|
|
12
|
-
readonly input: ClassifierInput;
|
|
13
|
-
readonly config?: AggregatorConfig;
|
|
14
8
|
}
|
|
15
|
-
|
|
16
|
-
export declare function
|
|
17
|
-
export declare function
|
|
18
|
-
export declare function resolveModel(results: Readonly<{
|
|
19
|
-
routing?: {
|
|
20
|
-
model_tier?: DownstreamModelTier;
|
|
21
|
-
certainty?: Certainty;
|
|
22
|
-
};
|
|
23
|
-
model_specialization?: {
|
|
24
|
-
model_specialization?: ModelSpecialization;
|
|
25
|
-
certainty?: Certainty;
|
|
26
|
-
};
|
|
27
|
-
}>, catalog: Catalog, threshold: number): ModelRecommendation;
|
|
9
|
+
type AssembledResult = Omit<PipelineResult, "target_message_hash">;
|
|
10
|
+
export declare function assembleResult(args: AssembleResultArgs): AssembledResult;
|
|
11
|
+
export declare function buildPublicOutputs(registry: ClassifierRegistry, results: ClassifierResults): ClassifierPublicOutputs;
|
|
28
12
|
export type { FinalReplySignal, AckReplySignal, ToolsSignal };
|
package/dist/src/aggregator.js
CHANGED
|
@@ -1,119 +1,121 @@
|
|
|
1
1
|
import { certaintyScore } from "./stock.js";
|
|
2
|
-
export
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
if (raw === undefined)
|
|
48
|
-
continue;
|
|
49
|
-
const confidence = scoreCertainty(output.certainty);
|
|
50
|
-
if (confidence < threshold)
|
|
51
|
-
continue;
|
|
52
|
-
if (best === undefined || confidence > best.confidence) {
|
|
53
|
-
best = { value: raw, confidence, source: manifest.name };
|
|
54
|
-
}
|
|
2
|
+
export function assembleResult(args) {
|
|
3
|
+
const { registry, results, failedClassifiers, catalog } = args;
|
|
4
|
+
// Pick reserved fields — highest certainty wins, no threshold gate.
|
|
5
|
+
const finalReply = pickField(registry, results, "final_reply");
|
|
6
|
+
const ackReply = pickField(registry, results, "ack_reply");
|
|
7
|
+
const modelTier = pickField(registry, results, "model_tier");
|
|
8
|
+
const modelSpec = pickField(registry, results, "model_specialization");
|
|
9
|
+
const toolsPick = pickField(registry, results, "tools");
|
|
10
|
+
const riskLevel = pickField(registry, results, "risk_level");
|
|
11
|
+
// Resolve concrete model id.
|
|
12
|
+
let model_id = null;
|
|
13
|
+
try {
|
|
14
|
+
const routing = mergeRouting(modelTier?.value, modelSpec?.value);
|
|
15
|
+
model_id = resolveModelFromRouting(routing, catalog).id;
|
|
16
|
+
}
|
|
17
|
+
catch {
|
|
18
|
+
// Catalog error — model_id stays null.
|
|
19
|
+
}
|
|
20
|
+
const tools = toolsPick?.value ?? [];
|
|
21
|
+
const reply = finalReply?.value
|
|
22
|
+
? { text: finalReply.value.text }
|
|
23
|
+
: ackReply?.value
|
|
24
|
+
? { text: ackReply.value.text }
|
|
25
|
+
: null;
|
|
26
|
+
const prompt_injection = riskLevel?.value !== undefined ? { risk_level: riskLevel.value } : null;
|
|
27
|
+
const { avg_certainty, min_certainty } = certaintySummary(registry, results);
|
|
28
|
+
const classifier_outputs = buildPublicOutputs(registry, results);
|
|
29
|
+
// Determine action. Priority: prompt_injection > classification_error > reply > route.
|
|
30
|
+
const isInjectionBlock = riskLevel?.value === "high_risk" || riskLevel?.value === "unknown";
|
|
31
|
+
const isClassificationError = failedClassifiers.length > 0 || reply === null || model_id === null;
|
|
32
|
+
let action;
|
|
33
|
+
let block_reason;
|
|
34
|
+
if (isInjectionBlock) {
|
|
35
|
+
action = "block";
|
|
36
|
+
block_reason = "prompt_injection";
|
|
37
|
+
}
|
|
38
|
+
else if (isClassificationError) {
|
|
39
|
+
action = "block";
|
|
40
|
+
block_reason = "classification_error";
|
|
41
|
+
}
|
|
42
|
+
else if (finalReply?.value !== undefined) {
|
|
43
|
+
action = "reply";
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
action = "route";
|
|
55
47
|
}
|
|
56
|
-
return best;
|
|
57
|
-
}
|
|
58
|
-
function mergeRouting(tier, model_specialization) {
|
|
59
|
-
if (tier === undefined && model_specialization === undefined)
|
|
60
|
-
return undefined;
|
|
61
48
|
return {
|
|
62
|
-
|
|
63
|
-
...(
|
|
49
|
+
action,
|
|
50
|
+
...(block_reason !== undefined ? { block_reason } : {}),
|
|
51
|
+
model_id,
|
|
52
|
+
tools,
|
|
53
|
+
reply,
|
|
54
|
+
prompt_injection,
|
|
55
|
+
avg_certainty,
|
|
56
|
+
min_certainty,
|
|
57
|
+
failed_classifiers: failedClassifiers,
|
|
58
|
+
classifier_outputs,
|
|
64
59
|
};
|
|
65
60
|
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
return Math.max(...finite);
|
|
71
|
-
}
|
|
72
|
-
function buildAuditOutputs(registry, results) {
|
|
73
|
-
const out = [];
|
|
61
|
+
// Build the public classifier_outputs map. Keeps reason + payload fields;
|
|
62
|
+
// converts certainty label to float score.
|
|
63
|
+
export function buildPublicOutputs(registry, results) {
|
|
64
|
+
const out = {};
|
|
74
65
|
for (const manifest of registry) {
|
|
75
66
|
const result = results[manifest.name];
|
|
76
67
|
if (result === undefined)
|
|
77
68
|
continue;
|
|
78
|
-
|
|
69
|
+
const { certainty, ...rest } = result;
|
|
70
|
+
out[manifest.name] = {
|
|
71
|
+
...rest,
|
|
72
|
+
certainty: scoreCertainty(certainty),
|
|
73
|
+
};
|
|
79
74
|
}
|
|
80
75
|
return out;
|
|
81
76
|
}
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
function hasLowConfidenceReservedField(registry, results, field, threshold) {
|
|
77
|
+
function certaintySummary(registry, results) {
|
|
78
|
+
const scores = registry.map((m) => scoreCertainty(results[m.name]?.certainty));
|
|
79
|
+
if (scores.length === 0)
|
|
80
|
+
return { avg_certainty: 0, min_certainty: 0 };
|
|
81
|
+
const min_certainty = Math.min(...scores);
|
|
82
|
+
const avg_certainty = scores.reduce((sum, v) => sum + v, 0) / scores.length;
|
|
83
|
+
return { min_certainty, avg_certainty };
|
|
84
|
+
}
|
|
85
|
+
// Highest certainty wins; ties broken by registry order (already sorted by
|
|
86
|
+
// dispatch_order ascending).
|
|
87
|
+
function pickField(registry, results, field) {
|
|
88
|
+
let best;
|
|
95
89
|
for (const manifest of registry) {
|
|
96
90
|
if (!manifest.reservedFields.includes(field))
|
|
97
91
|
continue;
|
|
98
92
|
const output = results[manifest.name];
|
|
99
93
|
if (output === undefined)
|
|
100
94
|
continue;
|
|
101
|
-
|
|
95
|
+
const raw = output[field];
|
|
96
|
+
if (raw === undefined)
|
|
102
97
|
continue;
|
|
103
|
-
|
|
104
|
-
|
|
98
|
+
const score = scoreCertainty(output.certainty);
|
|
99
|
+
if (best === undefined || score > best.score) {
|
|
100
|
+
best = { value: raw, source: manifest.name, score };
|
|
101
|
+
}
|
|
105
102
|
}
|
|
106
|
-
return
|
|
103
|
+
return best;
|
|
107
104
|
}
|
|
108
105
|
function scoreCertainty(certainty) {
|
|
109
106
|
return certainty === undefined ? 0 : certaintyScore[certainty];
|
|
110
107
|
}
|
|
111
|
-
|
|
108
|
+
// ─── Model resolution ────────────────────────────────────────────────────────
|
|
109
|
+
function mergeRouting(tier, specialization) {
|
|
110
|
+
if (tier === undefined && specialization === undefined)
|
|
111
|
+
return undefined;
|
|
112
|
+
return {
|
|
113
|
+
...(tier === undefined ? {} : { model_tier: tier }),
|
|
114
|
+
...(specialization === undefined ? {} : { model_specialization: specialization }),
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
function resolveModelFromRouting(routing, catalog) {
|
|
112
118
|
const requested = {};
|
|
113
|
-
const confidences = {};
|
|
114
|
-
if (confidence !== undefined) {
|
|
115
|
-
confidences.routing = confidence;
|
|
116
|
-
}
|
|
117
119
|
if (routing?.model_specialization !== undefined) {
|
|
118
120
|
requested.model_specialization = routing.model_specialization;
|
|
119
121
|
}
|
|
@@ -121,74 +123,27 @@ export function resolveModelFromRouting(routing, catalog, confidence, ignoredCon
|
|
|
121
123
|
requested.model_tier = routing.model_tier;
|
|
122
124
|
}
|
|
123
125
|
const passes = [
|
|
124
|
-
{
|
|
125
|
-
{
|
|
126
|
-
{
|
|
127
|
-
{
|
|
126
|
+
{ useSpec: true, useTier: true },
|
|
127
|
+
{ useSpec: true, useTier: false },
|
|
128
|
+
{ useSpec: false, useTier: true },
|
|
129
|
+
{ useSpec: false, useTier: false },
|
|
128
130
|
];
|
|
129
131
|
for (const pass of passes) {
|
|
130
|
-
const
|
|
131
|
-
const matching = catalog.models.filter((
|
|
132
|
+
const constraints = constraintsForPass(requested, pass);
|
|
133
|
+
const matching = catalog.models.filter((m) => matchesConstraints(m, constraints));
|
|
132
134
|
if (matching.length === 0)
|
|
133
135
|
continue;
|
|
134
|
-
|
|
135
|
-
return {
|
|
136
|
-
...modelRecommendationFields(winner),
|
|
137
|
-
resolution: {
|
|
138
|
-
constraints_used,
|
|
139
|
-
constraints_dropped: [
|
|
140
|
-
...ignoredConstraints,
|
|
141
|
-
...relaxedConstraints(requested, constraints_used),
|
|
142
|
-
],
|
|
143
|
-
confidences,
|
|
144
|
-
fell_back_to_default: false,
|
|
145
|
-
},
|
|
146
|
-
};
|
|
136
|
+
return { id: pickBestModel(matching, catalog.models).id };
|
|
147
137
|
}
|
|
148
|
-
const fallback = catalog.models.find((
|
|
138
|
+
const fallback = catalog.models.find((m) => m.id === catalog.default);
|
|
149
139
|
if (!fallback) {
|
|
150
|
-
throw new Error(`catalog default "${catalog.default}" not found in models
|
|
140
|
+
throw new Error(`catalog default "${catalog.default}" not found in models`);
|
|
151
141
|
}
|
|
152
|
-
return {
|
|
153
|
-
...modelRecommendationFields(fallback),
|
|
154
|
-
resolution: {
|
|
155
|
-
constraints_used: {},
|
|
156
|
-
constraints_dropped: [
|
|
157
|
-
...ignoredConstraints,
|
|
158
|
-
...defaultFallbackConstraints(requested),
|
|
159
|
-
],
|
|
160
|
-
confidences,
|
|
161
|
-
fell_back_to_default: true,
|
|
162
|
-
},
|
|
163
|
-
};
|
|
164
|
-
}
|
|
165
|
-
// Test-friendly convenience wrapper: given typed result outputs for the
|
|
166
|
-
// routing-bearing classifiers, merge their reserved fields and resolve a
|
|
167
|
-
// model.
|
|
168
|
-
export function resolveModel(results, catalog, threshold) {
|
|
169
|
-
const routingCert = scoreCertainty(results.routing?.certainty);
|
|
170
|
-
const specCert = scoreCertainty(results.model_specialization?.certainty);
|
|
171
|
-
const tier = routingCert >= threshold ? results.routing?.model_tier : undefined;
|
|
172
|
-
const model_specialization = specCert >= threshold ? results.model_specialization?.model_specialization : undefined;
|
|
173
|
-
const merged = mergeRouting(tier, model_specialization);
|
|
174
|
-
const dropped = [];
|
|
175
|
-
if (tier === undefined && results.routing?.model_tier !== undefined && routingCert < threshold) {
|
|
176
|
-
dropped.push({ axis: "model_tier", reason: "low_confidence" });
|
|
177
|
-
}
|
|
178
|
-
if (model_specialization === undefined &&
|
|
179
|
-
results.model_specialization?.model_specialization !== undefined &&
|
|
180
|
-
specCert < threshold) {
|
|
181
|
-
dropped.push({ axis: "model_specialization", reason: "low_confidence" });
|
|
182
|
-
}
|
|
183
|
-
const confidence = maxConfidence([
|
|
184
|
-
results.routing?.certainty === undefined ? undefined : routingCert,
|
|
185
|
-
results.model_specialization?.certainty === undefined ? undefined : specCert,
|
|
186
|
-
]);
|
|
187
|
-
return resolveModelFromRouting(merged, catalog, confidence, dropped);
|
|
142
|
+
return { id: fallback.id };
|
|
188
143
|
}
|
|
189
144
|
function constraintsForPass(requested, pass) {
|
|
190
145
|
return {
|
|
191
|
-
...(pass.
|
|
146
|
+
...(pass.useSpec && requested.model_specialization !== undefined
|
|
192
147
|
? { model_specialization: requested.model_specialization }
|
|
193
148
|
: {}),
|
|
194
149
|
...(pass.useTier && requested.model_tier !== undefined
|
|
@@ -201,32 +156,11 @@ function matchesConstraints(model, constraints) {
|
|
|
201
156
|
model.specializations.includes(constraints.model_specialization)) &&
|
|
202
157
|
(constraints.model_tier === undefined || model.tier === constraints.model_tier));
|
|
203
158
|
}
|
|
204
|
-
function relaxedConstraints(requested, used) {
|
|
205
|
-
const dropped = [];
|
|
206
|
-
if (requested.model_specialization !== undefined && used.model_specialization === undefined) {
|
|
207
|
-
dropped.push({ axis: "model_specialization", reason: "no_match_relaxed" });
|
|
208
|
-
}
|
|
209
|
-
if (requested.model_tier !== undefined && used.model_tier === undefined) {
|
|
210
|
-
dropped.push({ axis: "model_tier", reason: "no_match_relaxed" });
|
|
211
|
-
}
|
|
212
|
-
return dropped;
|
|
213
|
-
}
|
|
214
|
-
function defaultFallbackConstraints(requested) {
|
|
215
|
-
const dropped = [];
|
|
216
|
-
if (requested.model_specialization !== undefined) {
|
|
217
|
-
dropped.push({ axis: "model_specialization", reason: "default_fallback" });
|
|
218
|
-
}
|
|
219
|
-
if (requested.model_tier !== undefined) {
|
|
220
|
-
dropped.push({ axis: "model_tier", reason: "default_fallback" });
|
|
221
|
-
}
|
|
222
|
-
return dropped;
|
|
223
|
-
}
|
|
224
159
|
function pickBestModel(candidates, catalogOrder) {
|
|
225
160
|
let winner = candidates[0];
|
|
226
161
|
for (let i = 1; i < candidates.length; i++) {
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
winner = candidate;
|
|
162
|
+
if (compareModels(candidates[i], winner, catalogOrder) < 0) {
|
|
163
|
+
winner = candidates[i];
|
|
230
164
|
}
|
|
231
165
|
}
|
|
232
166
|
return winner;
|
|
@@ -238,27 +172,15 @@ function compareModels(a, b, catalogOrder) {
|
|
|
238
172
|
if (a.params_in_billions !== b.params_in_billions) {
|
|
239
173
|
return comparableParams(b) - comparableParams(a);
|
|
240
174
|
}
|
|
241
|
-
if (a.context_window !== b.context_window)
|
|
175
|
+
if (a.context_window !== b.context_window)
|
|
242
176
|
return b.context_window - a.context_window;
|
|
243
|
-
}
|
|
244
177
|
return catalogOrder.indexOf(a) - catalogOrder.indexOf(b);
|
|
245
178
|
}
|
|
246
179
|
function priceIndex(model) {
|
|
247
|
-
if (model.input_tokens_cpm === undefined || model.output_tokens_cpm === undefined)
|
|
180
|
+
if (model.input_tokens_cpm === undefined || model.output_tokens_cpm === undefined)
|
|
248
181
|
return 0;
|
|
249
|
-
}
|
|
250
182
|
return model.input_tokens_cpm + model.output_tokens_cpm;
|
|
251
183
|
}
|
|
252
184
|
function comparableParams(model) {
|
|
253
185
|
return model.params_in_billions ?? 0;
|
|
254
186
|
}
|
|
255
|
-
function modelRecommendationFields(winner) {
|
|
256
|
-
return {
|
|
257
|
-
id: winner.id,
|
|
258
|
-
params_in_billions: winner.params_in_billions,
|
|
259
|
-
context_window: winner.context_window,
|
|
260
|
-
...(winner.input_tokens_cpm === undefined ? {} : { input_tokens_cpm: winner.input_tokens_cpm }),
|
|
261
|
-
...(winner.cached_tokens_cpm === undefined ? {} : { cached_tokens_cpm: winner.cached_tokens_cpm }),
|
|
262
|
-
...(winner.output_tokens_cpm === undefined ? {} : { output_tokens_cpm: winner.output_tokens_cpm }),
|
|
263
|
-
};
|
|
264
|
-
}
|