open-classify 0.1.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -24
- package/dist/src/classifiers/custom/context_shift/manifest.json +31 -0
- package/dist/src/classifiers/custom/context_shift/prompt.md +12 -0
- package/dist/src/classifiers/stock/prompts/preflight-output.md +5 -5
- package/dist/src/classifiers/stock/prompts/preflight.md +9 -9
- package/dist/src/classifiers/stock/prompts/tools-output.md +4 -0
- package/dist/src/classifiers.js +2 -5
- package/dist/src/classify.d.ts +23 -0
- package/dist/src/classify.js +51 -0
- package/dist/src/config.js +1 -11
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.js +1 -0
- package/dist/src/manifest.d.ts +8 -40
- package/dist/src/manifest.js +1 -5
- package/dist/src/ollama.d.ts +0 -12
- package/dist/src/ollama.js +0 -37
- package/dist/src/pipeline.d.ts +2 -1
- package/dist/src/pipeline.js +55 -151
- package/dist/src/stock-validation.js +7 -7
- package/dist/src/stock.d.ts +2 -2
- package/docs/adding-a-classifier.md +2 -1
- package/open-classify.config.example.json +1 -2
- package/package.json +1 -3
- package/dist/src/ui-server.d.ts +0 -1
- package/dist/src/ui-server.js +0 -257
package/README.md
CHANGED
|
@@ -54,16 +54,15 @@ Node 18+. The packaged runner is local Ollama and ships with `gemma4:e4b-it-q4_K
|
|
|
54
54
|
## Hello World
|
|
55
55
|
|
|
56
56
|
```ts
|
|
57
|
-
import {
|
|
57
|
+
import { createClassifier } from "open-classify";
|
|
58
58
|
|
|
59
|
-
const
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
);
|
|
59
|
+
const classify = createClassifier();
|
|
60
|
+
|
|
61
|
+
const result = await classify({
|
|
62
|
+
messages: [
|
|
63
|
+
{ role: "user", text: "Can you review the attached contract?" },
|
|
64
|
+
],
|
|
65
|
+
});
|
|
67
66
|
|
|
68
67
|
if (result.action === "route") {
|
|
69
68
|
// result.downstream.model_id is a concrete model from your catalog.
|
|
@@ -72,6 +71,8 @@ if (result.action === "route") {
|
|
|
72
71
|
}
|
|
73
72
|
```
|
|
74
73
|
|
|
74
|
+
`createClassifier` builds the runner and loads the model catalog once. Reuse the returned `classify` function across your app — every call is a plain function invocation, no re-initialization.
|
|
75
|
+
|
|
75
76
|
## What you get back
|
|
76
77
|
|
|
77
78
|
Every call returns a `PipelineResult` with one of three `action` values:
|
|
@@ -82,7 +83,7 @@ Every call returns a `PipelineResult` with one of three `action` values:
|
|
|
82
83
|
| `reply` | Preflight had a tiny terminal reply | `reply.text` |
|
|
83
84
|
| `block` | Prompt injection flagged confident `high_risk` / `unknown`, or the certainty gate fired | `reason.kind` plus prompt-injection or low-certainty details |
|
|
84
85
|
|
|
85
|
-
All three also carry `
|
|
86
|
+
All three also carry `target_message_hash` (the stable 8-hex fingerprint of the target message), `classifier_outputs` (custom classifier payloads, keyed by name), and an `audit` block. Route results include the downstream target message, not the caller's message history. Short-circuit results include the firing classifier's audit context.
|
|
86
87
|
|
|
87
88
|
For complex requests, look for `audit.ack_reply` on `route` results. It is the immediate acknowledgement your UI can show while the downstream model works. For trivial requests, `result.reply.text` is the complete response and no downstream model is needed.
|
|
88
89
|
|
|
@@ -91,7 +92,7 @@ Example `route` result:
|
|
|
91
92
|
```json
|
|
92
93
|
{
|
|
93
94
|
"action": "route",
|
|
94
|
-
"
|
|
95
|
+
"target_message_hash": "b11d5268",
|
|
95
96
|
"downstream": {
|
|
96
97
|
"model_id": "gpt-5.5",
|
|
97
98
|
"tools": { "tools": ["workspace"] },
|
|
@@ -101,7 +102,7 @@ Example `route` result:
|
|
|
101
102
|
"memory_retrieval_queries": { "queries": ["user code review preferences"] }
|
|
102
103
|
},
|
|
103
104
|
"audit": {
|
|
104
|
-
"ack_reply": { "
|
|
105
|
+
"ack_reply": { "text": "Let me check." },
|
|
105
106
|
"routing": { "model_tier": "frontier_strong" },
|
|
106
107
|
"model_specialization": { "specialization": "coding" },
|
|
107
108
|
"tools": { "tools": ["workspace"] },
|
|
@@ -217,20 +218,19 @@ The resolver picks the cheapest model matching `specialization` and `tier`, rela
|
|
|
217
218
|
|
|
218
219
|
## Input contract
|
|
219
220
|
|
|
220
|
-
`
|
|
221
|
+
`classify({ messages })` — that's the whole input.
|
|
221
222
|
|
|
222
223
|
- `messages` is chronological, oldest to newest, and must end with the user message you want classified.
|
|
223
224
|
- Open Classify keeps whole messages only, drops oldest first to fit a 5,000-char budget, and caps history at 20 messages.
|
|
224
225
|
- Unknown fields are rejected, not passed through.
|
|
225
226
|
|
|
226
|
-
## Local
|
|
227
|
+
## Local setup
|
|
227
228
|
|
|
228
229
|
```sh
|
|
229
230
|
npm run setup
|
|
230
|
-
npm run start
|
|
231
231
|
```
|
|
232
232
|
|
|
233
|
-
|
|
233
|
+
Checks prerequisites (Node, npm, Ollama), confirms the base model is pulled, installs dependencies, and builds. Idempotent — safe to re-run.
|
|
234
234
|
|
|
235
235
|
Optional Ollama runtime config:
|
|
236
236
|
|
|
@@ -261,7 +261,7 @@ cp open-classify.config.example.json open-classify.config.json
|
|
|
261
261
|
}
|
|
262
262
|
```
|
|
263
263
|
|
|
264
|
-
`runner.provider` currently supports `"ollama"` only. `runner.defaultModel` applies to any classifier without an explicit entry. `runner.models.stock` configures built-in classifiers; `runner.models.custom` configures custom classifiers by manifest name. `aggregator.certaintyGate` can be `"min_score"` (lowest score across all stock and custom classifiers), `"avg_score"`, or `"off"`. The setup and
|
|
264
|
+
`runner.provider` currently supports `"ollama"` only. `runner.defaultModel` applies to any classifier without an explicit entry. `runner.models.stock` configures built-in classifiers; `runner.models.custom` configures custom classifiers by manifest name. `aggregator.certaintyGate` can be `"min_score"` (lowest score across all stock and custom classifiers), `"avg_score"`, or `"off"`. The setup script and `loadOpenClassifyConfig()` read `open-classify.config.json`, or `OPEN_CLASSIFY_CONFIG` when you want a different path.
|
|
265
265
|
|
|
266
266
|
## Bring your own backend
|
|
267
267
|
|
|
@@ -275,7 +275,19 @@ type RunClassifier = (
|
|
|
275
275
|
) => Promise<ClassifierOutput>;
|
|
276
276
|
```
|
|
277
277
|
|
|
278
|
-
Pass any `RunClassifier` to `
|
|
278
|
+
Pass any `RunClassifier` to `createClassifier` to back classifiers with OpenAI, Anthropic, a remote service, or anything else. The factory takes care of catalog loading and pipeline wiring; you only own the per-classifier call.
|
|
279
|
+
|
|
280
|
+
```ts
|
|
281
|
+
import { createClassifier, type RunClassifier } from "open-classify";
|
|
282
|
+
|
|
283
|
+
const runClassifier: RunClassifier = async (name, input, signal) => {
|
|
284
|
+
// call your provider of choice, return a ClassifierOutput
|
|
285
|
+
};
|
|
286
|
+
|
|
287
|
+
const classify = createClassifier({ runClassifier });
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
For the lowest-level entry point, `classifyOpenClassifyInput(input, { runClassifier, catalog })` skips the factory entirely.
|
|
279
291
|
|
|
280
292
|
## Further reading
|
|
281
293
|
|
|
@@ -287,10 +299,5 @@ Pass any `RunClassifier` to `classifyOpenClassifyInput(input, { runClassifier, c
|
|
|
287
299
|
## Development
|
|
288
300
|
|
|
289
301
|
```sh
|
|
290
|
-
npm test
|
|
291
|
-
npm run ui # build + serve the local workbench
|
|
302
|
+
npm test # build + run the Node test runner suite
|
|
292
303
|
```
|
|
293
|
-
|
|
294
|
-
## Screenshot
|
|
295
|
-
|
|
296
|
-

|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "custom",
|
|
3
|
+
"name": "context_shift",
|
|
4
|
+
"version": "1.0.0",
|
|
5
|
+
"purpose": "Classify whether the latest message continues, branches from, returns to, or starts a conversation thread.",
|
|
6
|
+
"order": 80,
|
|
7
|
+
"fallback": {
|
|
8
|
+
"reason": "Classifier failed; context relationship is ambiguous.",
|
|
9
|
+
"certainty": "no_signal",
|
|
10
|
+
"output": {
|
|
11
|
+
"decision": "ambiguous"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"output_schema": {
|
|
15
|
+
"type": "object",
|
|
16
|
+
"additionalProperties": false,
|
|
17
|
+
"required": ["decision"],
|
|
18
|
+
"properties": {
|
|
19
|
+
"decision": {
|
|
20
|
+
"type": "string",
|
|
21
|
+
"enum": [
|
|
22
|
+
"same_active_thread",
|
|
23
|
+
"related_branch",
|
|
24
|
+
"return_to_prior_thread",
|
|
25
|
+
"new_thread",
|
|
26
|
+
"ambiguous"
|
|
27
|
+
]
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
You are the context_shift classifier for an AI assistant routing system.
|
|
2
|
+
|
|
3
|
+
`output.decision` describes how the final user message relates to the visible conversation history.
|
|
4
|
+
|
|
5
|
+
Use `same_active_thread` when the final message directly continues, clarifies, corrects, or asks for the next step on the active topic.
|
|
6
|
+
Use `related_branch` when it starts a distinct subtask or angle that still depends on the active topic.
|
|
7
|
+
Use `return_to_prior_thread` when it resumes an earlier visible topic after the active topic changed.
|
|
8
|
+
Use `new_thread` when it starts a materially independent topic that does not rely on the visible conversation history.
|
|
9
|
+
Use `ambiguous` when the visible history is insufficient to choose one of the other labels.
|
|
10
|
+
|
|
11
|
+
Do not infer hidden conversations, saved memories, external thread ids, or user intent that is not visible in the provided messages.
|
|
12
|
+
Certainty should reflect confidence in the chosen label; `ambiguous` may have high certainty when ambiguity is the correct judgment.
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Emit one of these optional fields when applicable:
|
|
2
2
|
|
|
3
|
-
- final_reply: {"
|
|
3
|
+
- final_reply: {"text":"..."} only for tiny terminal answers that need no downstream work.
|
|
4
4
|
Do not use final_reply for drafting, rewriting, analysis, coding, research, or any generated work.
|
|
5
|
-
|
|
6
|
-
- ack_reply: {"
|
|
7
|
-
|
|
5
|
+
text must be 200 characters or fewer.
|
|
6
|
+
- ack_reply: {"text":"..."} when downstream work should continue and a brief acknowledgement would help.
|
|
7
|
+
text must be 200 characters or fewer.
|
|
8
8
|
|
|
9
9
|
Omit both when the request is ambiguous or no acknowledgement is useful.
|
|
10
|
-
Do not answer the user except inside final_reply.
|
|
10
|
+
Do not answer the user except inside final_reply.text or ack_reply.text.
|
|
@@ -8,35 +8,35 @@ Decide whether the target user message can be answered immediately with a tiny t
|
|
|
8
8
|
|
|
9
9
|
Emit **at most one** of these fields:
|
|
10
10
|
|
|
11
|
-
- `final_reply: {"
|
|
12
|
-
- `ack_reply: {"
|
|
11
|
+
- `final_reply: {"text":"..."}` - the reply text **is the complete answer to the user**. Nothing else happens after this. Use for tiny terminal answers like greetings, thanks, spelling, simple arithmetic, and similarly trivial replies.
|
|
12
|
+
- `ack_reply: {"text":"..."}` - a brief acknowledgement shown while downstream work continues. Use when the request needs generated work (drafting, analysis, coding, research) and a courtesy line helps. The text must not contain the answer.
|
|
13
13
|
|
|
14
14
|
Omit both fields when the request is ambiguous or no acknowledgement is useful.
|
|
15
15
|
|
|
16
16
|
Both replies must be 200 characters or fewer.
|
|
17
|
-
Do not address the user anywhere except inside `final_reply.
|
|
17
|
+
Do not address the user anywhere except inside `final_reply.text` or `ack_reply.text`.
|
|
18
18
|
|
|
19
19
|
## Examples
|
|
20
20
|
|
|
21
21
|
User: `hi`
|
|
22
|
-
-> `{"reason":"Greeting.","certainty":"near_certain","final_reply":{"
|
|
22
|
+
-> `{"reason":"Greeting.","certainty":"near_certain","final_reply":{"text":"Hi!"}}`
|
|
23
23
|
Why: greeting needs no downstream model - the reply IS the answer.
|
|
24
24
|
|
|
25
25
|
User: `thanks!`
|
|
26
|
-
-> `{"reason":"Closing acknowledgement.","certainty":"near_certain","final_reply":{"
|
|
26
|
+
-> `{"reason":"Closing acknowledgement.","certainty":"near_certain","final_reply":{"text":"Anytime."}}`
|
|
27
27
|
|
|
28
28
|
User: `what's 2 + 2?`
|
|
29
|
-
-> `{"reason":"Trivial arithmetic.","certainty":"very_strong","final_reply":{"
|
|
29
|
+
-> `{"reason":"Trivial arithmetic.","certainty":"very_strong","final_reply":{"text":"4"}}`
|
|
30
30
|
|
|
31
31
|
User: `how do you spell necessary?`
|
|
32
|
-
-> `{"reason":"Spelling lookup.","certainty":"very_strong","final_reply":{"
|
|
32
|
+
-> `{"reason":"Spelling lookup.","certainty":"very_strong","final_reply":{"text":"necessary"}}`
|
|
33
33
|
|
|
34
34
|
User: `draft an email apologizing to the team for the missed deadline`
|
|
35
|
-
-> `{"reason":"Generated writing task.","certainty":"very_strong","ack_reply":{"
|
|
35
|
+
-> `{"reason":"Generated writing task.","certainty":"very_strong","ack_reply":{"text":"On it."}}`
|
|
36
36
|
Why: the request needs drafted prose. `final_reply` would skip the actual work.
|
|
37
37
|
|
|
38
38
|
User: `review the routing code in this repo`
|
|
39
|
-
-> `{"reason":"Needs code analysis.","certainty":"very_strong","ack_reply":{"
|
|
39
|
+
-> `{"reason":"Needs code analysis.","certainty":"very_strong","ack_reply":{"text":"Let me check."}}`
|
|
40
40
|
|
|
41
41
|
User: `what should I do about the contract?`
|
|
42
42
|
-> `{"reason":"Ambiguous; needs downstream model.","certainty":"strong"}`
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
Emit the tools verdict as top-level fields:
|
|
2
2
|
|
|
3
|
+
- reason: required compressed justification, 120 characters or fewer
|
|
4
|
+
- certainty: required certainty tag from the shared certainty enum
|
|
3
5
|
- tools: array of allowed tool ids
|
|
4
6
|
|
|
5
7
|
{{allowed_tools}}
|
|
6
8
|
|
|
7
9
|
An empty tools array means no downstream tools are required.
|
|
10
|
+
|
|
11
|
+
Shape: {"reason":"...","certainty":"strong","tools":["workspace"]}.
|
package/dist/src/classifiers.js
CHANGED
|
@@ -62,17 +62,14 @@ function loadClassifierManifest(classifierDir, expectedKind) {
|
|
|
62
62
|
return { ...manifest, systemPrompt };
|
|
63
63
|
}
|
|
64
64
|
function validateRegistry(manifests) {
|
|
65
|
+
// Duplicate orders are allowed: same-order classifiers schedule adjacent
|
|
66
|
+
// and run in parallel when concurrency permits, sequentially otherwise.
|
|
65
67
|
const names = new Set();
|
|
66
|
-
const orders = new Set();
|
|
67
68
|
for (const manifest of manifests) {
|
|
68
69
|
if (names.has(manifest.name)) {
|
|
69
70
|
throw new ClassifierManifestError(`duplicate classifier name: ${manifest.name}`);
|
|
70
71
|
}
|
|
71
72
|
names.add(manifest.name);
|
|
72
|
-
if (orders.has(manifest.order)) {
|
|
73
|
-
throw new ClassifierManifestError(`duplicate classifier order: ${manifest.order}`);
|
|
74
|
-
}
|
|
75
|
-
orders.add(manifest.order);
|
|
76
73
|
}
|
|
77
74
|
}
|
|
78
75
|
export const REGISTRY = loadClassifierRegistry();
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { type RunClassifier } from "./classifiers.js";
|
|
2
|
+
import { type OpenClassifyConfig } from "./config.js";
|
|
3
|
+
import type { AggregatorConfig, Catalog, PipelineResult } from "./manifest.js";
|
|
4
|
+
import type { OpenClassifyInput } from "./types.js";
|
|
5
|
+
export type Classifier = (input: OpenClassifyInput, options?: {
|
|
6
|
+
signal?: AbortSignal;
|
|
7
|
+
}) => Promise<PipelineResult>;
|
|
8
|
+
export interface CreateClassifierOptions {
|
|
9
|
+
runClassifier?: RunClassifier;
|
|
10
|
+
catalog?: Catalog;
|
|
11
|
+
config?: OpenClassifyConfig;
|
|
12
|
+
configPath?: string;
|
|
13
|
+
catalogPath?: string;
|
|
14
|
+
skipResourceCheck?: boolean;
|
|
15
|
+
minAvailableMemoryBytes?: number;
|
|
16
|
+
minTotalMemoryBytes?: number;
|
|
17
|
+
fetch?: typeof fetch;
|
|
18
|
+
classifierTimeoutMs?: number;
|
|
19
|
+
classifierRetryCount?: number;
|
|
20
|
+
maxConcurrency?: number;
|
|
21
|
+
aggregator?: AggregatorConfig;
|
|
22
|
+
}
|
|
23
|
+
export declare function createClassifier(options?: CreateClassifierOptions): Classifier;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// High-level facade for the pipeline. Builds the runner and catalog once,
|
|
2
|
+
// then returns a closure callers can invoke many times without re-loading
|
|
3
|
+
// config or the catalog from disk. Backend-agnostic: pass a custom
|
|
4
|
+
// `runClassifier` to bypass the bundled Ollama runner entirely.
|
|
5
|
+
import { loadCatalog } from "./catalog.js";
|
|
6
|
+
import { classifierModelsFromConfig, loadOpenClassifyConfig, } from "./config.js";
|
|
7
|
+
import { assertOllamaResources, createOllamaClassifierRunner, OLLAMA_DEFAULT_CATALOG_PATH, } from "./ollama.js";
|
|
8
|
+
import { classifyOpenClassifyInput } from "./pipeline.js";
|
|
9
|
+
export function createClassifier(options = {}) {
|
|
10
|
+
const fileConfig = options.config ??
|
|
11
|
+
loadOpenClassifyConfig(options.configPath, {
|
|
12
|
+
optional: options.configPath === undefined &&
|
|
13
|
+
process.env.OPEN_CLASSIFY_CONFIG === undefined,
|
|
14
|
+
});
|
|
15
|
+
// When we own the runner, hoist the resource check to the wrapper so a
|
|
16
|
+
// failure surfaces as a top-level rejection — the per-classifier fallback
|
|
17
|
+
// path would otherwise mask it as five "classifier failed" entries.
|
|
18
|
+
const ownsRunner = options.runClassifier === undefined;
|
|
19
|
+
const needsResourceCheck = ownsRunner && !options.skipResourceCheck;
|
|
20
|
+
const runClassifier = options.runClassifier ??
|
|
21
|
+
createOllamaClassifierRunner({
|
|
22
|
+
host: fileConfig?.runner?.host,
|
|
23
|
+
defaultModel: fileConfig?.runner?.defaultModel,
|
|
24
|
+
models: classifierModelsFromConfig(fileConfig),
|
|
25
|
+
options: fileConfig?.runner?.options,
|
|
26
|
+
skipResourceCheck: needsResourceCheck ? true : options.skipResourceCheck,
|
|
27
|
+
fetch: options.fetch,
|
|
28
|
+
});
|
|
29
|
+
const catalog = options.catalog ??
|
|
30
|
+
loadCatalog(options.catalogPath ?? fileConfig?.catalog ?? OLLAMA_DEFAULT_CATALOG_PATH);
|
|
31
|
+
const aggregator = options.aggregator ?? fileConfig?.aggregator;
|
|
32
|
+
let resourceCheck;
|
|
33
|
+
return async (input, callOptions) => {
|
|
34
|
+
if (needsResourceCheck) {
|
|
35
|
+
resourceCheck ??= assertOllamaResources({
|
|
36
|
+
minTotalMemoryBytes: options.minTotalMemoryBytes,
|
|
37
|
+
minAvailableMemoryBytes: options.minAvailableMemoryBytes,
|
|
38
|
+
});
|
|
39
|
+
await resourceCheck;
|
|
40
|
+
}
|
|
41
|
+
return classifyOpenClassifyInput(input, {
|
|
42
|
+
runClassifier,
|
|
43
|
+
catalog,
|
|
44
|
+
classifierTimeoutMs: options.classifierTimeoutMs,
|
|
45
|
+
classifierRetryCount: options.classifierRetryCount,
|
|
46
|
+
maxConcurrency: options.maxConcurrency,
|
|
47
|
+
aggregator,
|
|
48
|
+
signal: callOptions?.signal,
|
|
49
|
+
});
|
|
50
|
+
};
|
|
51
|
+
}
|
package/dist/src/config.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { existsSync, readFileSync } from "node:fs";
|
|
2
2
|
import { REGISTRY } from "./classifiers.js";
|
|
3
|
-
import { CERTAINTY_GATE_MODES, } from "./manifest.js";
|
|
4
3
|
import { STOCK_CLASSIFIER_NAMES } from "./stock.js";
|
|
5
4
|
import { isRecord } from "./validation.js";
|
|
6
5
|
export const DEFAULT_OPEN_CLASSIFY_CONFIG_PATH = "open-classify.config.json";
|
|
@@ -49,7 +48,7 @@ function validateAggregator(value, path) {
|
|
|
49
48
|
if (!isRecord(value)) {
|
|
50
49
|
throwConfig(path, "aggregator must be an object");
|
|
51
50
|
}
|
|
52
|
-
ensureAllowedKeys(value, ["certaintyThreshold", "confidenceThreshold"
|
|
51
|
+
ensureAllowedKeys(value, ["certaintyThreshold", "confidenceThreshold"], path, "aggregator");
|
|
53
52
|
return {
|
|
54
53
|
...(value.certaintyThreshold === undefined
|
|
55
54
|
? {}
|
|
@@ -57,9 +56,6 @@ function validateAggregator(value, path) {
|
|
|
57
56
|
...(value.confidenceThreshold === undefined
|
|
58
57
|
? {}
|
|
59
58
|
: { confidenceThreshold: requireUnitFloat(value.confidenceThreshold, path, "aggregator.confidenceThreshold") }),
|
|
60
|
-
...(value.certaintyGate === undefined
|
|
61
|
-
? {}
|
|
62
|
-
: { certaintyGate: requireCertaintyGateMode(value.certaintyGate, path, "aggregator.certaintyGate") }),
|
|
63
59
|
};
|
|
64
60
|
}
|
|
65
61
|
function validateRunner(value, path) {
|
|
@@ -157,12 +153,6 @@ function requireUnitFloat(value, path, field) {
|
|
|
157
153
|
}
|
|
158
154
|
return number;
|
|
159
155
|
}
|
|
160
|
-
function requireCertaintyGateMode(value, path, field) {
|
|
161
|
-
if (typeof value !== "string" || !CERTAINTY_GATE_MODES.includes(value)) {
|
|
162
|
-
throwConfig(path, `${field} must be one of ${CERTAINTY_GATE_MODES.join(", ")}`);
|
|
163
|
-
}
|
|
164
|
-
return value;
|
|
165
|
-
}
|
|
166
156
|
function ensureAllowedKeys(value, allowedKeys, path, field) {
|
|
167
157
|
const allowed = new Set(allowedKeys);
|
|
168
158
|
for (const key of Object.keys(value)) {
|
package/dist/src/index.d.ts
CHANGED
package/dist/src/index.js
CHANGED
package/dist/src/manifest.d.ts
CHANGED
|
@@ -3,8 +3,6 @@ import type { ClassifierInput, ClassifierRunStatus } from "./types.js";
|
|
|
3
3
|
import type { DownstreamModelTier, ModelSpecialization } from "./enums.js";
|
|
4
4
|
export type ClassifierName = string;
|
|
5
5
|
export type ClassifierResults = Record<ClassifierName, ClassifierOutput>;
|
|
6
|
-
export declare const CERTAINTY_GATE_MODES: readonly ["min_score", "avg_score", "off"];
|
|
7
|
-
export type CertaintyGateMode = (typeof CERTAINTY_GATE_MODES)[number];
|
|
8
6
|
export type RunClassifier = (name: ClassifierName, input: ClassifierInput, signal: AbortSignal) => Promise<ClassifierOutput>;
|
|
9
7
|
export interface CatalogEntry {
|
|
10
8
|
readonly id: string;
|
|
@@ -67,57 +65,27 @@ export type ClassifierEntry = ClassifierOutput & {
|
|
|
67
65
|
readonly status: ClassifierRunStatus;
|
|
68
66
|
readonly version: string;
|
|
69
67
|
};
|
|
68
|
+
export interface CertaintySummary {
|
|
69
|
+
readonly min: number;
|
|
70
|
+
readonly avg: number;
|
|
71
|
+
}
|
|
70
72
|
export interface PipelineMeta {
|
|
71
73
|
readonly classifiers: Record<string, ClassifierEntry>;
|
|
74
|
+
readonly certainty: CertaintySummary;
|
|
72
75
|
}
|
|
73
76
|
export interface PipelineAudit extends Envelope {
|
|
74
77
|
readonly meta: PipelineMeta;
|
|
75
|
-
readonly fired_by?: string;
|
|
76
|
-
readonly certainty_gate?: LowCertaintyBlockReason;
|
|
77
|
-
}
|
|
78
|
-
export type BlockReason = PromptInjectionBlockReason | LowCertaintyBlockReason;
|
|
79
|
-
export interface PromptInjectionBlockReason {
|
|
80
|
-
readonly kind: "prompt_injection";
|
|
81
|
-
readonly risk_level: PromptInjectionSignal["risk_level"];
|
|
82
|
-
}
|
|
83
|
-
export interface LowCertaintyBlockReason {
|
|
84
|
-
readonly kind: "low_certainty";
|
|
85
|
-
readonly mode: Exclude<CertaintyGateMode, "off">;
|
|
86
|
-
readonly threshold: number;
|
|
87
|
-
readonly score: number;
|
|
88
|
-
readonly classifier_scores: Readonly<Record<string, number>>;
|
|
89
|
-
readonly low_classifiers: ReadonlyArray<string>;
|
|
90
78
|
}
|
|
91
|
-
export
|
|
92
|
-
readonly action: "reply";
|
|
93
|
-
readonly message_id: string;
|
|
94
|
-
readonly reply: {
|
|
95
|
-
readonly text: string;
|
|
96
|
-
};
|
|
97
|
-
readonly reason: "preflight_reply";
|
|
98
|
-
readonly classifier_outputs: ClassifierCustomOutputs;
|
|
99
|
-
readonly audit: Pick<PipelineAudit, "final_reply" | "meta" | "fired_by">;
|
|
100
|
-
};
|
|
101
|
-
export type BlockPipelineResult = {
|
|
102
|
-
readonly action: "block";
|
|
103
|
-
readonly message_id: string;
|
|
104
|
-
readonly fired_by?: string;
|
|
105
|
-
readonly reason: BlockReason;
|
|
106
|
-
readonly classifier_outputs: ClassifierCustomOutputs;
|
|
107
|
-
readonly audit: Pick<PipelineAudit, "prompt_injection" | "meta" | "fired_by" | "certainty_gate">;
|
|
108
|
-
};
|
|
109
|
-
export type RoutePipelineResult = {
|
|
79
|
+
export interface PipelineResult {
|
|
110
80
|
readonly action: "route";
|
|
111
|
-
readonly
|
|
81
|
+
readonly target_message_hash: string;
|
|
112
82
|
readonly downstream: DownstreamPayload;
|
|
113
83
|
readonly classifier_outputs: ClassifierCustomOutputs;
|
|
114
84
|
readonly audit: PipelineAudit;
|
|
115
|
-
}
|
|
116
|
-
export type PipelineResult = ReplyPipelineResult | BlockPipelineResult | RoutePipelineResult;
|
|
85
|
+
}
|
|
117
86
|
export interface AggregatorConfig {
|
|
118
87
|
readonly certaintyThreshold?: number;
|
|
119
88
|
/** @deprecated Use certaintyThreshold. */
|
|
120
89
|
readonly confidenceThreshold?: number;
|
|
121
|
-
readonly certaintyGate?: CertaintyGateMode;
|
|
122
90
|
}
|
|
123
91
|
export type ClassifierRegistry = ReadonlyArray<RuntimeClassifierManifest>;
|
package/dist/src/manifest.js
CHANGED
package/dist/src/ollama.d.ts
CHANGED
|
@@ -1,8 +1,4 @@
|
|
|
1
1
|
import { type ClassifierName, type RunClassifier } from "./classifiers.js";
|
|
2
|
-
import { type OpenClassifyConfig } from "./config.js";
|
|
3
|
-
import { classifyOpenClassifyInput } from "./pipeline.js";
|
|
4
|
-
import type { AggregatorConfig, Catalog } from "./manifest.js";
|
|
5
|
-
import type { OpenClassifyInput } from "./types.js";
|
|
6
2
|
export declare const OLLAMA_DEFAULT_HOST = "http://localhost:11434";
|
|
7
3
|
export declare const OLLAMA_BASE_MODEL = "gemma4:e4b-it-q4_K_M";
|
|
8
4
|
export declare const OLLAMA_BASE_MODEL_NATIVE_CONTEXT_LENGTH = 131072;
|
|
@@ -28,13 +24,6 @@ export interface OllamaClassifierRunnerConfig {
|
|
|
28
24
|
minAvailableMemoryBytes?: number;
|
|
29
25
|
minTotalMemoryBytes?: number;
|
|
30
26
|
}
|
|
31
|
-
export interface ClassifyWithOllamaConfig extends OllamaClassifierRunnerConfig {
|
|
32
|
-
catalog?: Catalog;
|
|
33
|
-
catalogPath?: string;
|
|
34
|
-
configPath?: string;
|
|
35
|
-
openClassifyConfig?: OpenClassifyConfig;
|
|
36
|
-
aggregator?: AggregatorConfig;
|
|
37
|
-
}
|
|
38
27
|
export declare class OllamaClassifierError extends Error {
|
|
39
28
|
readonly classifier: ClassifierName;
|
|
40
29
|
readonly model: string;
|
|
@@ -52,4 +41,3 @@ export declare function assertOllamaResources(options?: {
|
|
|
52
41
|
minTotalMemoryBytes?: number;
|
|
53
42
|
minAvailableMemoryBytes?: number;
|
|
54
43
|
}): Promise<void>;
|
|
55
|
-
export declare function classifyWithOllama(input: OpenClassifyInput, config?: ClassifyWithOllamaConfig): ReturnType<typeof classifyOpenClassifyInput>;
|
package/dist/src/ollama.js
CHANGED
|
@@ -10,10 +10,7 @@
|
|
|
10
10
|
// `classifyOpenClassifyInput` — you don't have to use this module at all.
|
|
11
11
|
import { execFile } from "node:child_process";
|
|
12
12
|
import { promisify } from "node:util";
|
|
13
|
-
import { loadCatalog } from "./catalog.js";
|
|
14
13
|
import { CLASSIFIER_NAMES, MODULES_BY_NAME, validateClassifierOutput, } from "./classifiers.js";
|
|
15
|
-
import { classifierModelsFromConfig, loadOpenClassifyConfig, } from "./config.js";
|
|
16
|
-
import { classifyOpenClassifyInput } from "./pipeline.js";
|
|
17
14
|
import { ClassifierValidationError, isRecord, } from "./validation.js";
|
|
18
15
|
export const OLLAMA_DEFAULT_HOST = "http://localhost:11434";
|
|
19
16
|
export const OLLAMA_BASE_MODEL = "gemma4:e4b-it-q4_K_M";
|
|
@@ -93,40 +90,6 @@ export async function assertOllamaResources(options = {}) {
|
|
|
93
90
|
throw new OllamaResourceError(totalMemoryBytes, availableMemoryBytes, minTotalMemoryBytes, minAvailableMemoryBytes);
|
|
94
91
|
}
|
|
95
92
|
}
|
|
96
|
-
export async function classifyWithOllama(input, config = {}) {
|
|
97
|
-
const fileConfig = config.openClassifyConfig ?? loadOpenClassifyConfig(config.configPath, {
|
|
98
|
-
optional: config.configPath === undefined && process.env.OPEN_CLASSIFY_CONFIG === undefined,
|
|
99
|
-
});
|
|
100
|
-
const runnerFileConfig = fileConfig?.runner;
|
|
101
|
-
const runnerConfig = {
|
|
102
|
-
...config,
|
|
103
|
-
host: config.host ?? runnerFileConfig?.host,
|
|
104
|
-
defaultModel: config.defaultModel ?? runnerFileConfig?.defaultModel,
|
|
105
|
-
models: {
|
|
106
|
-
...classifierModelsFromConfig(fileConfig),
|
|
107
|
-
...config.models,
|
|
108
|
-
},
|
|
109
|
-
options: {
|
|
110
|
-
...runnerFileConfig?.options,
|
|
111
|
-
...config.options,
|
|
112
|
-
},
|
|
113
|
-
};
|
|
114
|
-
if (!runnerConfig.skipResourceCheck) {
|
|
115
|
-
await assertOllamaResources({
|
|
116
|
-
minTotalMemoryBytes: runnerConfig.minTotalMemoryBytes,
|
|
117
|
-
minAvailableMemoryBytes: runnerConfig.minAvailableMemoryBytes,
|
|
118
|
-
});
|
|
119
|
-
Object.assign(runnerConfig, {
|
|
120
|
-
skipResourceCheck: true,
|
|
121
|
-
});
|
|
122
|
-
}
|
|
123
|
-
const catalog = config.catalog ?? loadCatalog(config.catalogPath ?? fileConfig?.catalog ?? OLLAMA_DEFAULT_CATALOG_PATH);
|
|
124
|
-
return classifyOpenClassifyInput(input, {
|
|
125
|
-
runClassifier: createOllamaClassifierRunner(runnerConfig),
|
|
126
|
-
catalog,
|
|
127
|
-
aggregator: config.aggregator ?? fileConfig?.aggregator,
|
|
128
|
-
});
|
|
129
|
-
}
|
|
130
93
|
async function runOllamaClassifier(name, input, signal, fetchImpl, host, model, options, allowManifestModel) {
|
|
131
94
|
const module_ = MODULES_BY_NAME[name];
|
|
132
95
|
const systemPrompt = module_.systemPrompt;
|
package/dist/src/pipeline.d.ts
CHANGED
|
@@ -3,7 +3,7 @@ import type { AggregatorConfig, Catalog, PipelineResult } from "./manifest.js";
|
|
|
3
3
|
import type { OpenClassifyInput } from "./types.js";
|
|
4
4
|
export declare const DEFAULT_CLASSIFIER_TIMEOUT_MS = 15000;
|
|
5
5
|
export declare const DEFAULT_CLASSIFIER_RETRY_COUNT = 1;
|
|
6
|
-
export declare const
|
|
6
|
+
export declare const DEFAULT_MAX_CONCURRENCY = 7;
|
|
7
7
|
export declare class OpenClassifyNormalizationError extends Error {
|
|
8
8
|
constructor(cause: unknown);
|
|
9
9
|
}
|
|
@@ -12,6 +12,7 @@ export interface ClassifyOptions {
|
|
|
12
12
|
catalog: Catalog;
|
|
13
13
|
classifierTimeoutMs?: number;
|
|
14
14
|
classifierRetryCount?: number;
|
|
15
|
+
maxConcurrency?: number;
|
|
15
16
|
aggregator?: AggregatorConfig;
|
|
16
17
|
signal?: AbortSignal;
|
|
17
18
|
}
|
package/dist/src/pipeline.js
CHANGED
|
@@ -1,21 +1,16 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { composeEnvelope } from "./aggregator.js";
|
|
2
|
+
import { MODULES_BY_NAME, REGISTRY, } from "./classifiers.js";
|
|
3
3
|
import { normalizeOpenClassifyInput, toClassifierInput } from "./input.js";
|
|
4
4
|
import { certaintyScore, isCustomManifest } from "./stock.js";
|
|
5
5
|
export const DEFAULT_CLASSIFIER_TIMEOUT_MS = 15_000;
|
|
6
6
|
export const DEFAULT_CLASSIFIER_RETRY_COUNT = 1;
|
|
7
|
-
export const
|
|
7
|
+
export const DEFAULT_MAX_CONCURRENCY = 7;
|
|
8
8
|
export class OpenClassifyNormalizationError extends Error {
|
|
9
9
|
constructor(cause) {
|
|
10
10
|
super(errorMessage(cause), { cause });
|
|
11
11
|
this.name = "OpenClassifyNormalizationError";
|
|
12
12
|
}
|
|
13
13
|
}
|
|
14
|
-
// Short-circuit gates are intrinsic to specific stock signals — not configured
|
|
15
|
-
// per-manifest. preflight.final_reply ⇒ reply; confident high_risk or unknown
|
|
16
|
-
// prompt-injection risk ⇒ block. Order matters: preflight is
|
|
17
|
-
// cheaper to evaluate, so we check it first.
|
|
18
|
-
const SHORT_CIRCUIT_GATES = ["preflight", "prompt_injection"];
|
|
19
14
|
export async function classifyOpenClassifyInput(input, options) {
|
|
20
15
|
let request;
|
|
21
16
|
try {
|
|
@@ -37,27 +32,13 @@ export async function classifyOpenClassifyInput(input, options) {
|
|
|
37
32
|
const classifierInput = toClassifierInput(request);
|
|
38
33
|
const classifierTimeoutMs = options.classifierTimeoutMs ?? DEFAULT_CLASSIFIER_TIMEOUT_MS;
|
|
39
34
|
const classifierRetryCount = options.classifierRetryCount ?? DEFAULT_CLASSIFIER_RETRY_COUNT;
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
35
|
+
const maxConcurrency = resolveMaxConcurrency(options.maxConcurrency);
|
|
36
|
+
// REGISTRY is already sorted by `order` ascending (see classifiers.ts).
|
|
37
|
+
// The worker pool dispatches in array order, so classifiers with the same
|
|
38
|
+
// order are scheduled adjacent and run together when slots are free.
|
|
39
|
+
const queue = REGISTRY.map((m) => m.name);
|
|
45
40
|
try {
|
|
46
|
-
|
|
47
|
-
const gateRun = runs.get(gate);
|
|
48
|
-
if (gateRun === undefined)
|
|
49
|
-
continue;
|
|
50
|
-
const settled = await gateRun;
|
|
51
|
-
if (!settled.ok)
|
|
52
|
-
continue;
|
|
53
|
-
const verdict = shortCircuitVerdict(gate, settled.value, threshold);
|
|
54
|
-
if (!verdict)
|
|
55
|
-
continue;
|
|
56
|
-
controller.abort();
|
|
57
|
-
await settleClassifierRunsExcept(runs, [gate]);
|
|
58
|
-
return buildShortCircuitResult(gate, verdict, settled, request.target_message_hash);
|
|
59
|
-
}
|
|
60
|
-
const settled = await Promise.all([...runs.values()]);
|
|
41
|
+
const settled = await runWithConcurrency(queue, maxConcurrency, controller.signal, (name) => runClassifierWithRetry(name, classifierInput, options.runClassifier, controller.signal, classifierTimeoutMs, classifierRetryCount));
|
|
61
42
|
const { results, meta } = collectFullEntries(settled);
|
|
62
43
|
const envelope = composeEnvelope({
|
|
63
44
|
registry: REGISTRY,
|
|
@@ -66,117 +47,48 @@ export async function classifyOpenClassifyInput(input, options) {
|
|
|
66
47
|
input: classifierInput,
|
|
67
48
|
config: options.aggregator,
|
|
68
49
|
});
|
|
69
|
-
const certaintyGate = certaintyGateBlock(options.aggregator, results);
|
|
70
|
-
if (certaintyGate) {
|
|
71
|
-
return buildCertaintyGateBlockResult(request, envelope, results, meta, certaintyGate);
|
|
72
|
-
}
|
|
73
50
|
return buildRouteResult(request, envelope, results, meta);
|
|
74
51
|
}
|
|
75
52
|
finally {
|
|
76
53
|
options.signal?.removeEventListener("abort", abortFromOptions);
|
|
77
54
|
}
|
|
78
55
|
}
|
|
79
|
-
function
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
const preflight = result;
|
|
85
|
-
if (preflight.final_reply !== undefined) {
|
|
86
|
-
return { kind: "reply", final_reply: preflight.final_reply };
|
|
87
|
-
}
|
|
88
|
-
return null;
|
|
56
|
+
function resolveMaxConcurrency(value) {
|
|
57
|
+
if (value === undefined)
|
|
58
|
+
return DEFAULT_MAX_CONCURRENCY;
|
|
59
|
+
if (!Number.isFinite(value) || value < 1) {
|
|
60
|
+
throw new RangeError(`maxConcurrency must be a positive integer; received ${value}`);
|
|
89
61
|
}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
62
|
+
return Math.floor(value);
|
|
63
|
+
}
|
|
64
|
+
async function runWithConcurrency(names, maxConcurrency, signal, start) {
|
|
65
|
+
const results = new Array(names.length);
|
|
66
|
+
let next = 0;
|
|
67
|
+
const worker = async () => {
|
|
68
|
+
while (true) {
|
|
69
|
+
const i = next;
|
|
70
|
+
next += 1;
|
|
71
|
+
if (i >= names.length)
|
|
72
|
+
return;
|
|
73
|
+
const name = names[i];
|
|
74
|
+
if (signal.aborted) {
|
|
75
|
+
// Queued classifiers that never started are reported as not-run so
|
|
76
|
+
// the audit shows their fallback in `meta.classifiers`. In-flight
|
|
77
|
+
// classifiers receive the abort signal directly and resolve normally.
|
|
78
|
+
results[i] = {
|
|
79
|
+
ok: false,
|
|
80
|
+
name,
|
|
81
|
+
error: signal.reason ?? new Error(`${name} classifier aborted before start`),
|
|
82
|
+
reason: "error",
|
|
83
|
+
};
|
|
84
|
+
continue;
|
|
85
|
+
}
|
|
86
|
+
results[i] = await start(name);
|
|
102
87
|
}
|
|
103
|
-
}
|
|
104
|
-
return null;
|
|
105
|
-
}
|
|
106
|
-
function certaintyGateBlock(config, results) {
|
|
107
|
-
const mode = config?.certaintyGate ?? DEFAULT_CERTAINTY_GATE;
|
|
108
|
-
if (mode === "off")
|
|
109
|
-
return undefined;
|
|
110
|
-
const threshold = certaintyThreshold(config);
|
|
111
|
-
const classifier_scores = classifierScores(results);
|
|
112
|
-
const scores = Object.values(classifier_scores);
|
|
113
|
-
const score = mode === "min_score"
|
|
114
|
-
? Math.min(...scores)
|
|
115
|
-
: scores.reduce((sum, value) => sum + value, 0) / scores.length;
|
|
116
|
-
if (score >= threshold)
|
|
117
|
-
return undefined;
|
|
118
|
-
return {
|
|
119
|
-
kind: "low_certainty",
|
|
120
|
-
mode,
|
|
121
|
-
threshold,
|
|
122
|
-
score,
|
|
123
|
-
classifier_scores,
|
|
124
|
-
low_classifiers: Object.entries(classifier_scores)
|
|
125
|
-
.filter(([, value]) => value < threshold)
|
|
126
|
-
.map(([name]) => name),
|
|
127
|
-
};
|
|
128
|
-
}
|
|
129
|
-
function classifierScores(results) {
|
|
130
|
-
return Object.fromEntries(REGISTRY.map((manifest) => [
|
|
131
|
-
manifest.name,
|
|
132
|
-
scoreCertainty(results[manifest.name]?.certainty),
|
|
133
|
-
]));
|
|
134
|
-
}
|
|
135
|
-
function scoreCertainty(certainty) {
|
|
136
|
-
return certainty === undefined ? 0 : certaintyScore[certainty];
|
|
137
|
-
}
|
|
138
|
-
function extractPromptInjection(value) {
|
|
139
|
-
return {
|
|
140
|
-
risk_level: value.risk_level,
|
|
141
|
-
};
|
|
142
|
-
}
|
|
143
|
-
function buildShortCircuitResult(name, verdict, settled, target_message_hash) {
|
|
144
|
-
const manifest = MODULES_BY_NAME[name];
|
|
145
|
-
const value = settled.ok ? settled.value : manifest.fallback;
|
|
146
|
-
const entry = {
|
|
147
|
-
...value,
|
|
148
|
-
status: classifierRunStatus(settled),
|
|
149
|
-
version: manifest.version,
|
|
150
|
-
};
|
|
151
|
-
const meta = { classifiers: { [name]: entry } };
|
|
152
|
-
const classifier_outputs = classifierCustomOutputs({ [name]: value });
|
|
153
|
-
if (verdict.kind === "reply") {
|
|
154
|
-
const preflight = value;
|
|
155
|
-
return {
|
|
156
|
-
action: "reply",
|
|
157
|
-
message_id: target_message_hash,
|
|
158
|
-
reply: { text: verdict.final_reply.reply },
|
|
159
|
-
reason: "preflight_reply",
|
|
160
|
-
classifier_outputs,
|
|
161
|
-
audit: {
|
|
162
|
-
fired_by: name,
|
|
163
|
-
...(preflight.final_reply === undefined ? {} : { final_reply: preflight.final_reply }),
|
|
164
|
-
meta,
|
|
165
|
-
},
|
|
166
|
-
};
|
|
167
|
-
}
|
|
168
|
-
return {
|
|
169
|
-
action: "block",
|
|
170
|
-
message_id: target_message_hash,
|
|
171
|
-
fired_by: name,
|
|
172
|
-
reason: verdict.reason,
|
|
173
|
-
classifier_outputs,
|
|
174
|
-
audit: {
|
|
175
|
-
fired_by: name,
|
|
176
|
-
prompt_injection: verdict.prompt_injection,
|
|
177
|
-
meta,
|
|
178
|
-
},
|
|
179
88
|
};
|
|
89
|
+
const workerCount = Math.max(1, Math.min(maxConcurrency, names.length));
|
|
90
|
+
await Promise.all(Array.from({ length: workerCount }, () => worker()));
|
|
91
|
+
return results;
|
|
180
92
|
}
|
|
181
93
|
function collectFullEntries(settled) {
|
|
182
94
|
const results = {};
|
|
@@ -191,7 +103,18 @@ function collectFullEntries(settled) {
|
|
|
191
103
|
version: manifest.version,
|
|
192
104
|
};
|
|
193
105
|
}
|
|
194
|
-
return { results, meta: { classifiers } };
|
|
106
|
+
return { results, meta: { classifiers, certainty: certaintySummary(results) } };
|
|
107
|
+
}
|
|
108
|
+
function certaintySummary(results) {
|
|
109
|
+
const scores = REGISTRY.map((m) => scoreCertainty(results[m.name]?.certainty));
|
|
110
|
+
if (scores.length === 0)
|
|
111
|
+
return { min: 0, avg: 0 };
|
|
112
|
+
const min = Math.min(...scores);
|
|
113
|
+
const avg = scores.reduce((sum, v) => sum + v, 0) / scores.length;
|
|
114
|
+
return { min, avg };
|
|
115
|
+
}
|
|
116
|
+
function scoreCertainty(certainty) {
|
|
117
|
+
return certainty === undefined ? 0 : certaintyScore[certainty];
|
|
195
118
|
}
|
|
196
119
|
function buildRouteResult(request, envelope, results, meta) {
|
|
197
120
|
const downstream = {
|
|
@@ -205,7 +128,7 @@ function buildRouteResult(request, envelope, results, meta) {
|
|
|
205
128
|
};
|
|
206
129
|
return {
|
|
207
130
|
action: "route",
|
|
208
|
-
|
|
131
|
+
target_message_hash: request.target_message_hash,
|
|
209
132
|
downstream,
|
|
210
133
|
classifier_outputs: classifierCustomOutputs(results),
|
|
211
134
|
audit: {
|
|
@@ -214,21 +137,6 @@ function buildRouteResult(request, envelope, results, meta) {
|
|
|
214
137
|
},
|
|
215
138
|
};
|
|
216
139
|
}
|
|
217
|
-
function buildCertaintyGateBlockResult(request, envelope, results, meta, certaintyGate) {
|
|
218
|
-
return {
|
|
219
|
-
action: "block",
|
|
220
|
-
message_id: request.target_message_hash,
|
|
221
|
-
fired_by: "certainty_gate",
|
|
222
|
-
reason: certaintyGate,
|
|
223
|
-
classifier_outputs: classifierCustomOutputs(results),
|
|
224
|
-
audit: {
|
|
225
|
-
...envelope,
|
|
226
|
-
fired_by: "certainty_gate",
|
|
227
|
-
certainty_gate: certaintyGate,
|
|
228
|
-
meta,
|
|
229
|
-
},
|
|
230
|
-
};
|
|
231
|
-
}
|
|
232
140
|
function classifierCustomOutputs(results) {
|
|
233
141
|
const out = {};
|
|
234
142
|
for (const manifest of REGISTRY) {
|
|
@@ -285,10 +193,6 @@ async function runClassifierAttempt(name, input, runClassifier, rootSignal, time
|
|
|
285
193
|
rootSignal.removeEventListener("abort", abortAttempt);
|
|
286
194
|
}
|
|
287
195
|
}
|
|
288
|
-
async function settleClassifierRunsExcept(runs, keep) {
|
|
289
|
-
const keepSet = new Set(keep);
|
|
290
|
-
await Promise.all([...runs].filter(([name]) => !keepSet.has(name)).map(([, run]) => run.catch(() => undefined)));
|
|
291
|
-
}
|
|
292
196
|
function classifierRunStatus(settled) {
|
|
293
197
|
if (settled.ok)
|
|
294
198
|
return { ok: true, source: "model" };
|
|
@@ -154,15 +154,15 @@ function validateReplySignal(value, classifier, model, field) {
|
|
|
154
154
|
if (!isRecord(value)) {
|
|
155
155
|
throwInvalid(classifier, model, `${field} must be an object`);
|
|
156
156
|
}
|
|
157
|
-
ensureAllowedObjectKeys(value, ["
|
|
158
|
-
const
|
|
159
|
-
if (
|
|
160
|
-
throwInvalid(classifier, model, `${field}.
|
|
157
|
+
ensureAllowedObjectKeys(value, ["text"], classifier, model, field);
|
|
158
|
+
const text = requireString(value.text, classifier, model, `${field}.text`);
|
|
159
|
+
if (text.trim().length === 0) {
|
|
160
|
+
throwInvalid(classifier, model, `${field}.text must not be empty`);
|
|
161
161
|
}
|
|
162
|
-
if (
|
|
163
|
-
throwInvalid(classifier, model, `${field}.
|
|
162
|
+
if (text.length > STOCK_REPLY_MAX_CHARS) {
|
|
163
|
+
throwInvalid(classifier, model, `${field}.text must be ${STOCK_REPLY_MAX_CHARS} characters or fewer`);
|
|
164
164
|
}
|
|
165
|
-
return {
|
|
165
|
+
return { text };
|
|
166
166
|
}
|
|
167
167
|
function validateTierRoutingOutput(value, model) {
|
|
168
168
|
ensureAllowedObjectKeys(value, ["reason", "certainty", "model_tier"], "routing", model, "output");
|
package/dist/src/stock.d.ts
CHANGED
|
@@ -7,10 +7,10 @@ export interface StockClassifierInput {
|
|
|
7
7
|
readonly messages: ReadonlyArray<StockClassifierMessageInput>;
|
|
8
8
|
}
|
|
9
9
|
export interface FinalReplySignal {
|
|
10
|
-
readonly
|
|
10
|
+
readonly text: string;
|
|
11
11
|
}
|
|
12
12
|
export interface AckReplySignal {
|
|
13
|
-
readonly
|
|
13
|
+
readonly text: string;
|
|
14
14
|
}
|
|
15
15
|
export interface RoutingSignal {
|
|
16
16
|
readonly model_tier?: DownstreamModelTier;
|
|
@@ -77,7 +77,8 @@ If the manifest is malformed, the loader throws `ClassifierManifestError` with t
|
|
|
77
77
|
## 5. Consume the output
|
|
78
78
|
|
|
79
79
|
```ts
|
|
80
|
-
const
|
|
80
|
+
const classify = createClassifier({ catalog });
|
|
81
|
+
const result = await classify(input);
|
|
81
82
|
if (result.action === "route") {
|
|
82
83
|
const tags = result.classifier_outputs.topic_tags?.tags ?? [];
|
|
83
84
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-classify",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Manifest-driven classifier runtime for routing user messages to downstream AI models",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Taylor Bayouth",
|
|
@@ -43,9 +43,7 @@
|
|
|
43
43
|
"scripts": {
|
|
44
44
|
"build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && tsc && node scripts/copy-classifier-assets.mjs",
|
|
45
45
|
"setup": "node scripts/setup.mjs",
|
|
46
|
-
"start": "node scripts/start.mjs",
|
|
47
46
|
"test": "npm run build && node --test tests/*.test.mjs",
|
|
48
|
-
"ui": "npm run build && node dist/src/ui-server.js",
|
|
49
47
|
"prepublishOnly": "npm run build && npm test"
|
|
50
48
|
},
|
|
51
49
|
"devDependencies": {
|
package/dist/src/ui-server.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
package/dist/src/ui-server.js
DELETED
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
// A tiny dev/demo HTTP server backing the bundled UI. Two responsibilities:
|
|
2
|
-
// 1. Serve the static UI from `./ui` (HTML, CSS, JS).
|
|
3
|
-
// 2. Run a classification over Server-Sent Events at /api/classify-stream.
|
|
4
|
-
//
|
|
5
|
-
// The SSE event vocabulary the UI listens for:
|
|
6
|
-
// pipeline_started — pipeline boot, includes the classifier list
|
|
7
|
-
// pipeline_phase — coarse phase ("normalizing" / "resource_check" /
|
|
8
|
-
// "running"); useful for progress UI
|
|
9
|
-
// classifier_started — a specific classifier is now running
|
|
10
|
-
// classifier_completed — that classifier returned a model result
|
|
11
|
-
// classifier_failed — that classifier threw without being aborted
|
|
12
|
-
// classifier_aborted — early-exit short-circuit cancelled this classifier
|
|
13
|
-
// classifier_timed_out — the per-classifier timeout fired
|
|
14
|
-
// pipeline_completed — final PipelineResult payload
|
|
15
|
-
// pipeline_failed — pipeline-level error (normalization, etc.)
|
|
16
|
-
//
|
|
17
|
-
// This server is intentionally minimal — no auth, no rate limiting, binds to
|
|
18
|
-
// 127.0.0.1 by default. It is not meant for production.
|
|
19
|
-
import { createReadStream, existsSync } from "node:fs";
|
|
20
|
-
import { createServer } from "node:http";
|
|
21
|
-
import { extname, join, normalize } from "node:path";
|
|
22
|
-
import { loadCatalog } from "./catalog.js";
|
|
23
|
-
import { CLASSIFIER_NAMES, REGISTRY } from "./classifiers.js";
|
|
24
|
-
import { DEFAULT_CERTAINTY_THRESHOLD, certaintyThreshold, } from "./aggregator.js";
|
|
25
|
-
import { classifierModelsFromConfig, loadOpenClassifyConfig, } from "./config.js";
|
|
26
|
-
import { DEFAULT_CERTAINTY_GATE } from "./pipeline.js";
|
|
27
|
-
import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, PROMPT_INJECTION_RISK_LEVEL_VALUES, } from "./enums.js";
|
|
28
|
-
import { createOllamaClassifierRunner, OLLAMA_CONTEXT_LENGTH, OLLAMA_DEFAULT_CATALOG_PATH, OLLAMA_MIN_AVAILABLE_MEMORY_BYTES, OLLAMA_MIN_TOTAL_MEMORY_BYTES, OLLAMA_REQUIRED_PARALLELISM, } from "./ollama.js";
|
|
29
|
-
import { classifyOpenClassifyInput } from "./pipeline.js";
|
|
30
|
-
// Served at GET /api/enums so the UI never needs to duplicate shared enum values.
|
|
31
|
-
const CLASSIFIER_ENUMS = {
|
|
32
|
-
downstream_model_tier: [...DOWNSTREAM_MODEL_TIER_VALUES],
|
|
33
|
-
model_specialization: [...MODEL_SPECIALIZATION_VALUES],
|
|
34
|
-
prompt_injection_risk_level: [...PROMPT_INJECTION_RISK_LEVEL_VALUES],
|
|
35
|
-
};
|
|
36
|
-
const CLASSIFIER_METADATA = REGISTRY.map((classifier) => ({
|
|
37
|
-
name: classifier.name,
|
|
38
|
-
kind: classifier.kind,
|
|
39
|
-
version: classifier.version,
|
|
40
|
-
purpose: classifier.purpose,
|
|
41
|
-
order: classifier.order,
|
|
42
|
-
...("tools" in classifier ? { tools: classifier.tools ?? [] } : {}),
|
|
43
|
-
}));
|
|
44
|
-
const PORT = Number(process.env.OPEN_CLASSIFY_UI_PORT ?? 4317);
|
|
45
|
-
const HOST = process.env.OPEN_CLASSIFY_UI_HOST ?? "127.0.0.1";
|
|
46
|
-
const UI_DIR = join(process.cwd(), "ui");
|
|
47
|
-
const OPEN_CLASSIFY_CONFIG = loadOpenClassifyConfig(undefined, {
|
|
48
|
-
optional: process.env.OPEN_CLASSIFY_CONFIG === undefined,
|
|
49
|
-
});
|
|
50
|
-
const CATALOG_PATH = process.env.OPEN_CLASSIFY_CATALOG_PATH ??
|
|
51
|
-
OPEN_CLASSIFY_CONFIG?.catalog ??
|
|
52
|
-
OLLAMA_DEFAULT_CATALOG_PATH;
|
|
53
|
-
const MIME_TYPES = {
|
|
54
|
-
".html": "text/html; charset=utf-8",
|
|
55
|
-
".css": "text/css; charset=utf-8",
|
|
56
|
-
".js": "text/javascript; charset=utf-8",
|
|
57
|
-
".json": "application/json; charset=utf-8",
|
|
58
|
-
};
|
|
59
|
-
const server = createServer((request, response) => {
|
|
60
|
-
void route(request, response);
|
|
61
|
-
});
|
|
62
|
-
server.listen(PORT, HOST, () => {
|
|
63
|
-
console.log(`Open Classify UI running at http://${HOST}:${PORT}/`);
|
|
64
|
-
});
|
|
65
|
-
async function route(request, response) {
|
|
66
|
-
const startedAt = Date.now();
|
|
67
|
-
console.log(`[req] ${request.method} ${request.url}`);
|
|
68
|
-
try {
|
|
69
|
-
const url = new URL(request.url ?? "/", `http://${request.headers.host ?? "localhost"}`);
|
|
70
|
-
if (request.method === "POST" && url.pathname === "/api/classify-stream") {
|
|
71
|
-
await classifyStream(request, response);
|
|
72
|
-
console.log(`[req] ${request.method} ${request.url} stream ended in ${Date.now() - startedAt}ms`);
|
|
73
|
-
return;
|
|
74
|
-
}
|
|
75
|
-
if (request.method === "GET" && url.pathname === "/api/enums") {
|
|
76
|
-
sendJson(response, CLASSIFIER_ENUMS);
|
|
77
|
-
return;
|
|
78
|
-
}
|
|
79
|
-
if (request.method === "GET" && url.pathname === "/api/classifiers") {
|
|
80
|
-
sendJson(response, {
|
|
81
|
-
classifiers: CLASSIFIER_METADATA,
|
|
82
|
-
aggregator: {
|
|
83
|
-
certaintyGate: OPEN_CLASSIFY_CONFIG?.aggregator?.certaintyGate ?? DEFAULT_CERTAINTY_GATE,
|
|
84
|
-
certaintyThreshold: certaintyThreshold(OPEN_CLASSIFY_CONFIG?.aggregator) ?? DEFAULT_CERTAINTY_THRESHOLD,
|
|
85
|
-
},
|
|
86
|
-
});
|
|
87
|
-
return;
|
|
88
|
-
}
|
|
89
|
-
if (request.method === "GET") {
|
|
90
|
-
serveStatic(url.pathname, response);
|
|
91
|
-
return;
|
|
92
|
-
}
|
|
93
|
-
sendJson(response, { error: "method not allowed" }, 405);
|
|
94
|
-
}
|
|
95
|
-
catch (error) {
|
|
96
|
-
console.error(`[req] ${request.method} ${request.url} failed:`, error);
|
|
97
|
-
sendJson(response, { error: errorMessage(error) }, 500);
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
async function classifyStream(request, response) {
|
|
101
|
-
response.writeHead(200, {
|
|
102
|
-
"content-type": "text/event-stream; charset=utf-8",
|
|
103
|
-
"cache-control": "no-cache, no-transform",
|
|
104
|
-
connection: "keep-alive",
|
|
105
|
-
"x-accel-buffering": "no",
|
|
106
|
-
});
|
|
107
|
-
response.flushHeaders();
|
|
108
|
-
// Disable Nagle so each event flushes immediately. SSE is interactive;
|
|
109
|
-
// batching kills the "live" feel.
|
|
110
|
-
request.socket.setNoDelay(true);
|
|
111
|
-
let closed = false;
|
|
112
|
-
const clientAbortController = new AbortController();
|
|
113
|
-
const abortForClientClose = () => {
|
|
114
|
-
closed = true;
|
|
115
|
-
clientAbortController.abort(new Error("SSE client disconnected"));
|
|
116
|
-
};
|
|
117
|
-
response.on("close", () => {
|
|
118
|
-
abortForClientClose();
|
|
119
|
-
});
|
|
120
|
-
response.on("error", () => {
|
|
121
|
-
abortForClientClose();
|
|
122
|
-
});
|
|
123
|
-
const send = (event, data) => {
|
|
124
|
-
if (closed || response.writableEnded || response.destroyed) {
|
|
125
|
-
console.warn(`[sse] dropped ${event} (closed=${closed} ended=${response.writableEnded})`);
|
|
126
|
-
return;
|
|
127
|
-
}
|
|
128
|
-
const ok = response.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
|
|
129
|
-
console.log(`[sse] -> ${event}${data?.name ? ` ${data.name}` : ""}${ok ? "" : " [backpressure]"}`);
|
|
130
|
-
};
|
|
131
|
-
// SSE comment heartbeat. Some intermediaries (proxies, load balancers)
|
|
132
|
-
// close idle connections; a tiny ping every 5s keeps the stream warm.
|
|
133
|
-
// The leading `:` makes browsers ignore the line as a comment.
|
|
134
|
-
const heartbeat = setInterval(() => {
|
|
135
|
-
if (closed || response.writableEnded || response.destroyed) {
|
|
136
|
-
return;
|
|
137
|
-
}
|
|
138
|
-
response.write(`: ping ${Date.now()}\n\n`);
|
|
139
|
-
}, 5000);
|
|
140
|
-
try {
|
|
141
|
-
const input = (await readJsonBody(request));
|
|
142
|
-
const baseRunner = createOllamaClassifierRunner({
|
|
143
|
-
host: OPEN_CLASSIFY_CONFIG?.runner?.host,
|
|
144
|
-
defaultModel: OPEN_CLASSIFY_CONFIG?.runner?.defaultModel,
|
|
145
|
-
models: classifierModelsFromConfig(OPEN_CLASSIFY_CONFIG),
|
|
146
|
-
options: OPEN_CLASSIFY_CONFIG?.runner?.options,
|
|
147
|
-
});
|
|
148
|
-
const runClassifier = async (name, classifierInput, signal) => {
|
|
149
|
-
send("classifier_started", { name, started_at: Date.now() });
|
|
150
|
-
try {
|
|
151
|
-
const result = await baseRunner(name, classifierInput, signal);
|
|
152
|
-
send("classifier_completed", { name, result, completed_at: Date.now() });
|
|
153
|
-
return result;
|
|
154
|
-
}
|
|
155
|
-
catch (error) {
|
|
156
|
-
console.error(`[classifier] ${name} threw:`, error);
|
|
157
|
-
if (signal.aborted) {
|
|
158
|
-
send(isTimeoutAbort(name, signal) ? "classifier_timed_out" : "classifier_aborted", {
|
|
159
|
-
name,
|
|
160
|
-
reason: errorMessage(signal.reason ?? error),
|
|
161
|
-
completed_at: Date.now(),
|
|
162
|
-
});
|
|
163
|
-
}
|
|
164
|
-
else {
|
|
165
|
-
send("classifier_failed", {
|
|
166
|
-
name,
|
|
167
|
-
error: errorMessage(error),
|
|
168
|
-
completed_at: Date.now(),
|
|
169
|
-
});
|
|
170
|
-
}
|
|
171
|
-
throw error;
|
|
172
|
-
}
|
|
173
|
-
};
|
|
174
|
-
send("pipeline_started", {
|
|
175
|
-
classifiers: CLASSIFIER_NAMES,
|
|
176
|
-
started_at: Date.now(),
|
|
177
|
-
});
|
|
178
|
-
send("pipeline_phase", { phase: "normalizing" });
|
|
179
|
-
send("pipeline_phase", {
|
|
180
|
-
phase: "resource_check",
|
|
181
|
-
required_parallelism: OLLAMA_REQUIRED_PARALLELISM,
|
|
182
|
-
context_length: OLLAMA_CONTEXT_LENGTH,
|
|
183
|
-
min_total_memory_bytes: OLLAMA_MIN_TOTAL_MEMORY_BYTES,
|
|
184
|
-
min_available_memory_bytes: OLLAMA_MIN_AVAILABLE_MEMORY_BYTES,
|
|
185
|
-
});
|
|
186
|
-
send("pipeline_phase", { phase: "running" });
|
|
187
|
-
const result = await classifyOpenClassifyInput(input, {
|
|
188
|
-
runClassifier,
|
|
189
|
-
catalog: loadCatalog(CATALOG_PATH),
|
|
190
|
-
aggregator: OPEN_CLASSIFY_CONFIG?.aggregator,
|
|
191
|
-
signal: clientAbortController.signal,
|
|
192
|
-
});
|
|
193
|
-
send("pipeline_completed", result);
|
|
194
|
-
}
|
|
195
|
-
catch (error) {
|
|
196
|
-
console.error("[pipeline] failed:", error);
|
|
197
|
-
send("pipeline_failed", { error: errorMessage(error) });
|
|
198
|
-
}
|
|
199
|
-
finally {
|
|
200
|
-
clearInterval(heartbeat);
|
|
201
|
-
closed = true;
|
|
202
|
-
if (!response.writableEnded && !response.destroyed) {
|
|
203
|
-
response.end();
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
// Distinguishes a timeout-driven abort from a pipeline early-exit abort, so
|
|
208
|
-
// the UI can show the right state. We sniff the abort reason's message
|
|
209
|
-
// because that's the only signal the pipeline gives us — it doesn't tag
|
|
210
|
-
// reasons with a structured discriminator.
|
|
211
|
-
function isTimeoutAbort(name, signal) {
|
|
212
|
-
return errorMessage(signal.reason).includes(`${name} classifier timed out`);
|
|
213
|
-
}
|
|
214
|
-
function serveStatic(pathname, response) {
|
|
215
|
-
const requestedPath = pathname === "/" ? "/index.html" : pathname;
|
|
216
|
-
// Two-layer path-traversal guard: strip leading `../` segments from the
|
|
217
|
-
// normalized path, then double-check the resolved file is still inside
|
|
218
|
-
// UI_DIR. The redundancy is intentional — defense in depth on a static
|
|
219
|
-
// file server is cheap.
|
|
220
|
-
const safePath = normalize(requestedPath).replace(/^(\.\.[/\\])+/, "");
|
|
221
|
-
const filePath = join(UI_DIR, safePath);
|
|
222
|
-
if (!filePath.startsWith(UI_DIR) || !existsSync(filePath)) {
|
|
223
|
-
sendJson(response, { error: "not found" }, 404);
|
|
224
|
-
return;
|
|
225
|
-
}
|
|
226
|
-
response.writeHead(200, {
|
|
227
|
-
"content-type": MIME_TYPES[extname(filePath)] ?? "application/octet-stream",
|
|
228
|
-
"cache-control": "no-store",
|
|
229
|
-
});
|
|
230
|
-
createReadStream(filePath).on("error", () => response.destroy()).pipe(response);
|
|
231
|
-
}
|
|
232
|
-
function sendJson(response, data, status = 200) {
|
|
233
|
-
response.writeHead(status, { "content-type": "application/json; charset=utf-8" });
|
|
234
|
-
response.end(JSON.stringify(data));
|
|
235
|
-
}
|
|
236
|
-
// 512 KiB cap matches the input contract (5,000-char message budget plus
|
|
237
|
-
// generous slack for history). Big enough for any legitimate
|
|
238
|
-
// classification request, small enough to not be a DoS vector.
|
|
239
|
-
async function readJsonBody(request) {
|
|
240
|
-
const chunks = [];
|
|
241
|
-
let size = 0;
|
|
242
|
-
for await (const chunk of request) {
|
|
243
|
-
const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
244
|
-
size += buffer.byteLength;
|
|
245
|
-
if (size > 512 * 1024) {
|
|
246
|
-
throw new Error("request body is too large");
|
|
247
|
-
}
|
|
248
|
-
chunks.push(buffer);
|
|
249
|
-
}
|
|
250
|
-
return JSON.parse(Buffer.concat(chunks).toString("utf8"));
|
|
251
|
-
}
|
|
252
|
-
function errorMessage(error) {
|
|
253
|
-
if (error instanceof Error) {
|
|
254
|
-
return error.message;
|
|
255
|
-
}
|
|
256
|
-
return String(error);
|
|
257
|
-
}
|