open-classify 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -47
- package/bin/open-classify.mjs +201 -0
- package/dist/src/classifiers.d.ts +12 -5
- package/dist/src/classifiers.js +32 -16
- package/dist/src/classify.d.ts +4 -1
- package/dist/src/classify.js +28 -6
- package/dist/src/config.d.ts +1 -1
- package/dist/src/config.js +0 -5
- package/dist/src/ollama.d.ts +5 -6
- package/dist/src/ollama.js +17 -11
- package/dist/src/pipeline.d.ts +3 -1
- package/dist/src/pipeline.js +15 -10
- package/docs/adding-a-classifier.md +46 -25
- package/open-classify.config.example.json +1 -3
- package/package.json +6 -1
- /package/{dist/src/classifiers → templates}/context_shift/manifest.json +0 -0
- /package/{dist/src/classifiers → templates}/context_shift/prompt.md +0 -0
- /package/{dist/src/classifiers → templates}/conversation_digest/manifest.json +0 -0
- /package/{dist/src/classifiers → templates}/conversation_digest/prompt.md +0 -0
- /package/{dist/src/classifiers → templates}/memory_retrieval_queries/manifest.json +0 -0
- /package/{dist/src/classifiers → templates}/memory_retrieval_queries/prompt.md +0 -0
- /package/{dist/src/classifiers → templates}/tools/manifest.json +0 -0
- /package/{dist/src/classifiers → templates}/tools/prompt.md +0 -0
package/README.md
CHANGED
|
@@ -41,43 +41,53 @@ Every classifier uses the same manifest shape and emits the same output envelope
|
|
|
41
41
|
- **Shape downstream context intentionally.** Built-in and custom classifiers can recommend tools, retrieval queries, summaries, or other context hints without passing the full conversation history back to the caller.
|
|
42
42
|
- **Add another defensive layer.** The `prompt_injection` classifier surfaces instruction-override attempts. High-risk or unknown injection risk automatically sets `action: "block"`.
|
|
43
43
|
|
|
44
|
-
##
|
|
44
|
+
## Getting started
|
|
45
|
+
|
|
46
|
+
Requires Node 18+. The packaged runner uses a local Ollama instance with `gemma4:e4b-it-q4_K_M` as the zero-config classifier model. The backend is pluggable via `open-classify.config.json` or a custom `RunClassifier`.
|
|
47
|
+
|
|
48
|
+
**1. Install**
|
|
45
49
|
|
|
46
50
|
```sh
|
|
47
51
|
npm install open-classify
|
|
48
52
|
```
|
|
49
53
|
|
|
50
|
-
|
|
54
|
+
**2. Scaffold**
|
|
55
|
+
|
|
56
|
+
```sh
|
|
57
|
+
npx open-classify init
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
This creates `open-classify.config.json` and a `classifiers/` directory in your project root. You'll be shown exactly what will be written and asked to confirm. It is safe to re-run: existing files are skipped.
|
|
51
61
|
|
|
52
|
-
|
|
62
|
+
**3. Use it**
|
|
53
63
|
|
|
54
64
|
```ts
|
|
55
65
|
import { createClassifier } from "open-classify";
|
|
56
66
|
|
|
57
|
-
const { classify
|
|
67
|
+
const { classify } = createClassifier({
|
|
68
|
+
extraClassifierDirs: ["./classifiers"],
|
|
69
|
+
});
|
|
58
70
|
|
|
59
71
|
const result = await classify({
|
|
60
|
-
messages: [
|
|
61
|
-
{ role: "user", text: "Can you review the attached contract?" },
|
|
62
|
-
],
|
|
72
|
+
messages: [{ role: "user", text: "Can you review the attached contract?" }],
|
|
63
73
|
});
|
|
64
74
|
|
|
65
|
-
if (result.action === "
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
// preflight can answer this immediately — skip the downstream model
|
|
70
|
-
respondToUser(result.reply.text);
|
|
71
|
-
} else {
|
|
72
|
-
// route to the downstream model
|
|
73
|
-
callDownstream(result.model_id, result.tools);
|
|
74
|
-
respondToUser(result.reply?.text); // show the ack while it works
|
|
75
|
-
}
|
|
75
|
+
if (result.action === "reply") respondToUser(result.reply.text); // preflight answered it
|
|
76
|
+
else if (result.action === "block") handleBlock(result.block_reason); // injection or error
|
|
77
|
+
else callDownstream(result.model_id, result.tools, result.reply?.text); // route the real request
|
|
78
|
+
```
|
|
76
79
|
|
|
77
|
-
|
|
80
|
+
**4. Activate or customize a classifier**
|
|
81
|
+
|
|
82
|
+
Inside `classifiers/` you'll find four `_<name>/` directories — templates copied from the package, inactive because of the underscore prefix. To turn one on, drop the underscore:
|
|
83
|
+
|
|
84
|
+
```sh
|
|
85
|
+
mv classifiers/_tools classifiers/tools
|
|
78
86
|
```
|
|
79
87
|
|
|
80
|
-
`
|
|
88
|
+
If you need to, edit the template's `manifest.json` before activating it (e.g. trim `allowed_tools` for your app). The same underscore convention works the other way too: rename `my_classifier/` → `_my_classifier/` to take any classifier out of the active set without deleting it.
|
|
89
|
+
|
|
90
|
+
To write a new classifier from scratch, drop a `<name>/manifest.json` + `<name>/prompt.md` in `classifiers/`. See [docs/adding-a-classifier.md](docs/adding-a-classifier.md).
|
|
81
91
|
|
|
82
92
|
### Classifying assistant output
|
|
83
93
|
|
|
@@ -140,26 +150,36 @@ Example result:
|
|
|
140
150
|
|
|
141
151
|
## Classifier model
|
|
142
152
|
|
|
143
|
-
|
|
153
|
+
Every classifier — bundled or your own — uses the same two-file shape (`manifest.json` + `prompt.md`) and emits the same envelope: `{ reason, certainty, ...payload }`. Some payload fields are **reserved** (like `model_tier`, `final_reply`, `risk_level`); the aggregator knows how to consume them into the routing decision. Everything else passes through to the caller.
|
|
154
|
+
|
|
155
|
+
Open Classify ships eight built-in classifiers. **Four are mandatory** — they always load, they can't be turned off, and extras can't override them. The other four ship as **templates** that `init` copies into your project as inactive (`_<name>/`); rename to activate.
|
|
144
156
|
|
|
145
|
-
| Name | dispatch_order | Reserved fields | What the aggregator does with it |
|
|
146
|
-
|
|
147
|
-
| `preflight` | 10 | `final_reply`, `ack_reply` | Sets `action: "reply"` or populates `result.reply` |
|
|
148
|
-
| `model_tier` | 20 | `model_tier` | Feeds the catalog resolver as a soft constraint |
|
|
149
|
-
| `model_specialization` | 30 | `model_specialization` | Feeds the catalog resolver as a soft constraint |
|
|
150
|
-
| `
|
|
151
|
-
| `
|
|
152
|
-
| `memory_retrieval_queries` | 60 | — | Passes through to `classifier_outputs` |
|
|
153
|
-
| `conversation_digest` | 70 | — | Passes through |
|
|
154
|
-
| `context_shift` | 80 | — | Passes through |
|
|
157
|
+
| Name | dispatch_order | Reserved fields | Bundled as | What the aggregator does with it |
|
|
158
|
+
|---|---|---|---|---|
|
|
159
|
+
| `preflight` | 10 | `final_reply`, `ack_reply` | mandatory | Sets `action: "reply"` or populates `result.reply` |
|
|
160
|
+
| `model_tier` | 20 | `model_tier` | mandatory | Feeds the catalog resolver as a soft constraint |
|
|
161
|
+
| `model_specialization` | 30 | `model_specialization` | mandatory | Feeds the catalog resolver as a soft constraint |
|
|
162
|
+
| `prompt_injection` | 50 | `risk_level` | mandatory | High-risk/unknown → `action: "block"`; suspicious → advisory |
|
|
163
|
+
| `tools` | 40 | `tools` | template | Sets `result.tools` |
|
|
164
|
+
| `memory_retrieval_queries` | 60 | — | template | Passes through to `classifier_outputs` |
|
|
165
|
+
| `conversation_digest` | 70 | — | template | Passes through |
|
|
166
|
+
| `context_shift` | 80 | — | template | Passes through |
|
|
155
167
|
|
|
156
|
-
|
|
168
|
+
The directory-naming convention (`_<name>/` = inactive) is the only on/off mechanism, and it applies equally to bundled templates and your own classifiers. No `disabled` config, no allow-lists, no flags. If a folder is in `classifiers/` without a leading underscore, it runs.
|
|
157
169
|
|
|
158
|
-
|
|
170
|
+
> Need to customize `preflight`'s prompt or any other mandatory built-in? Use a custom `RunClassifier` (see [Bring your own backend](#bring-your-own-backend)) to intercept it, or fork the package.
|
|
159
171
|
|
|
160
|
-
|
|
172
|
+
## Adding your own classifier
|
|
161
173
|
|
|
162
|
-
|
|
174
|
+
The two files are:
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
classifiers/topic_tags/
|
|
178
|
+
├── manifest.json
|
|
179
|
+
└── prompt.md
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
`manifest.json` declares the output shape and a fallback for when the classifier errors:
|
|
163
183
|
|
|
164
184
|
```json
|
|
165
185
|
{
|
|
@@ -184,31 +204,22 @@ Every classifier is two files in `src/classifiers/<name>/`:
|
|
|
184
204
|
}
|
|
185
205
|
```
|
|
186
206
|
|
|
187
|
-
`prompt.md`
|
|
207
|
+
`prompt.md` is the classification rule in plain language. No need to write JSON examples — the runtime synthesizes one from your schema — and no need to paste enum values for reserved fields:
|
|
188
208
|
|
|
189
209
|
```markdown
|
|
190
210
|
You are the topic_tags classifier.
|
|
191
211
|
|
|
192
212
|
`tags` are short single-word topic labels (lowercase, no spaces). Use at most five.
|
|
193
213
|
Return an empty array when no clear topic applies.
|
|
194
|
-
Do not invent tags for vague or ambiguous messages.
|
|
195
214
|
```
|
|
196
215
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
- `name` must match the directory name.
|
|
200
|
-
- Reserved field names cannot appear in `output_schema.properties` — declare them in `reserved_fields` instead.
|
|
201
|
-
- `fallback` requires only `reason` and `certainty`; reserved and custom required fields are exempt from the fallback check.
|
|
202
|
-
- If you want hand-picked examples (preflight does this), add an `output_schema.examples` array. Each entry must validate against the composed schema at load time. Otherwise the runtime synthesizes a skeleton example for you.
|
|
203
|
-
|
|
204
|
-
Consume your output:
|
|
216
|
+
Consume the output from the `classify()` result:
|
|
205
217
|
|
|
206
218
|
```ts
|
|
207
|
-
const result = await classify(input);
|
|
208
219
|
const tags = result.classifier_outputs.topic_tags?.tags ?? [];
|
|
209
220
|
```
|
|
210
221
|
|
|
211
|
-
See [docs/adding-a-classifier.md](docs/adding-a-classifier.md) for
|
|
222
|
+
Rules: `name` must match the directory name; reserved-field names can't appear in `output_schema.properties` (declare them under `reserved_fields` instead); `fallback` only needs `reason` and `certainty`; name collisions throw at startup. See [docs/adding-a-classifier.md](docs/adding-a-classifier.md) for the full reference.
|
|
212
223
|
|
|
213
224
|
## Using reserved fields in your own classifier
|
|
214
225
|
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// open-classify CLI. Currently exposes a single subcommand: `init`.
|
|
3
|
+
//
|
|
4
|
+
// `init` scaffolds the standard project layout for a consumer:
|
|
5
|
+
// - open-classify.config.json (minimal)
|
|
6
|
+
// - classifiers/
|
|
7
|
+
// - README.md
|
|
8
|
+
// - _conversation_digest/ (templates, prefix means inactive)
|
|
9
|
+
// - _context_shift/
|
|
10
|
+
// - _memory_retrieval_queries/
|
|
11
|
+
// - _tools/
|
|
12
|
+
//
|
|
13
|
+
// Re-run safe: existing files are skipped, never overwritten. Use
|
|
14
|
+
// `--yes` to skip the confirmation prompt (for scripted setup).
|
|
15
|
+
|
|
16
|
+
import { cpSync, existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
17
|
+
import { createInterface } from "node:readline";
|
|
18
|
+
import { dirname, join, relative, resolve } from "node:path";
|
|
19
|
+
import { fileURLToPath } from "node:url";
|
|
20
|
+
|
|
21
|
+
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
const PACKAGE_ROOT = resolve(SCRIPT_DIR, "..");
|
|
23
|
+
const TEMPLATES_DIR = join(PACKAGE_ROOT, "templates");
|
|
24
|
+
|
|
25
|
+
const TEMPLATE_NAMES = ["conversation_digest", "context_shift", "memory_retrieval_queries", "tools"];
|
|
26
|
+
|
|
27
|
+
const CLASSIFIERS_README = `# classifiers/
|
|
28
|
+
|
|
29
|
+
Drop a folder here per classifier. Each folder needs:
|
|
30
|
+
|
|
31
|
+
- \`manifest.json\` — see [open-classify docs](https://github.com/taylorbayouth/open-classify/blob/main/docs/adding-a-classifier.md)
|
|
32
|
+
- \`prompt.md\` — the classifier-specific instructions
|
|
33
|
+
|
|
34
|
+
## Activating templates
|
|
35
|
+
|
|
36
|
+
The four \`_<name>/\` directories below are templates copied from the package — they ship inactive (the loader skips any folder starting with \`_\`). Activate one by dropping the underscore:
|
|
37
|
+
|
|
38
|
+
\`\`\`sh
|
|
39
|
+
mv _tools tools
|
|
40
|
+
\`\`\`
|
|
41
|
+
|
|
42
|
+
You probably also want to edit its \`manifest.json\` first to fit your app (e.g. trim the \`allowed_tools\` list).
|
|
43
|
+
|
|
44
|
+
## Deactivating without deleting
|
|
45
|
+
|
|
46
|
+
Same trick in reverse — rename \`my_classifier\` → \`_my_classifier\` to take it out of the active set without losing your work.
|
|
47
|
+
`;
|
|
48
|
+
|
|
49
|
+
const DEFAULT_CONFIG = {
|
|
50
|
+
runner: {
|
|
51
|
+
provider: "ollama",
|
|
52
|
+
host: "http://127.0.0.1:11434",
|
|
53
|
+
defaultModel: "gemma4:e4b-it-q4_K_M",
|
|
54
|
+
},
|
|
55
|
+
catalog: "downstream-models.json",
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
// CLI entry point: dispatches on the first command-line argument.
//
//   (no argument)   print usage, exit status 1
//   -h / --help     print usage, exit status 0
//   init            run the scaffolder; `--yes` / `-y` anywhere in the
//                   arguments skips the confirmation prompt
//   anything else   report the unknown subcommand on stderr, print usage,
//                   exit status 1
//
// Exit statuses are set through `process.exitCode` instead of calling
// `process.exit()` right after printing, so pending stdout/stderr writes
// (the usage text) are not cut off when the process ends.
async function main() {
  const args = process.argv.slice(2);
  const subcommand = args[0];

  if (!subcommand || subcommand === "-h" || subcommand === "--help") {
    printHelp();
    // Explicitly asking for help is a success; running with no subcommand is not.
    process.exitCode = subcommand ? 0 : 1;
    return;
  }

  if (subcommand === "init") {
    const yes = args.includes("--yes") || args.includes("-y");
    await runInit({ cwd: process.cwd(), yes });
    return;
  }

  console.error(`Unknown subcommand: ${subcommand}`);
  printHelp();
  process.exitCode = 1;
}
|
|
77
|
+
|
|
78
|
+
// Writes the CLI usage text to stdout in a single write.
function printHelp() {
  const usageLines = [
    "open-classify — runtime CLI",
    "",
    "Commands:",
    "init [--yes] Scaffold open-classify.config.json and classifiers/ in the",
    "current directory. Re-run safe: existing files are skipped.",
    "",
    "",
  ];
  process.stdout.write(usageLines.join("\n"));
}
|
|
87
|
+
|
|
88
|
+
// Runs the `init` subcommand for the project at `cwd`.
//
// Builds the scaffolding plan, prints what would be created and what is
// already present, asks for confirmation unless `yes` is set, then runs the
// planned actions in order and prints a snippet showing how to wire the
// scaffolded directory into code. Exits the process with status 1 when the
// user declines the prompt.
async function runInit({ cwd, yes }) {
  const { toCreate, toSkip, actions } = planInit(cwd);

  // Prints a heading followed by one line per path.
  const printPaths = (heading, paths) => {
    console.log(heading);
    paths.forEach((entry) => console.log(` ${entry}`));
  };

  // Nothing new to write: report what is already there and stop.
  if (toCreate.length === 0) {
    console.log("Nothing to do — your project already has all the scaffolded files.");
    if (toSkip.length > 0) printPaths("\nAlready in place:", toSkip);
    return;
  }

  printPaths("This will create:", toCreate);
  if (toSkip.length > 0) printPaths("\nAlready exists (will skip):", toSkip);

  // Only prompt when the caller did not pre-approve with --yes / -y.
  if (!yes && !(await confirm("\nContinue? [Y/n] "))) {
    console.log("Aborted.");
    process.exit(1);
  }

  actions.forEach((run) => run());

  console.log("\nDone. Wire it into your code:\n");
  const snippet = [
    ' import { createClassifier } from "open-classify";',
    " const { classify } = createClassifier({",
    ' extraClassifierDirs: ["./classifiers"],',
    " });",
  ];
  snippet.forEach((line) => console.log(line));
}
|
|
125
|
+
|
|
126
|
+
// Computes the scaffolding plan for `cwd` without touching the filesystem
// (only `existsSync` probes happen here).
//
// Returns:
//   toCreate  display paths (relative to cwd) that will be written
//   toSkip    display paths that already exist and are left alone
//   actions   thunks that perform the writes, in the order they must run
//
// Directory entries in `toCreate` / `toSkip` carry a trailing "/".
function planInit(cwd) {
  const toCreate = [];
  const toSkip = [];
  const actions = [];

  const rel = (target) => relative(cwd, target);
  const classifiersDir = join(cwd, "classifiers");
  // Several actions may run before (or without) the dedicated mkdir action,
  // so each one that writes under classifiers/ creates it on demand.
  const ensureClassifiersDir = () => mkdirSync(classifiersDir, { recursive: true });

  // 1. open-classify.config.json
  const configPath = join(cwd, "open-classify.config.json");
  if (!existsSync(configPath)) {
    toCreate.push(rel(configPath));
    actions.push(() => {
      writeFileSync(configPath, JSON.stringify(DEFAULT_CONFIG, null, 2) + "\n");
      console.log(`wrote ${rel(configPath)}`);
    });
  } else {
    toSkip.push(rel(configPath));
  }

  // 2. classifiers/ (an existing directory is not listed under toSkip)
  if (!existsSync(classifiersDir)) {
    toCreate.push(rel(classifiersDir) + "/");
    actions.push(() => {
      ensureClassifiersDir();
      console.log(`created ${rel(classifiersDir)}/`);
    });
  }

  // 3. classifiers/README.md
  const readmePath = join(classifiersDir, "README.md");
  if (!existsSync(readmePath)) {
    toCreate.push(rel(readmePath));
    actions.push(() => {
      ensureClassifiersDir();
      writeFileSync(readmePath, CLASSIFIERS_README);
      console.log(`wrote ${rel(readmePath)}`);
    });
  } else {
    toSkip.push(rel(readmePath));
  }

  // 4. One inactive `_<name>/` copy per bundled template.
  for (const name of TEMPLATE_NAMES) {
    const activePath = join(classifiersDir, name);
    const inactivePath = join(classifiersDir, `_${name}`);

    // Already activated by the consumer, or already scaffolded: leave it
    // alone. The active path is reported in preference to the inactive one.
    const existing = [activePath, inactivePath].find((candidate) => existsSync(candidate));
    if (existing !== undefined) {
      toSkip.push(rel(existing) + "/");
      continue;
    }

    toCreate.push(rel(inactivePath) + "/");
    actions.push(() => {
      ensureClassifiersDir();
      cpSync(join(TEMPLATES_DIR, name), inactivePath, { recursive: true });
      console.log(`wrote ${rel(inactivePath)}/`);
    });
  }

  return { toCreate, toSkip, actions };
}
|
|
186
|
+
|
|
187
|
+
// Asks a yes/no question on stdin/stdout and resolves to a boolean.
//
// Resolves `true` for an empty answer, "y" or "yes" (case-insensitive,
// surrounding whitespace ignored) and `false` for any other answer.
//
// Also resolves `false` when the input closes before an answer arrives
// (e.g. stdin is at EOF in a non-interactive shell). Without this the
// question callback never runs, the promise never settles, and the process
// ends silently without doing anything.
function confirm(prompt) {
  return new Promise((resolveAnswer) => {
    const rl = createInterface({ input: process.stdin, output: process.stdout });

    // Fires on EOF as well as after our own rl.close() below. In the latter
    // case the promise is already resolved, so this call is a no-op.
    rl.once("close", () => resolveAnswer(false));

    rl.question(prompt, (answer) => {
      const normalized = (answer || "").trim().toLowerCase();
      resolveAnswer(normalized === "" || normalized === "y" || normalized === "yes");
      rl.close();
    });
  });
}
|
|
197
|
+
|
|
198
|
+
main().catch((err) => {
|
|
199
|
+
console.error(err instanceof Error ? err.message : String(err));
|
|
200
|
+
process.exit(1);
|
|
201
|
+
});
|
|
@@ -1,14 +1,21 @@
|
|
|
1
1
|
import type { ClassifierInput } from "./types.js";
|
|
2
2
|
import type { ClassifierName, ClassifierRegistry, RunClassifier } from "./manifest.js";
|
|
3
3
|
import type { ClassifierOutput, RuntimeClassifierManifest } from "./stock.js";
|
|
4
|
+
export declare const BUILTIN_CLASSIFIERS_DIR: string;
|
|
4
5
|
export declare class ClassifierManifestError extends Error {
|
|
5
6
|
constructor(message: string);
|
|
6
7
|
}
|
|
8
|
+
export type ClassifierModuleMap = Readonly<Record<string, RuntimeClassifierManifest>>;
|
|
9
|
+
export interface ClassifierRegistryBundle {
|
|
10
|
+
readonly registry: ClassifierRegistry;
|
|
11
|
+
readonly modulesByName: ClassifierModuleMap;
|
|
12
|
+
readonly names: ReadonlyArray<string>;
|
|
13
|
+
}
|
|
14
|
+
export interface BuildRegistryOptions {
|
|
15
|
+
readonly extraDirs?: ReadonlyArray<string>;
|
|
16
|
+
}
|
|
7
17
|
export declare function loadClassifierRegistry(classifiersDir?: string): RuntimeClassifierManifest[];
|
|
8
|
-
export declare
|
|
9
|
-
export declare
|
|
10
|
-
export declare const MODULES_BY_NAME: Record<string, RuntimeClassifierManifest>;
|
|
18
|
+
export declare function buildClassifierRegistry(options?: BuildRegistryOptions): ClassifierRegistryBundle;
|
|
19
|
+
export declare function validateClassifierOutput(manifest: RuntimeClassifierManifest, value: unknown, model: string): ClassifierOutput;
|
|
11
20
|
export type { ClassifierName, RunClassifier };
|
|
12
|
-
export type RegistryType = typeof REGISTRY;
|
|
13
|
-
export declare function validateClassifierOutput(name: string, value: unknown, model: string): ClassifierOutput;
|
|
14
21
|
export type { ClassifierInput };
|
package/dist/src/classifiers.js
CHANGED
|
@@ -4,9 +4,11 @@ import { fileURLToPath } from "node:url";
|
|
|
4
4
|
import { buildClassifierPrompt } from "./stock-prompt.js";
|
|
5
5
|
import { validateJsonClassifierManifest, validateOutputForManifest, } from "./stock-validation.js";
|
|
6
6
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
|
-
const
|
|
7
|
+
export const BUILTIN_CLASSIFIERS_DIR = join(__dirname, "classifiers");
|
|
8
8
|
// Directories whose names start with "_" are reserved for shared assets
|
|
9
|
-
// (e.g. `_prompts/`) and are not loaded as classifiers.
|
|
9
|
+
// (e.g. `_prompts/`) and are not loaded as classifiers. Consumers can use
|
|
10
|
+
// the same convention in their own classifier directories: rename a
|
|
11
|
+
// classifier to `_<name>/` to deactivate it without deleting it.
|
|
10
12
|
const SHARED_DIRECTORY_PREFIX = "_";
|
|
11
13
|
export class ClassifierManifestError extends Error {
|
|
12
14
|
constructor(message) {
|
|
@@ -14,7 +16,10 @@ export class ClassifierManifestError extends Error {
|
|
|
14
16
|
this.name = "ClassifierManifestError";
|
|
15
17
|
}
|
|
16
18
|
}
|
|
17
|
-
|
|
19
|
+
// Load all classifier manifests under a single directory. Used internally to
|
|
20
|
+
// load the built-ins and each extra directory; callers wanting the merged
|
|
21
|
+
// registry should use `buildClassifierRegistry()` instead.
|
|
22
|
+
export function loadClassifierRegistry(classifiersDir = BUILTIN_CLASSIFIERS_DIR) {
|
|
18
23
|
if (!existsSync(classifiersDir)) {
|
|
19
24
|
throw new ClassifierManifestError(`classifier directory not found: ${classifiersDir}`);
|
|
20
25
|
}
|
|
@@ -26,11 +31,29 @@ export function loadClassifierRegistry(classifiersDir = CLASSIFIERS_DIR) {
|
|
|
26
31
|
continue;
|
|
27
32
|
manifests.push(loadClassifierManifest(join(classifiersDir, entry.name)));
|
|
28
33
|
}
|
|
29
|
-
|
|
30
|
-
|
|
34
|
+
return manifests;
|
|
35
|
+
}
|
|
36
|
+
// Build a complete classifier registry from the bundled built-ins plus any
|
|
37
|
+
// extra directories supplied by the caller. Sorts by dispatch_order
|
|
38
|
+
// ascending (manifests without dispatch_order sort last). Rejects duplicate
|
|
39
|
+
// names.
|
|
40
|
+
//
|
|
41
|
+
// Mandatory built-ins (preflight, model_tier, model_specialization,
|
|
42
|
+
// prompt_injection) always load. Extras with the same name as a built-in
|
|
43
|
+
// throw — there's no override mechanism. Customise by editing the bundled
|
|
44
|
+
// manifest in your own fork, or replace behaviour entirely with a custom
|
|
45
|
+
// `runClassifier`.
|
|
46
|
+
export function buildClassifierRegistry(options = {}) {
|
|
47
|
+
const manifests = [
|
|
48
|
+
...loadClassifierRegistry(BUILTIN_CLASSIFIERS_DIR),
|
|
49
|
+
...(options.extraDirs ?? []).flatMap((dir) => loadClassifierRegistry(dir)),
|
|
50
|
+
];
|
|
31
51
|
manifests.sort((a, b) => (a.dispatch_order ?? Infinity) - (b.dispatch_order ?? Infinity));
|
|
32
52
|
validateRegistry(manifests);
|
|
33
|
-
|
|
53
|
+
const registry = manifests;
|
|
54
|
+
const modulesByName = Object.fromEntries(manifests.map((m) => [m.name, m]));
|
|
55
|
+
const names = manifests.map((m) => m.name);
|
|
56
|
+
return { registry, modulesByName, names };
|
|
34
57
|
}
|
|
35
58
|
function loadClassifierManifest(classifierDir) {
|
|
36
59
|
const manifestPath = join(classifierDir, "manifest.json");
|
|
@@ -69,18 +92,11 @@ function validateRegistry(manifests) {
|
|
|
69
92
|
const names = new Set();
|
|
70
93
|
for (const manifest of manifests) {
|
|
71
94
|
if (names.has(manifest.name)) {
|
|
72
|
-
throw new ClassifierManifestError(`duplicate classifier name: ${manifest.name}
|
|
95
|
+
throw new ClassifierManifestError(`duplicate classifier name: ${manifest.name} — extras cannot override built-ins or other extras. Rename your classifier or run it under a different name.`);
|
|
73
96
|
}
|
|
74
97
|
names.add(manifest.name);
|
|
75
98
|
}
|
|
76
99
|
}
|
|
77
|
-
export
|
|
78
|
-
|
|
79
|
-
export const MODULES_BY_NAME = Object.fromEntries(REGISTRY.map((m) => [m.name, m]));
|
|
80
|
-
export function validateClassifierOutput(name, value, model) {
|
|
81
|
-
const manifest = MODULES_BY_NAME[name];
|
|
82
|
-
if (!manifest) {
|
|
83
|
-
throw new ClassifierManifestError(`unknown classifier: ${name}`);
|
|
84
|
-
}
|
|
85
|
-
return validateOutputForManifest(manifest, value, { classifier: name, model });
|
|
100
|
+
export function validateClassifierOutput(manifest, value, model) {
|
|
101
|
+
return validateOutputForManifest(manifest, value, { classifier: manifest.name, model });
|
|
86
102
|
}
|
package/dist/src/classify.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type RunClassifier } from "./classifiers.js";
|
|
1
|
+
import { ClassifierManifestError, type ClassifierRegistryBundle, type RunClassifier } from "./classifiers.js";
|
|
2
2
|
import { type OpenClassifyConfig } from "./config.js";
|
|
3
3
|
import type { Catalog, InspectResult, PipelineResult } from "./manifest.js";
|
|
4
4
|
import type { OpenClassifyInput } from "./types.js";
|
|
@@ -11,10 +11,12 @@ export type Inspector = (input: OpenClassifyInput, options?: {
|
|
|
11
11
|
export interface OpenClassify {
|
|
12
12
|
readonly classify: Classifier;
|
|
13
13
|
readonly inspect: Inspector;
|
|
14
|
+
readonly registry: ClassifierRegistryBundle;
|
|
14
15
|
}
|
|
15
16
|
export interface CreateClassifierOptions {
|
|
16
17
|
runClassifier?: RunClassifier;
|
|
17
18
|
catalog?: Catalog;
|
|
19
|
+
extraClassifierDirs?: ReadonlyArray<string>;
|
|
18
20
|
config?: OpenClassifyConfig;
|
|
19
21
|
configPath?: string;
|
|
20
22
|
catalogPath?: string;
|
|
@@ -27,3 +29,4 @@ export interface CreateClassifierOptions {
|
|
|
27
29
|
maxConcurrency?: number;
|
|
28
30
|
}
|
|
29
31
|
export declare function createClassifier(options?: CreateClassifierOptions): OpenClassify;
|
|
32
|
+
export { ClassifierManifestError };
|
package/dist/src/classify.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
// High-level facade for the pipeline. Builds the runner and
|
|
2
|
-
// then returns two functions — classify() for the
|
|
3
|
-
// and inspect() for the assistant-output lean pass.
|
|
4
|
-
// custom `runClassifier` to bypass the bundled
|
|
1
|
+
// High-level facade for the pipeline. Builds the runner, registry, and
|
|
2
|
+
// catalog once, then returns two functions — classify() for the
|
|
3
|
+
// user-input/routing pass and inspect() for the assistant-output lean pass.
|
|
4
|
+
// Backend-agnostic: pass a custom `runClassifier` to bypass the bundled
|
|
5
|
+
// Ollama runner entirely.
|
|
5
6
|
import { loadCatalog } from "./catalog.js";
|
|
6
|
-
import {
|
|
7
|
+
import { buildClassifierRegistry, ClassifierManifestError, } from "./classifiers.js";
|
|
8
|
+
import { classifierModelsFromConfig, loadOpenClassifyConfig, OpenClassifyConfigError, } from "./config.js";
|
|
7
9
|
import { assertOllamaResources, createOllamaClassifierRunner, OLLAMA_DEFAULT_CATALOG_PATH, } from "./ollama.js";
|
|
8
10
|
import { classifyOpenClassifyInput, inspectOpenClassifyInput, } from "./pipeline.js";
|
|
9
11
|
export function createClassifier(options = {}) {
|
|
@@ -12,6 +14,20 @@ export function createClassifier(options = {}) {
|
|
|
12
14
|
optional: options.configPath === undefined &&
|
|
13
15
|
process.env.OPEN_CLASSIFY_CONFIG === undefined,
|
|
14
16
|
});
|
|
17
|
+
const registryBundle = buildClassifierRegistry({
|
|
18
|
+
extraDirs: options.extraClassifierDirs,
|
|
19
|
+
});
|
|
20
|
+
// Cross-check `runner.models` keys against the loaded registry so a typo
|
|
21
|
+
// or stale reference fails fast at construction time instead of being
|
|
22
|
+
// silently ignored by the runner.
|
|
23
|
+
if (fileConfig?.runner?.models !== undefined) {
|
|
24
|
+
const known = new Set(registryBundle.names);
|
|
25
|
+
for (const name of Object.keys(fileConfig.runner.models)) {
|
|
26
|
+
if (!known.has(name)) {
|
|
27
|
+
throw new OpenClassifyConfigError(`runner.models.${name} is not a loaded classifier (loaded: ${registryBundle.names.join(", ")})`);
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
15
31
|
// When we own the runner, hoist the resource check to the wrapper so a
|
|
16
32
|
// failure surfaces as a top-level rejection — the per-classifier fallback
|
|
17
33
|
// path would otherwise mask it as a "classifier failed" entry for every classifier.
|
|
@@ -19,6 +35,7 @@ export function createClassifier(options = {}) {
|
|
|
19
35
|
const needsResourceCheck = ownsRunner && !options.skipResourceCheck;
|
|
20
36
|
const runClassifier = options.runClassifier ??
|
|
21
37
|
createOllamaClassifierRunner({
|
|
38
|
+
modulesByName: registryBundle.modulesByName,
|
|
22
39
|
host: fileConfig?.runner?.host,
|
|
23
40
|
defaultModel: fileConfig?.runner?.defaultModel,
|
|
24
41
|
models: classifierModelsFromConfig(fileConfig),
|
|
@@ -43,6 +60,7 @@ export function createClassifier(options = {}) {
|
|
|
43
60
|
return classifyOpenClassifyInput(input, {
|
|
44
61
|
runClassifier,
|
|
45
62
|
catalog,
|
|
63
|
+
registry: registryBundle.registry,
|
|
46
64
|
classifierTimeoutMs: options.classifierTimeoutMs,
|
|
47
65
|
classifierRetryCount: options.classifierRetryCount,
|
|
48
66
|
maxConcurrency: options.maxConcurrency,
|
|
@@ -53,11 +71,15 @@ export function createClassifier(options = {}) {
|
|
|
53
71
|
await ensureResources();
|
|
54
72
|
return inspectOpenClassifyInput(input, {
|
|
55
73
|
runClassifier,
|
|
74
|
+
registry: registryBundle.registry,
|
|
56
75
|
classifierTimeoutMs: options.classifierTimeoutMs,
|
|
57
76
|
classifierRetryCount: options.classifierRetryCount,
|
|
58
77
|
maxConcurrency: options.maxConcurrency,
|
|
59
78
|
signal: callOptions?.signal,
|
|
60
79
|
});
|
|
61
80
|
};
|
|
62
|
-
return { classify, inspect };
|
|
81
|
+
return { classify, inspect, registry: registryBundle };
|
|
63
82
|
}
|
|
83
|
+
// Re-export so callers can `import { ClassifierManifestError } from "open-classify"`
|
|
84
|
+
// and catch directory/name collision errors from createClassifier().
|
|
85
|
+
export { ClassifierManifestError };
|
package/dist/src/config.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { ClassifierName } from "./classifiers.js";
|
|
2
2
|
export declare const DEFAULT_OPEN_CLASSIFY_CONFIG_PATH = "open-classify.config.json";
|
|
3
3
|
export interface OpenClassifyConfig {
|
|
4
4
|
readonly runner?: OllamaRunnerConfig;
|
package/dist/src/config.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
-
import { CLASSIFIER_NAMES } from "./classifiers.js";
|
|
3
2
|
import { isRecord } from "./validation.js";
|
|
4
3
|
export const DEFAULT_OPEN_CLASSIFY_CONFIG_PATH = "open-classify.config.json";
|
|
5
4
|
export class OpenClassifyConfigError extends Error {
|
|
@@ -81,12 +80,8 @@ function validateModels(value, path) {
|
|
|
81
80
|
if (!isRecord(value)) {
|
|
82
81
|
throwConfig(path, "runner.models must be an object");
|
|
83
82
|
}
|
|
84
|
-
const allowed = new Set(CLASSIFIER_NAMES);
|
|
85
83
|
const out = {};
|
|
86
84
|
for (const [name, model] of Object.entries(value)) {
|
|
87
|
-
if (!allowed.has(name)) {
|
|
88
|
-
throwConfig(path, `runner.models.${name} is not a known classifier`);
|
|
89
|
-
}
|
|
90
85
|
out[name] = requireString(model, path, `runner.models.${name}`);
|
|
91
86
|
}
|
|
92
87
|
return out;
|
package/dist/src/ollama.d.ts
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
import { type ClassifierName, type RunClassifier } from "./classifiers.js";
|
|
1
|
+
import { type ClassifierModuleMap, type ClassifierName, type RunClassifier } from "./classifiers.js";
|
|
2
2
|
export declare const OLLAMA_DEFAULT_HOST = "http://localhost:11434";
|
|
3
3
|
export declare const OLLAMA_BASE_MODEL = "gemma4:e4b-it-q4_K_M";
|
|
4
4
|
export declare const OLLAMA_BASE_MODEL_NATIVE_CONTEXT_LENGTH = 131072;
|
|
5
|
-
export declare const OLLAMA_REQUIRED_PARALLELISM: number;
|
|
6
5
|
export declare const OLLAMA_DEFAULT_CATALOG_PATH = "downstream-models.json";
|
|
7
6
|
export declare const OLLAMA_CONTEXT_LENGTH = 4096;
|
|
8
7
|
export declare const OLLAMA_MIN_TOTAL_MEMORY_BYTES: number;
|
|
9
8
|
export declare const OLLAMA_MIN_AVAILABLE_MEMORY_BYTES: number;
|
|
10
|
-
export declare const OLLAMA_CLASSIFIER_MODELS: Record<ClassifierName, string | null>;
|
|
11
9
|
export interface OllamaOptions {
|
|
12
10
|
temperature?: number;
|
|
13
11
|
top_p?: number;
|
|
@@ -15,14 +13,15 @@ export interface OllamaOptions {
|
|
|
15
13
|
num_ctx?: number;
|
|
16
14
|
}
|
|
17
15
|
export interface OllamaClassifierRunnerConfig {
|
|
16
|
+
modulesByName: ClassifierModuleMap;
|
|
17
|
+
minTotalMemoryBytes?: number;
|
|
18
|
+
minAvailableMemoryBytes?: number;
|
|
18
19
|
host?: string;
|
|
19
20
|
defaultModel?: string;
|
|
20
21
|
models?: Partial<Record<ClassifierName, string | null>>;
|
|
21
22
|
options?: OllamaOptions;
|
|
22
23
|
fetch?: typeof fetch;
|
|
23
24
|
skipResourceCheck?: boolean;
|
|
24
|
-
minAvailableMemoryBytes?: number;
|
|
25
|
-
minTotalMemoryBytes?: number;
|
|
26
25
|
}
|
|
27
26
|
export declare class OllamaClassifierError extends Error {
|
|
28
27
|
readonly classifier: ClassifierName;
|
|
@@ -36,7 +35,7 @@ export declare class OllamaResourceError extends Error {
|
|
|
36
35
|
readonly minAvailableMemoryBytes: number;
|
|
37
36
|
constructor(totalMemoryBytes: number, availableMemoryBytes: number, minTotalMemoryBytes: number, minAvailableMemoryBytes: number);
|
|
38
37
|
}
|
|
39
|
-
export declare function createOllamaClassifierRunner(config
|
|
38
|
+
export declare function createOllamaClassifierRunner(config: OllamaClassifierRunnerConfig): RunClassifier;
|
|
40
39
|
export declare function assertOllamaResources(options?: {
|
|
41
40
|
minTotalMemoryBytes?: number;
|
|
42
41
|
minAvailableMemoryBytes?: number;
|
package/dist/src/ollama.js
CHANGED
|
@@ -10,12 +10,11 @@
|
|
|
10
10
|
// `classifyOpenClassifyInput` — you don't have to use this module at all.
|
|
11
11
|
import { execFile } from "node:child_process";
|
|
12
12
|
import { promisify } from "node:util";
|
|
13
|
-
import {
|
|
13
|
+
import { validateClassifierOutput, } from "./classifiers.js";
|
|
14
14
|
import { ClassifierValidationError, isRecord, } from "./validation.js";
|
|
15
15
|
export const OLLAMA_DEFAULT_HOST = "http://localhost:11434";
|
|
16
16
|
export const OLLAMA_BASE_MODEL = "gemma4:e4b-it-q4_K_M";
|
|
17
17
|
export const OLLAMA_BASE_MODEL_NATIVE_CONTEXT_LENGTH = 131_072;
|
|
18
|
-
export const OLLAMA_REQUIRED_PARALLELISM = CLASSIFIER_NAMES.length;
|
|
19
18
|
export const OLLAMA_DEFAULT_CATALOG_PATH = "downstream-models.json";
|
|
20
19
|
/*
|
|
21
20
|
* Gemma 4 E4B's native context is 131,072 tokens (128K). The reference local
|
|
@@ -28,7 +27,6 @@ export const OLLAMA_MIN_TOTAL_MEMORY_BYTES = 16 * 1024 * 1024 * 1024;
|
|
|
28
27
|
export const OLLAMA_MIN_AVAILABLE_MEMORY_BYTES = 16 * 1024 * 1024 * 1024;
|
|
29
28
|
const ESTIMATED_CHARS_PER_TOKEN = 3;
|
|
30
29
|
const execFileAsync = promisify(execFile);
|
|
31
|
-
export const OLLAMA_CLASSIFIER_MODELS = Object.fromEntries(CLASSIFIER_NAMES.map((name) => [name, null]));
|
|
32
30
|
export class OllamaClassifierError extends Error {
|
|
33
31
|
classifier;
|
|
34
32
|
model;
|
|
@@ -45,7 +43,7 @@ export class OllamaResourceError extends Error {
|
|
|
45
43
|
minTotalMemoryBytes;
|
|
46
44
|
minAvailableMemoryBytes;
|
|
47
45
|
constructor(totalMemoryBytes, availableMemoryBytes, minTotalMemoryBytes, minAvailableMemoryBytes) {
|
|
48
|
-
super(`Ollama resource check failed: ${formatBytes(totalMemoryBytes)} total and ${formatBytes(availableMemoryBytes)} available; ${formatBytes(minTotalMemoryBytes)} total and ${formatBytes(minAvailableMemoryBytes)} available required
|
|
46
|
+
super(`Ollama resource check failed: ${formatBytes(totalMemoryBytes)} total and ${formatBytes(availableMemoryBytes)} available; ${formatBytes(minTotalMemoryBytes)} total and ${formatBytes(minAvailableMemoryBytes)} available required to run classifiers in parallel`);
|
|
49
47
|
this.name = "OllamaResourceError";
|
|
50
48
|
this.totalMemoryBytes = totalMemoryBytes;
|
|
51
49
|
this.availableMemoryBytes = availableMemoryBytes;
|
|
@@ -56,7 +54,11 @@ export class OllamaResourceError extends Error {
|
|
|
56
54
|
// Build a `RunClassifier` bound to a specific Ollama host + model selection.
|
|
57
55
|
// The resource check is lazy and runs once per runner — the first classifier
|
|
58
56
|
// invocation pays for it; subsequent ones reuse the same promise.
|
|
59
|
-
export function createOllamaClassifierRunner(config
|
|
57
|
+
export function createOllamaClassifierRunner(config) {
|
|
58
|
+
if (!config?.modulesByName) {
|
|
59
|
+
throw new Error("createOllamaClassifierRunner requires `modulesByName` from buildClassifierRegistry()");
|
|
60
|
+
}
|
|
61
|
+
const modulesByName = config.modulesByName;
|
|
60
62
|
const host = trimTrailingSlash(config.host ?? OLLAMA_DEFAULT_HOST);
|
|
61
63
|
const fetchImpl = config.fetch ?? fetch;
|
|
62
64
|
const models = config.models ?? {};
|
|
@@ -76,9 +78,13 @@ export function createOllamaClassifierRunner(config = {}) {
|
|
|
76
78
|
});
|
|
77
79
|
await resourceCheck;
|
|
78
80
|
}
|
|
81
|
+
const manifest = modulesByName[name];
|
|
82
|
+
if (manifest === undefined) {
|
|
83
|
+
throw new OllamaClassifierError(name, defaultModel, `unknown classifier "${name}" — not present in registry`);
|
|
84
|
+
}
|
|
79
85
|
const configuredModel = models[name];
|
|
80
86
|
const model = configuredModel ?? defaultModel;
|
|
81
|
-
return runOllamaClassifier(
|
|
87
|
+
return runOllamaClassifier(manifest, input, signal, fetchImpl, host, model, options, configuredModel === undefined && !hasDefaultModelOverride);
|
|
82
88
|
};
|
|
83
89
|
}
|
|
84
90
|
export async function assertOllamaResources(options = {}) {
|
|
@@ -90,10 +96,10 @@ export async function assertOllamaResources(options = {}) {
|
|
|
90
96
|
throw new OllamaResourceError(totalMemoryBytes, availableMemoryBytes, minTotalMemoryBytes, minAvailableMemoryBytes);
|
|
91
97
|
}
|
|
92
98
|
}
|
|
93
|
-
async function runOllamaClassifier(
|
|
94
|
-
const
|
|
95
|
-
const systemPrompt =
|
|
96
|
-
const configuredBaseModel =
|
|
99
|
+
async function runOllamaClassifier(manifest, input, signal, fetchImpl, host, model, options, allowManifestModel) {
|
|
100
|
+
const name = manifest.name;
|
|
101
|
+
const systemPrompt = manifest.systemPrompt;
|
|
102
|
+
const configuredBaseModel = manifest.backend?.ollama?.base_model;
|
|
97
103
|
if (allowManifestModel && configuredBaseModel) {
|
|
98
104
|
model = configuredBaseModel;
|
|
99
105
|
}
|
|
@@ -137,7 +143,7 @@ async function runOllamaClassifier(name, input, signal, fetchImpl, host, model,
|
|
|
137
143
|
}
|
|
138
144
|
const parsed = parseJsonObject(content, name, model);
|
|
139
145
|
try {
|
|
140
|
-
return validateClassifierOutput(
|
|
146
|
+
return validateClassifierOutput(manifest, parsed, model);
|
|
141
147
|
}
|
|
142
148
|
catch (error) {
|
|
143
149
|
if (error instanceof ClassifierValidationError) {
|
package/dist/src/pipeline.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { type RunClassifier } from "./classifiers.js";
|
|
2
|
-
import type { Catalog, InspectResult, PipelineResult } from "./manifest.js";
|
|
2
|
+
import type { Catalog, ClassifierRegistry, InspectResult, PipelineResult } from "./manifest.js";
|
|
3
3
|
import type { OpenClassifyInput } from "./types.js";
|
|
4
4
|
export declare const DEFAULT_CLASSIFIER_TIMEOUT_MS = 15000;
|
|
5
5
|
export declare const DEFAULT_CLASSIFIER_RETRY_COUNT = 1;
|
|
@@ -10,6 +10,7 @@ export declare class OpenClassifyNormalizationError extends Error {
|
|
|
10
10
|
export interface ClassifyOptions {
|
|
11
11
|
runClassifier: RunClassifier;
|
|
12
12
|
catalog: Catalog;
|
|
13
|
+
registry: ClassifierRegistry;
|
|
13
14
|
classifierTimeoutMs?: number;
|
|
14
15
|
classifierRetryCount?: number;
|
|
15
16
|
maxConcurrency?: number;
|
|
@@ -17,6 +18,7 @@ export interface ClassifyOptions {
|
|
|
17
18
|
}
|
|
18
19
|
export interface InspectOptions {
|
|
19
20
|
runClassifier: RunClassifier;
|
|
21
|
+
registry: ClassifierRegistry;
|
|
20
22
|
classifierTimeoutMs?: number;
|
|
21
23
|
classifierRetryCount?: number;
|
|
22
24
|
maxConcurrency?: number;
|
package/dist/src/pipeline.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { assembleResult, buildPublicOutputs } from "./aggregator.js";
|
|
2
|
-
import { MODULES_BY_NAME, REGISTRY, } from "./classifiers.js";
|
|
3
2
|
import { normalizeOpenClassifyInput, toClassifierInput } from "./input.js";
|
|
4
3
|
export const DEFAULT_CLASSIFIER_TIMEOUT_MS = 15_000;
|
|
5
4
|
export const DEFAULT_CLASSIFIER_RETRY_COUNT = 1;
|
|
@@ -12,7 +11,7 @@ export class OpenClassifyNormalizationError extends Error {
|
|
|
12
11
|
}
|
|
13
12
|
export async function classifyOpenClassifyInput(input, options) {
|
|
14
13
|
const { request, results, failedClassifiers } = await runPipeline(input, "user", options);
|
|
15
|
-
const reg = filteredRegistry("user");
|
|
14
|
+
const reg = filteredRegistry(options.registry, "user");
|
|
16
15
|
const assembled = assembleResult({
|
|
17
16
|
registry: reg,
|
|
18
17
|
results,
|
|
@@ -26,7 +25,7 @@ export async function classifyOpenClassifyInput(input, options) {
|
|
|
26
25
|
}
|
|
27
26
|
export async function inspectOpenClassifyInput(input, options) {
|
|
28
27
|
const { request, results } = await runPipeline(input, "assistant", options);
|
|
29
|
-
const reg = filteredRegistry("assistant");
|
|
28
|
+
const reg = filteredRegistry(options.registry, "assistant");
|
|
30
29
|
const lastMsg = request.messages[request.messages.length - 1];
|
|
31
30
|
return {
|
|
32
31
|
target_message_hash: request.target_message_hash,
|
|
@@ -56,19 +55,19 @@ async function runPipeline(input, role, options) {
|
|
|
56
55
|
const classifierTimeoutMs = options.classifierTimeoutMs ?? DEFAULT_CLASSIFIER_TIMEOUT_MS;
|
|
57
56
|
const classifierRetryCount = options.classifierRetryCount ?? DEFAULT_CLASSIFIER_RETRY_COUNT;
|
|
58
57
|
const maxConcurrency = resolveMaxConcurrency(options.maxConcurrency);
|
|
59
|
-
const registry = filteredRegistry(role);
|
|
58
|
+
const registry = filteredRegistry(options.registry, role);
|
|
60
59
|
const queue = registry.map((m) => m.name);
|
|
61
60
|
try {
|
|
62
61
|
const settled = await runWithConcurrency(queue, maxConcurrency, controller.signal, (name) => runClassifierWithRetry(name, classifierInput, options.runClassifier, controller.signal, classifierTimeoutMs, classifierRetryCount));
|
|
63
|
-
const { results, failedClassifiers } = collectResults(settled);
|
|
62
|
+
const { results, failedClassifiers } = collectResults(registry, settled);
|
|
64
63
|
return { request, results, failedClassifiers };
|
|
65
64
|
}
|
|
66
65
|
finally {
|
|
67
66
|
options.signal?.removeEventListener("abort", abortFromOptions);
|
|
68
67
|
}
|
|
69
68
|
}
|
|
70
|
-
function filteredRegistry(role) {
|
|
71
|
-
return
|
|
69
|
+
function filteredRegistry(registry, role) {
|
|
70
|
+
return registry.filter((m) => roleAppliesTo(m.appliesTo, role));
|
|
72
71
|
}
|
|
73
72
|
function roleAppliesTo(appliesTo, role) {
|
|
74
73
|
return appliesTo === "both" || appliesTo === role;
|
|
@@ -107,12 +106,18 @@ async function runWithConcurrency(names, maxConcurrency, signal, start) {
|
|
|
107
106
|
await Promise.all(Array.from({ length: workerCount }, () => worker()));
|
|
108
107
|
return results;
|
|
109
108
|
}
|
|
110
|
-
function collectResults(settled) {
|
|
109
|
+
function collectResults(registry, settled) {
|
|
110
|
+
const fallbackByName = new Map();
|
|
111
|
+
for (const m of registry)
|
|
112
|
+
fallbackByName.set(m.name, m.fallback);
|
|
111
113
|
const results = {};
|
|
112
114
|
const failedClassifiers = [];
|
|
113
115
|
for (const s of settled) {
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
+
const fallback = fallbackByName.get(s.name);
|
|
117
|
+
if (fallback === undefined) {
|
|
118
|
+
throw new Error(`pipeline: classifier "${s.name}" missing from registry`);
|
|
119
|
+
}
|
|
120
|
+
results[s.name] = s.ok ? s.value : fallback;
|
|
116
121
|
if (!s.ok)
|
|
117
122
|
failedClassifiers.push(s.name);
|
|
118
123
|
}
|
|
@@ -1,20 +1,27 @@
|
|
|
1
1
|
# Adding a classifier
|
|
2
2
|
|
|
3
|
-
Every classifier —
|
|
3
|
+
Every classifier — bundled or your own — uses the same two-file layout. There is no separate "stock" vs "custom" distinction; the runtime only cares about which reserved fields a classifier opts into.
|
|
4
|
+
|
|
5
|
+
There are two places a classifier can live:
|
|
6
|
+
|
|
7
|
+
- **In your own app**, in a directory you pass to `extraClassifierDirs` (almost always `./classifiers/` after `npx open-classify init`). This is the right path when you've installed Open Classify as a dependency.
|
|
8
|
+
- **In this repo**, under `src/classifiers/<name>/`. Only do this when you're contributing a new mandatory built-in back to Open Classify.
|
|
9
|
+
|
|
10
|
+
Either way, the layout and contract are identical.
|
|
4
11
|
|
|
5
12
|
## 1. Create the directory
|
|
6
13
|
|
|
7
14
|
```
|
|
8
|
-
|
|
15
|
+
classifiers/<name>/
|
|
9
16
|
├── manifest.json
|
|
10
17
|
└── prompt.md
|
|
11
18
|
```
|
|
12
19
|
|
|
13
|
-
The directory name must match `manifest.json`'s `name` field.
|
|
20
|
+
The directory name must match `manifest.json`'s `name` field. Directories starting with `_` are skipped by the loader — that's the deactivation mechanism (`_topic_tags/` is inert; rename to `topic_tags/` to activate).
|
|
14
21
|
|
|
15
22
|
## 2. Write the manifest
|
|
16
23
|
|
|
17
|
-
Minimal example — a pure-custom classifier that emits tags.
|
|
24
|
+
Minimal example — a pure-custom classifier that emits tags. The runtime synthesizes a JSON example from your schema, so you don't need to write one.
|
|
18
25
|
|
|
19
26
|
```json
|
|
20
27
|
{
|
|
@@ -39,8 +46,6 @@ Minimal example — a pure-custom classifier that emits tags. You don't need to
|
|
|
39
46
|
}
|
|
40
47
|
```
|
|
41
48
|
|
|
42
|
-
If your classifier's behavior is nuanced enough that hand-picked examples would help the model (preflight is one), add an `output_schema.examples` array. The runtime validates each example against the composed schema at load time, so a broken example fails the build.
|
|
43
|
-
|
|
44
49
|
To also influence routing, opt into a reserved field:
|
|
45
50
|
|
|
46
51
|
```json
|
|
@@ -73,6 +78,7 @@ Rules:
|
|
|
73
78
|
- `reason` and `certainty` are added to the composed schema by the runtime — don't declare them.
|
|
74
79
|
- `fallback` must validate against the composed schema. Only `reason` and `certainty` are required in fallback; reserved fields and `output_schema.required` fields are exempt (a "no signal" fallback usually omits them).
|
|
75
80
|
- `output_schema.examples` (JSON Schema standard) must validate against the composed schema at load time, so a broken example fails the build, not the model call.
|
|
81
|
+
- **Name collisions throw.** Extras cannot override the mandatory built-ins (`preflight`, `model_tier`, `model_specialization`, `prompt_injection`). To customize one of those, use a custom `RunClassifier` to intercept it (see "Replacing the backend" below).
|
|
76
82
|
|
|
77
83
|
See [manifests.md](manifests.md) for the full field list.
|
|
78
84
|
|
|
@@ -90,24 +96,37 @@ Do not invent tags for vague or ambiguous messages.
|
|
|
90
96
|
|
|
91
97
|
Don't paste enum values for reserved fields — the runtime injects them with canonical wording so they never drift from `src/enums.ts`.
|
|
92
98
|
|
|
93
|
-
## 4.
|
|
99
|
+
## 4. Use it
|
|
94
100
|
|
|
95
|
-
|
|
96
|
-
npm run build # validates the manifest, composes the schema, copies assets
|
|
97
|
-
npm test
|
|
98
|
-
```
|
|
101
|
+
After `npx open-classify init`, your `classifiers/` directory already exists. Drop your folder in and point `createClassifier` at the parent dir:
|
|
99
102
|
|
|
100
|
-
|
|
103
|
+
```ts
|
|
104
|
+
import { createClassifier } from "open-classify";
|
|
101
105
|
|
|
102
|
-
|
|
106
|
+
const { classify } = createClassifier({
|
|
107
|
+
extraClassifierDirs: ["./classifiers"],
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
const result = await classify({
|
|
111
|
+
messages: [{ role: "user", text: "Can you review the attached contract?" }],
|
|
112
|
+
});
|
|
103
113
|
|
|
104
|
-
```ts
|
|
105
|
-
const { classify } = createClassifier({ catalog });
|
|
106
|
-
const result = await classify(input);
|
|
107
114
|
const tags = result.classifier_outputs.topic_tags?.tags ?? [];
|
|
108
115
|
```
|
|
109
116
|
|
|
110
|
-
`
|
|
117
|
+
> Production tip: `"./classifiers"` resolves against `process.cwd()`, which is fine for `npm start` but breaks if the process launches from a different directory. For long-running services, build an absolute path relative to the module file instead, e.g. `path.resolve(path.dirname(fileURLToPath(import.meta.url)), "classifiers")`.
|
|
118
|
+
|
|
119
|
+
If the manifest is malformed, `createClassifier` throws `ClassifierManifestError` at startup with the path and a specific reason, so typos fail loudly.
|
|
120
|
+
|
|
121
|
+
## Activating one of the bundled templates
|
|
122
|
+
|
|
123
|
+
`npx open-classify init` copies four templates (`tools`, `memory_retrieval_queries`, `conversation_digest`, `context_shift`) into your `classifiers/` directory as `_<name>/` — inactive because of the underscore prefix. To turn one on:
|
|
124
|
+
|
|
125
|
+
```sh
|
|
126
|
+
mv classifiers/_tools classifiers/tools
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Edit `manifest.json` first if you need to (`tools` in particular ships with an opinionated `allowed_tools` list you'll almost certainly want to tailor). The reverse works on any classifier: rename `<name>/` → `_<name>/` to deactivate without deleting.
|
|
111
130
|
|
|
112
131
|
## Targeting the assistant response
|
|
113
132
|
|
|
@@ -134,7 +153,7 @@ The built-in `prompt_injection` ships tagged `"both"` so it runs on both sides.
|
|
|
134
153
|
|
|
135
154
|
## Choosing the classifier model
|
|
136
155
|
|
|
137
|
-
|
|
156
|
+
In `open-classify.config.json`:
|
|
138
157
|
|
|
139
158
|
```json
|
|
140
159
|
{
|
|
@@ -148,9 +167,9 @@ For apps and OSS installs, prefer `open-classify.config.json`:
|
|
|
148
167
|
}
|
|
149
168
|
```
|
|
150
169
|
|
|
151
|
-
`runner.defaultModel` applies to every classifier without an override. `runner.models` is a flat map keyed by classifier name —
|
|
170
|
+
`runner.defaultModel` applies to every classifier without an override. `runner.models` is a flat map keyed by classifier name — works for built-ins, templates, and your own.
|
|
152
171
|
|
|
153
|
-
Classifier manifests may also carry an Ollama hint
|
|
172
|
+
Classifier manifests may also carry an Ollama hint:
|
|
154
173
|
|
|
155
174
|
```json
|
|
156
175
|
{
|
|
@@ -162,15 +181,17 @@ Config file and function options take precedence over manifest hints.
|
|
|
162
181
|
|
|
163
182
|
## Replacing the backend
|
|
164
183
|
|
|
165
|
-
For full backend control
|
|
184
|
+
For full backend control — including replacing a mandatory built-in like `preflight` — implement your own `RunClassifier` and pass it to `createClassifier`:
|
|
166
185
|
|
|
167
186
|
```ts
|
|
168
|
-
import {
|
|
187
|
+
import { createClassifier, type RunClassifier } from "open-classify";
|
|
169
188
|
|
|
170
189
|
const runClassifier: RunClassifier = async (name, input, signal) => {
|
|
171
|
-
|
|
172
|
-
|
|
190
|
+
if (name === "preflight") {
|
|
191
|
+
// call OpenAI / Anthropic / your own logic; return a ClassifierOutput.
|
|
192
|
+
}
|
|
193
|
+
// …handle other classifiers, or delegate to an Ollama runner (e.g. one built with `createOllamaClassifierRunner`).
|
|
173
194
|
};
|
|
174
195
|
|
|
175
|
-
|
|
196
|
+
const { classify } = createClassifier({ runClassifier });
|
|
176
197
|
```
|
|
@@ -13,9 +13,7 @@
|
|
|
13
13
|
"preflight": "gemma4:e4b-it-q4_K_M",
|
|
14
14
|
"model_tier": "gemma4:e4b-it-q4_K_M",
|
|
15
15
|
"model_specialization": "gemma4:e4b-it-q4_K_M",
|
|
16
|
-
"
|
|
17
|
-
"prompt_injection": "gemma4:e4b-it-q4_K_M",
|
|
18
|
-
"memory_retrieval_queries": "gemma4:e4b-it-q4_K_M"
|
|
16
|
+
"prompt_injection": "gemma4:e4b-it-q4_K_M"
|
|
19
17
|
}
|
|
20
18
|
},
|
|
21
19
|
"catalog": "downstream-models.json"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "open-classify",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "Manifest-driven classifier runtime for routing user messages to downstream AI models",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Taylor Bayouth",
|
|
@@ -29,11 +29,16 @@
|
|
|
29
29
|
"default": "./dist/src/index.js"
|
|
30
30
|
}
|
|
31
31
|
},
|
|
32
|
+
"bin": {
|
|
33
|
+
"open-classify": "./bin/open-classify.mjs"
|
|
34
|
+
},
|
|
32
35
|
"files": [
|
|
36
|
+
"bin",
|
|
33
37
|
"dist/src",
|
|
34
38
|
"docs",
|
|
35
39
|
"downstream-models.json",
|
|
36
40
|
"open-classify.config.example.json",
|
|
41
|
+
"templates",
|
|
37
42
|
"LICENSE",
|
|
38
43
|
"README.md"
|
|
39
44
|
],
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|