open-classify 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +96 -88
  2. package/bin/open-classify.mjs +201 -0
  3. package/dist/src/aggregator.d.ts +7 -23
  4. package/dist/src/aggregator.js +108 -186
  5. package/dist/src/classifiers/{routing → model_tier}/manifest.json +2 -2
  6. package/dist/src/classifiers/{routing → model_tier}/prompt.md +1 -1
  7. package/dist/src/classifiers/preflight/manifest.json +9 -8
  8. package/dist/src/classifiers/preflight/prompt.md +12 -6
  9. package/dist/src/classifiers/prompt_injection/manifest.json +2 -3
  10. package/dist/src/classifiers.d.ts +12 -5
  11. package/dist/src/classifiers.js +32 -16
  12. package/dist/src/classify.d.ts +5 -3
  13. package/dist/src/classify.js +28 -8
  14. package/dist/src/config.d.ts +1 -3
  15. package/dist/src/config.js +1 -28
  16. package/dist/src/index.js +2 -3
  17. package/dist/src/manifest.d.ts +25 -70
  18. package/dist/src/ollama.d.ts +5 -6
  19. package/dist/src/ollama.js +17 -11
  20. package/dist/src/pipeline.d.ts +3 -2
  21. package/dist/src/pipeline.js +32 -94
  22. package/dist/src/stock-validation.js +8 -4
  23. package/docs/adding-a-classifier.md +50 -27
  24. package/docs/manifests.md +6 -6
  25. package/docs/resolver.md +20 -44
  26. package/docs/signals.md +18 -8
  27. package/open-classify.config.example.json +2 -7
  28. package/package.json +6 -1
  29. /package/{dist/src/classifiers → templates}/context_shift/manifest.json +0 -0
  30. /package/{dist/src/classifiers → templates}/context_shift/prompt.md +0 -0
  31. /package/{dist/src/classifiers → templates}/conversation_digest/manifest.json +0 -0
  32. /package/{dist/src/classifiers → templates}/conversation_digest/prompt.md +0 -0
  33. /package/{dist/src/classifiers → templates}/memory_retrieval_queries/manifest.json +0 -0
  34. /package/{dist/src/classifiers → templates}/memory_retrieval_queries/prompt.md +0 -0
  35. /package/{dist/src/classifiers → templates}/tools/manifest.json +0 -0
  36. /package/{dist/src/classifiers → templates}/tools/prompt.md +0 -0
@@ -1,119 +1,121 @@
1
1
  import { certaintyScore } from "./stock.js";
2
- export const DEFAULT_CERTAINTY_THRESHOLD = 0.65;
3
- /** @deprecated Use DEFAULT_CERTAINTY_THRESHOLD. */
4
- export const DEFAULT_CONFIDENCE_THRESHOLD = DEFAULT_CERTAINTY_THRESHOLD;
5
- export function composeEnvelope(args) {
6
- const { registry, results, catalog, config } = args;
7
- const threshold = certaintyThreshold(config);
8
- const finalReplyPick = pickReservedField(registry, results, "final_reply", threshold);
9
- const ackReplyPick = pickReservedField(registry, results, "ack_reply", threshold);
10
- const tierPick = pickReservedField(registry, results, "model_tier", threshold);
11
- const specPick = pickReservedField(registry, results, "model_specialization", threshold);
12
- const toolsPick = pickReservedField(registry, results, "tools", threshold);
13
- const riskLevelPick = pickReservedField(registry, results, "risk_level", threshold);
14
- const routing = mergeRouting(tierPick?.value, specPick?.value);
15
- const routingConfidence = maxConfidence([tierPick?.confidence, specPick?.confidence]);
16
- const routingDrops = lowConfidenceRoutingDrops(registry, results, threshold, routing);
17
- const envelope = {
18
- ...optional("final_reply", finalReplyPick?.value),
19
- ...optional("ack_reply", ackReplyPick?.value),
20
- ...optional("routing", routing),
21
- ...optional("tools", toolsPick?.value === undefined ? undefined : { tools: toolsPick.value }),
22
- ...optional("prompt_injection", riskLevelPick?.value === undefined ? undefined : { risk_level: riskLevelPick.value }),
23
- classifier_outputs: buildAuditOutputs(registry, results),
24
- model_recommendation: resolveModelFromRouting(routing, catalog, routingConfidence, routingDrops),
25
- };
26
- return envelope;
27
- }
28
- export function certaintyThreshold(config) {
29
- return config?.certaintyThreshold ?? config?.confidenceThreshold ?? DEFAULT_CERTAINTY_THRESHOLD;
30
- }
31
- function optional(key, value) {
32
- return value === undefined ? {} : { [key]: value };
33
- }
34
- // Highest-certainty contributor wins. Ties broken by registry order — the
35
- // registry is already sorted by `dispatch_order` ascending (classifiers without
36
- // dispatch_order sort last), and we iterate in that order, so the first
37
- // encountered tie keeps the slot.
38
- function pickReservedField(registry, results, field, threshold) {
39
- let best;
40
- for (const manifest of registry) {
41
- if (!manifest.reservedFields.includes(field))
42
- continue;
43
- const output = results[manifest.name];
44
- if (output === undefined)
45
- continue;
46
- const raw = output[field];
47
- if (raw === undefined)
48
- continue;
49
- const confidence = scoreCertainty(output.certainty);
50
- if (confidence < threshold)
51
- continue;
52
- if (best === undefined || confidence > best.confidence) {
53
- best = { value: raw, confidence, source: manifest.name };
54
- }
2
+ export function assembleResult(args) {
3
+ const { registry, results, failedClassifiers, catalog } = args;
4
+ // Pick reserved fields — highest certainty wins, no threshold gate.
5
+ const finalReply = pickField(registry, results, "final_reply");
6
+ const ackReply = pickField(registry, results, "ack_reply");
7
+ const modelTier = pickField(registry, results, "model_tier");
8
+ const modelSpec = pickField(registry, results, "model_specialization");
9
+ const toolsPick = pickField(registry, results, "tools");
10
+ const riskLevel = pickField(registry, results, "risk_level");
11
+ // Resolve concrete model id.
12
+ let model_id = null;
13
+ try {
14
+ const routing = mergeRouting(modelTier?.value, modelSpec?.value);
15
+ model_id = resolveModelFromRouting(routing, catalog).id;
16
+ }
17
+ catch {
18
+ // Catalog error — model_id stays null.
19
+ }
20
+ const tools = toolsPick?.value ?? [];
21
+ const reply = finalReply?.value
22
+ ? { text: finalReply.value.text }
23
+ : ackReply?.value
24
+ ? { text: ackReply.value.text }
25
+ : null;
26
+ const prompt_injection = riskLevel?.value !== undefined ? { risk_level: riskLevel.value } : null;
27
+ const { avg_certainty, min_certainty } = certaintySummary(registry, results);
28
+ const classifier_outputs = buildPublicOutputs(registry, results);
29
+ // Determine action. Priority: prompt_injection > classification_error > reply > route.
30
+ const isInjectionBlock = riskLevel?.value === "high_risk" || riskLevel?.value === "unknown";
31
+ const isClassificationError = failedClassifiers.length > 0 || reply === null || model_id === null;
32
+ let action;
33
+ let block_reason;
34
+ if (isInjectionBlock) {
35
+ action = "block";
36
+ block_reason = "prompt_injection";
37
+ }
38
+ else if (isClassificationError) {
39
+ action = "block";
40
+ block_reason = "classification_error";
41
+ }
42
+ else if (finalReply?.value !== undefined) {
43
+ action = "reply";
44
+ }
45
+ else {
46
+ action = "route";
55
47
  }
56
- return best;
57
- }
58
- function mergeRouting(tier, model_specialization) {
59
- if (tier === undefined && model_specialization === undefined)
60
- return undefined;
61
48
  return {
62
- ...(tier === undefined ? {} : { model_tier: tier }),
63
- ...(model_specialization === undefined ? {} : { model_specialization }),
49
+ action,
50
+ ...(block_reason !== undefined ? { block_reason } : {}),
51
+ model_id,
52
+ tools,
53
+ reply,
54
+ prompt_injection,
55
+ avg_certainty,
56
+ min_certainty,
57
+ failed_classifiers: failedClassifiers,
58
+ classifier_outputs,
64
59
  };
65
60
  }
66
- function maxConfidence(values) {
67
- const finite = values.filter((v) => v !== undefined);
68
- if (finite.length === 0)
69
- return undefined;
70
- return Math.max(...finite);
71
- }
72
- function buildAuditOutputs(registry, results) {
73
- const out = [];
61
+ // Build the public classifier_outputs map. Keeps reason + payload fields;
62
+ // converts certainty label to float score.
63
+ export function buildPublicOutputs(registry, results) {
64
+ const out = {};
74
65
  for (const manifest of registry) {
75
66
  const result = results[manifest.name];
76
67
  if (result === undefined)
77
68
  continue;
78
- out.push({ classifier: manifest.name, ...result });
69
+ const { certainty, ...rest } = result;
70
+ out[manifest.name] = {
71
+ ...rest,
72
+ certainty: scoreCertainty(certainty),
73
+ };
79
74
  }
80
75
  return out;
81
76
  }
82
- // ─── Model recommendation ───────────────────────────────────────────────────
83
- function lowConfidenceRoutingDrops(registry, results, threshold, merged) {
84
- const dropped = [];
85
- if (merged?.model_tier === undefined && hasLowConfidenceReservedField(registry, results, "model_tier", threshold)) {
86
- dropped.push({ axis: "model_tier", reason: "low_confidence" });
87
- }
88
- if (merged?.model_specialization === undefined &&
89
- hasLowConfidenceReservedField(registry, results, "model_specialization", threshold)) {
90
- dropped.push({ axis: "model_specialization", reason: "low_confidence" });
91
- }
92
- return dropped;
93
- }
94
- function hasLowConfidenceReservedField(registry, results, field, threshold) {
77
+ function certaintySummary(registry, results) {
78
+ const scores = registry.map((m) => scoreCertainty(results[m.name]?.certainty));
79
+ if (scores.length === 0)
80
+ return { avg_certainty: 0, min_certainty: 0 };
81
+ const min_certainty = Math.min(...scores);
82
+ const avg_certainty = scores.reduce((sum, v) => sum + v, 0) / scores.length;
83
+ return { min_certainty, avg_certainty };
84
+ }
85
+ // Highest certainty wins; ties broken by registry order (already sorted by
86
+ // dispatch_order ascending).
87
+ function pickField(registry, results, field) {
88
+ let best;
95
89
  for (const manifest of registry) {
96
90
  if (!manifest.reservedFields.includes(field))
97
91
  continue;
98
92
  const output = results[manifest.name];
99
93
  if (output === undefined)
100
94
  continue;
101
- if (output[field] === undefined)
95
+ const raw = output[field];
96
+ if (raw === undefined)
102
97
  continue;
103
- if (scoreCertainty(output.certainty) < threshold)
104
- return true;
98
+ const score = scoreCertainty(output.certainty);
99
+ if (best === undefined || score > best.score) {
100
+ best = { value: raw, source: manifest.name, score };
101
+ }
105
102
  }
106
- return false;
103
+ return best;
107
104
  }
108
105
  function scoreCertainty(certainty) {
109
106
  return certainty === undefined ? 0 : certaintyScore[certainty];
110
107
  }
111
- export function resolveModelFromRouting(routing, catalog, confidence, ignoredConstraints = []) {
108
+ // ─── Model resolution ────────────────────────────────────────────────────────
109
+ function mergeRouting(tier, specialization) {
110
+ if (tier === undefined && specialization === undefined)
111
+ return undefined;
112
+ return {
113
+ ...(tier === undefined ? {} : { model_tier: tier }),
114
+ ...(specialization === undefined ? {} : { model_specialization: specialization }),
115
+ };
116
+ }
117
+ function resolveModelFromRouting(routing, catalog) {
112
118
  const requested = {};
113
- const confidences = {};
114
- if (confidence !== undefined) {
115
- confidences.routing = confidence;
116
- }
117
119
  if (routing?.model_specialization !== undefined) {
118
120
  requested.model_specialization = routing.model_specialization;
119
121
  }
@@ -121,74 +123,27 @@ export function resolveModelFromRouting(routing, catalog, confidence, ignoredCon
121
123
  requested.model_tier = routing.model_tier;
122
124
  }
123
125
  const passes = [
124
- { useSpecialization: true, useTier: true },
125
- { useSpecialization: true, useTier: false },
126
- { useSpecialization: false, useTier: true },
127
- { useSpecialization: false, useTier: false },
126
+ { useSpec: true, useTier: true },
127
+ { useSpec: true, useTier: false },
128
+ { useSpec: false, useTier: true },
129
+ { useSpec: false, useTier: false },
128
130
  ];
129
131
  for (const pass of passes) {
130
- const constraints_used = constraintsForPass(requested, pass);
131
- const matching = catalog.models.filter((model) => matchesConstraints(model, constraints_used));
132
+ const constraints = constraintsForPass(requested, pass);
133
+ const matching = catalog.models.filter((m) => matchesConstraints(m, constraints));
132
134
  if (matching.length === 0)
133
135
  continue;
134
- const winner = pickBestModel(matching, catalog.models);
135
- return {
136
- ...modelRecommendationFields(winner),
137
- resolution: {
138
- constraints_used,
139
- constraints_dropped: [
140
- ...ignoredConstraints,
141
- ...relaxedConstraints(requested, constraints_used),
142
- ],
143
- confidences,
144
- fell_back_to_default: false,
145
- },
146
- };
136
+ return { id: pickBestModel(matching, catalog.models).id };
147
137
  }
148
- const fallback = catalog.models.find((model) => model.id === catalog.default);
138
+ const fallback = catalog.models.find((m) => m.id === catalog.default);
149
139
  if (!fallback) {
150
- throw new Error(`catalog default "${catalog.default}" not found in models — catalog skipped validation`);
140
+ throw new Error(`catalog default "${catalog.default}" not found in models`);
151
141
  }
152
- return {
153
- ...modelRecommendationFields(fallback),
154
- resolution: {
155
- constraints_used: {},
156
- constraints_dropped: [
157
- ...ignoredConstraints,
158
- ...defaultFallbackConstraints(requested),
159
- ],
160
- confidences,
161
- fell_back_to_default: true,
162
- },
163
- };
164
- }
165
- // Test-friendly convenience wrapper: given typed result outputs for the
166
- // routing-bearing classifiers, merge their reserved fields and resolve a
167
- // model.
168
- export function resolveModel(results, catalog, threshold) {
169
- const routingCert = scoreCertainty(results.routing?.certainty);
170
- const specCert = scoreCertainty(results.model_specialization?.certainty);
171
- const tier = routingCert >= threshold ? results.routing?.model_tier : undefined;
172
- const model_specialization = specCert >= threshold ? results.model_specialization?.model_specialization : undefined;
173
- const merged = mergeRouting(tier, model_specialization);
174
- const dropped = [];
175
- if (tier === undefined && results.routing?.model_tier !== undefined && routingCert < threshold) {
176
- dropped.push({ axis: "model_tier", reason: "low_confidence" });
177
- }
178
- if (model_specialization === undefined &&
179
- results.model_specialization?.model_specialization !== undefined &&
180
- specCert < threshold) {
181
- dropped.push({ axis: "model_specialization", reason: "low_confidence" });
182
- }
183
- const confidence = maxConfidence([
184
- results.routing?.certainty === undefined ? undefined : routingCert,
185
- results.model_specialization?.certainty === undefined ? undefined : specCert,
186
- ]);
187
- return resolveModelFromRouting(merged, catalog, confidence, dropped);
142
+ return { id: fallback.id };
188
143
  }
189
144
  function constraintsForPass(requested, pass) {
190
145
  return {
191
- ...(pass.useSpecialization && requested.model_specialization !== undefined
146
+ ...(pass.useSpec && requested.model_specialization !== undefined
192
147
  ? { model_specialization: requested.model_specialization }
193
148
  : {}),
194
149
  ...(pass.useTier && requested.model_tier !== undefined
@@ -201,32 +156,11 @@ function matchesConstraints(model, constraints) {
201
156
  model.specializations.includes(constraints.model_specialization)) &&
202
157
  (constraints.model_tier === undefined || model.tier === constraints.model_tier));
203
158
  }
204
- function relaxedConstraints(requested, used) {
205
- const dropped = [];
206
- if (requested.model_specialization !== undefined && used.model_specialization === undefined) {
207
- dropped.push({ axis: "model_specialization", reason: "no_match_relaxed" });
208
- }
209
- if (requested.model_tier !== undefined && used.model_tier === undefined) {
210
- dropped.push({ axis: "model_tier", reason: "no_match_relaxed" });
211
- }
212
- return dropped;
213
- }
214
- function defaultFallbackConstraints(requested) {
215
- const dropped = [];
216
- if (requested.model_specialization !== undefined) {
217
- dropped.push({ axis: "model_specialization", reason: "default_fallback" });
218
- }
219
- if (requested.model_tier !== undefined) {
220
- dropped.push({ axis: "model_tier", reason: "default_fallback" });
221
- }
222
- return dropped;
223
- }
224
159
  function pickBestModel(candidates, catalogOrder) {
225
160
  let winner = candidates[0];
226
161
  for (let i = 1; i < candidates.length; i++) {
227
- const candidate = candidates[i];
228
- if (compareModels(candidate, winner, catalogOrder) < 0) {
229
- winner = candidate;
162
+ if (compareModels(candidates[i], winner, catalogOrder) < 0) {
163
+ winner = candidates[i];
230
164
  }
231
165
  }
232
166
  return winner;
@@ -238,27 +172,15 @@ function compareModels(a, b, catalogOrder) {
238
172
  if (a.params_in_billions !== b.params_in_billions) {
239
173
  return comparableParams(b) - comparableParams(a);
240
174
  }
241
- if (a.context_window !== b.context_window) {
175
+ if (a.context_window !== b.context_window)
242
176
  return b.context_window - a.context_window;
243
- }
244
177
  return catalogOrder.indexOf(a) - catalogOrder.indexOf(b);
245
178
  }
246
179
  function priceIndex(model) {
247
- if (model.input_tokens_cpm === undefined || model.output_tokens_cpm === undefined) {
180
+ if (model.input_tokens_cpm === undefined || model.output_tokens_cpm === undefined)
248
181
  return 0;
249
- }
250
182
  return model.input_tokens_cpm + model.output_tokens_cpm;
251
183
  }
252
184
  function comparableParams(model) {
253
185
  return model.params_in_billions ?? 0;
254
186
  }
255
- function modelRecommendationFields(winner) {
256
- return {
257
- id: winner.id,
258
- params_in_billions: winner.params_in_billions,
259
- context_window: winner.context_window,
260
- ...(winner.input_tokens_cpm === undefined ? {} : { input_tokens_cpm: winner.input_tokens_cpm }),
261
- ...(winner.cached_tokens_cpm === undefined ? {} : { cached_tokens_cpm: winner.cached_tokens_cpm }),
262
- ...(winner.output_tokens_cpm === undefined ? {} : { output_tokens_cpm: winner.output_tokens_cpm }),
263
- };
264
- }
@@ -1,11 +1,11 @@
1
1
  {
2
- "name": "routing",
2
+ "name": "model_tier",
3
3
  "version": "1.0.0",
4
4
  "purpose": "Recommend the downstream model tier.",
5
5
  "dispatch_order": 20,
6
6
  "reserved_fields": ["model_tier"],
7
7
  "fallback": {
8
- "reason": "Classifier failed; no routing signal.",
8
+ "reason": "Classifier failed; no model tier signal.",
9
9
  "certainty": "no_signal"
10
10
  }
11
11
  }
@@ -1,4 +1,4 @@
1
- You are the routing classifier for an AI assistant routing system.
1
+ You are the model_tier classifier for an AI assistant routing system.
2
2
 
3
3
  Pick the coarse model tier that best fits the target user message. Emit only `model_tier`; do not infer specialization, tools, or prompt-injection risk — other classifiers own those axes.
4
4
 
@@ -1,29 +1,30 @@
1
1
  {
2
2
  "name": "preflight",
3
- "version": "1.0.0",
4
- "purpose": "Determine whether the latest message can be answered immediately or should continue downstream.",
3
+ "version": "1.1.0",
4
+ "purpose": "Assess whether the latest message can be answered immediately (final_reply) or should route downstream with an acknowledgement (ack_reply). Always emits exactly one.",
5
5
  "dispatch_order": 10,
6
6
  "reserved_fields": ["final_reply", "ack_reply"],
7
7
  "output_schema": {
8
8
  "examples": [
9
9
  {
10
- "reason": "Greeting.",
10
+ "reason": "Simple greeting — answerable directly.",
11
11
  "certainty": "near_certain",
12
12
  "final_reply": { "text": "Hi!" }
13
13
  },
14
14
  {
15
- "reason": "Trivial arithmetic.",
15
+ "reason": "Trivial arithmetic — answerable directly.",
16
16
  "certainty": "very_strong",
17
17
  "final_reply": { "text": "4" }
18
18
  },
19
19
  {
20
- "reason": "Generated writing task.",
20
+ "reason": "Code review task requires substantive downstream work.",
21
21
  "certainty": "very_strong",
22
- "ack_reply": { "text": "On it." }
22
+ "ack_reply": { "text": "On it — reviewing the code now." }
23
23
  },
24
24
  {
25
- "reason": "Ambiguous; needs downstream model.",
26
- "certainty": "strong"
25
+ "reason": "Reminder request requires downstream action.",
26
+ "certainty": "strong",
27
+ "ack_reply": { "text": "Got it, I'll set that reminder for 3pm." }
27
28
  }
28
29
  ]
29
30
  },
@@ -1,10 +1,16 @@
1
1
  You are the preflight classifier for an AI assistant routing system.
2
2
 
3
- Decide whether the target user message can be answered immediately with a tiny terminal reply, or whether downstream work should continue (optionally with a brief acknowledgement).
3
+ Your primary task is to assess: **can you fully answer the target message yourself**, given the conversation history? Make this judgment first — the reply text follows from it.
4
4
 
5
- - Emit `final_reply` only for tiny terminal answers like greetings, thanks, spelling lookups, and simple arithmetic. The reply text IS the complete answer to the user — nothing else happens after this.
6
- - Emit `ack_reply` when downstream work should continue and a brief acknowledgement would help (drafting, analysis, coding, research). The text must not contain the answer.
7
- - Omit both fields when the request is ambiguous or no acknowledgement is useful.
8
- - Do not address the user anywhere except inside `final_reply.text` or `ack_reply.text`.
5
+ **Step 1 — assess whether you can fully answer:**
6
+ Ask yourself: Is the intent clear? Is the answer fully derivable from context right now, without real-time data, external tools, code execution, non-trivial generation, analysis, or judgment? Would a one-sentence reply genuinely resolve the request?
7
+
8
+ If yes → emit `final_reply` with the complete answer.
9
+
10
+ If no (the downstream model should handle it) → emit `ack_reply` with a brief, contextually specific acknowledgement that shows you understood the request. The ack must reflect the actual request — not a generic "On it." — so the user knows their message was understood while the model works.
9
11
 
10
- If answering would require non-trivial generation, analysis, or judgment, do not use `final_reply`. Use `ack_reply` (or omit both) and let the downstream model produce the answer.
12
+ **Rule: always emit exactly one of `final_reply` or `ack_reply`. Never emit both. Never emit neither.**
13
+
14
+ - `final_reply` is for tiny terminal answers only: greetings, thanks, spelling lookups, simple arithmetic, yes/no factual questions answerable from context. If answering requires drafting, rewriting, analysis, coding, research, planning, or any substantive generation — use `ack_reply` instead.
15
+ - `ack_reply` text must not contain the answer. It acknowledges the request and confirms it is being worked on.
16
+ - Do not address the user anywhere except inside `final_reply.text` or `ack_reply.text`.
@@ -9,8 +9,7 @@
9
9
  "required": ["risk_level"]
10
10
  },
11
11
  "fallback": {
12
- "reason": "Classifier failed; prompt-injection risk is unknown.",
13
- "certainty": "no_signal",
14
- "risk_level": "unknown"
12
+ "reason": "Classifier failed; prompt-injection risk could not be assessed.",
13
+ "certainty": "no_signal"
15
14
  }
16
15
  }
@@ -1,14 +1,21 @@
1
1
  import type { ClassifierInput } from "./types.js";
2
2
  import type { ClassifierName, ClassifierRegistry, RunClassifier } from "./manifest.js";
3
3
  import type { ClassifierOutput, RuntimeClassifierManifest } from "./stock.js";
4
+ export declare const BUILTIN_CLASSIFIERS_DIR: string;
4
5
  export declare class ClassifierManifestError extends Error {
5
6
  constructor(message: string);
6
7
  }
8
+ export type ClassifierModuleMap = Readonly<Record<string, RuntimeClassifierManifest>>;
9
+ export interface ClassifierRegistryBundle {
10
+ readonly registry: ClassifierRegistry;
11
+ readonly modulesByName: ClassifierModuleMap;
12
+ readonly names: ReadonlyArray<string>;
13
+ }
14
+ export interface BuildRegistryOptions {
15
+ readonly extraDirs?: ReadonlyArray<string>;
16
+ }
7
17
  export declare function loadClassifierRegistry(classifiersDir?: string): RuntimeClassifierManifest[];
8
- export declare const REGISTRY: ClassifierRegistry;
9
- export declare const CLASSIFIER_NAMES: string[];
10
- export declare const MODULES_BY_NAME: Record<string, RuntimeClassifierManifest>;
18
+ export declare function buildClassifierRegistry(options?: BuildRegistryOptions): ClassifierRegistryBundle;
19
+ export declare function validateClassifierOutput(manifest: RuntimeClassifierManifest, value: unknown, model: string): ClassifierOutput;
11
20
  export type { ClassifierName, RunClassifier };
12
- export type RegistryType = typeof REGISTRY;
13
- export declare function validateClassifierOutput(name: string, value: unknown, model: string): ClassifierOutput;
14
21
  export type { ClassifierInput };
@@ -4,9 +4,11 @@ import { fileURLToPath } from "node:url";
4
4
  import { buildClassifierPrompt } from "./stock-prompt.js";
5
5
  import { validateJsonClassifierManifest, validateOutputForManifest, } from "./stock-validation.js";
6
6
  const __dirname = dirname(fileURLToPath(import.meta.url));
7
- const CLASSIFIERS_DIR = join(__dirname, "classifiers");
7
+ export const BUILTIN_CLASSIFIERS_DIR = join(__dirname, "classifiers");
8
8
  // Directories whose names start with "_" are reserved for shared assets
9
- // (e.g. `_prompts/`) and are not loaded as classifiers.
9
+ // (e.g. `_prompts/`) and are not loaded as classifiers. Consumers can use
10
+ // the same convention in their own classifier directories: rename a
11
+ // classifier to `_<name>/` to deactivate it without deleting it.
10
12
  const SHARED_DIRECTORY_PREFIX = "_";
11
13
  export class ClassifierManifestError extends Error {
12
14
  constructor(message) {
@@ -14,7 +16,10 @@ export class ClassifierManifestError extends Error {
14
16
  this.name = "ClassifierManifestError";
15
17
  }
16
18
  }
17
- export function loadClassifierRegistry(classifiersDir = CLASSIFIERS_DIR) {
19
+ // Load all classifier manifests under a single directory. Used internally to
20
+ // load the built-ins and each extra directory; callers wanting the merged
21
+ // registry should use `buildClassifierRegistry()` instead.
22
+ export function loadClassifierRegistry(classifiersDir = BUILTIN_CLASSIFIERS_DIR) {
18
23
  if (!existsSync(classifiersDir)) {
19
24
  throw new ClassifierManifestError(`classifier directory not found: ${classifiersDir}`);
20
25
  }
@@ -26,11 +31,29 @@ export function loadClassifierRegistry(classifiersDir = CLASSIFIERS_DIR) {
26
31
  continue;
27
32
  manifests.push(loadClassifierManifest(join(classifiersDir, entry.name)));
28
33
  }
29
- // Lower dispatch_order runs first. Classifiers without dispatch_order sort
30
- // last (treated as +Infinity) — useful for "run me whenever there's a slot".
34
+ return manifests;
35
+ }
36
+ // Build a complete classifier registry from the bundled built-ins plus any
37
+ // extra directories supplied by the caller. Sorts by dispatch_order
38
+ // ascending (manifests without dispatch_order sort last). Rejects duplicate
39
+ // names.
40
+ //
41
+ // Mandatory built-ins (preflight, model_tier, model_specialization,
42
+ // prompt_injection) always load. Extras with the same name as a built-in
43
+ // throw — there's no override mechanism. Customise by editing the bundled
44
+ // manifest in your own fork, or replace behaviour entirely with a custom
45
+ // `runClassifier`.
46
+ export function buildClassifierRegistry(options = {}) {
47
+ const manifests = [
48
+ ...loadClassifierRegistry(BUILTIN_CLASSIFIERS_DIR),
49
+ ...(options.extraDirs ?? []).flatMap((dir) => loadClassifierRegistry(dir)),
50
+ ];
31
51
  manifests.sort((a, b) => (a.dispatch_order ?? Infinity) - (b.dispatch_order ?? Infinity));
32
52
  validateRegistry(manifests);
33
- return manifests;
53
+ const registry = manifests;
54
+ const modulesByName = Object.fromEntries(manifests.map((m) => [m.name, m]));
55
+ const names = manifests.map((m) => m.name);
56
+ return { registry, modulesByName, names };
34
57
  }
35
58
  function loadClassifierManifest(classifierDir) {
36
59
  const manifestPath = join(classifierDir, "manifest.json");
@@ -69,18 +92,11 @@ function validateRegistry(manifests) {
69
92
  const names = new Set();
70
93
  for (const manifest of manifests) {
71
94
  if (names.has(manifest.name)) {
72
- throw new ClassifierManifestError(`duplicate classifier name: ${manifest.name}`);
95
+ throw new ClassifierManifestError(`duplicate classifier name: ${manifest.name} — extras cannot override built-ins or other extras. Rename your classifier or run it under a different name.`);
73
96
  }
74
97
  names.add(manifest.name);
75
98
  }
76
99
  }
77
- export const REGISTRY = loadClassifierRegistry();
78
- export const CLASSIFIER_NAMES = REGISTRY.map((m) => m.name);
79
- export const MODULES_BY_NAME = Object.fromEntries(REGISTRY.map((m) => [m.name, m]));
80
- export function validateClassifierOutput(name, value, model) {
81
- const manifest = MODULES_BY_NAME[name];
82
- if (!manifest) {
83
- throw new ClassifierManifestError(`unknown classifier: ${name}`);
84
- }
85
- return validateOutputForManifest(manifest, value, { classifier: name, model });
100
+ export function validateClassifierOutput(manifest, value, model) {
101
+ return validateOutputForManifest(manifest, value, { classifier: manifest.name, model });
86
102
  }
@@ -1,6 +1,6 @@
1
- import { type RunClassifier } from "./classifiers.js";
1
+ import { ClassifierManifestError, type ClassifierRegistryBundle, type RunClassifier } from "./classifiers.js";
2
2
  import { type OpenClassifyConfig } from "./config.js";
3
- import type { AggregatorConfig, Catalog, InspectResult, PipelineResult } from "./manifest.js";
3
+ import type { Catalog, InspectResult, PipelineResult } from "./manifest.js";
4
4
  import type { OpenClassifyInput } from "./types.js";
5
5
  export type Classifier = (input: OpenClassifyInput, options?: {
6
6
  signal?: AbortSignal;
@@ -11,10 +11,12 @@ export type Inspector = (input: OpenClassifyInput, options?: {
11
11
  export interface OpenClassify {
12
12
  readonly classify: Classifier;
13
13
  readonly inspect: Inspector;
14
+ readonly registry: ClassifierRegistryBundle;
14
15
  }
15
16
  export interface CreateClassifierOptions {
16
17
  runClassifier?: RunClassifier;
17
18
  catalog?: Catalog;
19
+ extraClassifierDirs?: ReadonlyArray<string>;
18
20
  config?: OpenClassifyConfig;
19
21
  configPath?: string;
20
22
  catalogPath?: string;
@@ -25,6 +27,6 @@ export interface CreateClassifierOptions {
25
27
  classifierTimeoutMs?: number;
26
28
  classifierRetryCount?: number;
27
29
  maxConcurrency?: number;
28
- aggregator?: AggregatorConfig;
29
30
  }
30
31
  export declare function createClassifier(options?: CreateClassifierOptions): OpenClassify;
32
+ export { ClassifierManifestError };