open-classify 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -35
- package/dist/src/aggregator.d.ts +4 -1
- package/dist/src/aggregator.js +25 -15
- package/dist/src/classifiers/custom/context_shift/manifest.json +31 -0
- package/dist/src/classifiers/custom/context_shift/prompt.md +12 -0
- package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/manifest.json +3 -1
- package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/prompt.md +1 -1
- package/dist/src/classifiers/custom/memory_retrieval_queries/manifest.json +2 -0
- package/dist/src/classifiers/stock/model_specialization/manifest.json +4 -1
- package/dist/src/classifiers/stock/preflight/manifest.json +4 -1
- package/dist/src/classifiers/stock/prompt_injection/manifest.json +12 -0
- package/dist/src/classifiers/stock/prompts/confidence.md +3 -3
- package/dist/src/classifiers/stock/prompts/custom-output.md +7 -1
- package/dist/src/classifiers/stock/prompts/preflight.md +7 -7
- package/dist/src/classifiers/stock/prompts/prompt-injection-output.md +5 -0
- package/dist/src/classifiers/stock/prompts/prompt_injection.md +24 -0
- package/dist/src/classifiers/stock/prompts/reason.md +1 -1
- package/dist/src/classifiers/stock/prompts/specialty.md +8 -6
- package/dist/src/classifiers/stock/prompts/tier.md +1 -1
- package/dist/src/classifiers/stock/prompts/tools-output.md +4 -0
- package/dist/src/classifiers/stock/routing/manifest.json +4 -1
- package/dist/src/classifiers/stock/tools/manifest.json +2 -0
- package/dist/src/classify.d.ts +22 -0
- package/dist/src/classify.js +50 -0
- package/dist/src/config.d.ts +2 -0
- package/dist/src/config.js +33 -1
- package/dist/src/enums.d.ts +3 -7
- package/dist/src/enums.js +7 -30
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.js +2 -1
- package/dist/src/input.js +1 -1
- package/dist/src/manifest.d.ts +31 -23
- package/dist/src/manifest.js +5 -1
- package/dist/src/ollama.d.ts +0 -11
- package/dist/src/ollama.js +0 -36
- package/dist/src/pipeline.d.ts +1 -0
- package/dist/src/pipeline.js +78 -48
- package/dist/src/stock-prompt.js +1 -1
- package/dist/src/stock-validation.d.ts +1 -2
- package/dist/src/stock-validation.js +23 -40
- package/dist/src/stock.d.ts +12 -11
- package/dist/src/stock.js +21 -1
- package/dist/src/ui-server.js +12 -5
- package/dist/src/validation.d.ts +0 -1
- package/dist/src/validation.js +0 -37
- package/docs/adding-a-classifier.md +132 -0
- package/docs/manifests.md +127 -0
- package/docs/resolver.md +104 -0
- package/docs/signals.md +102 -0
- package/downstream-models.json +124 -0
- package/open-classify.config.example.json +5 -1
- package/package.json +3 -1
- package/dist/src/classifiers/stock/prompts/security-output.md +0 -8
- package/dist/src/classifiers/stock/prompts/security.md +0 -26
- package/dist/src/classifiers/stock/security/manifest.json +0 -12
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { JsonClassifierManifest,
|
|
1
|
+
import type { JsonClassifierManifest, ClassifierOutput } from "./stock.js";
|
|
2
2
|
export declare const STOCK_REASON_MAX_CHARS = 120;
|
|
3
3
|
export declare const STOCK_REPLY_MAX_CHARS = 200;
|
|
4
4
|
export declare const STOCK_TOOL_ID_MAX_CHARS = 64;
|
|
@@ -19,4 +19,3 @@ export interface LegacyValidateOptions {
|
|
|
19
19
|
readonly manifest: JsonClassifierManifest;
|
|
20
20
|
}
|
|
21
21
|
export declare function validateClassifierOutputWithManifest(value: unknown, options: LegacyValidateOptions): ClassifierOutput;
|
|
22
|
-
export type { SafetySignal };
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES,
|
|
1
|
+
import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, } from "./enums.js";
|
|
2
2
|
import { Ajv } from "ajv/dist/ajv.js";
|
|
3
|
-
import { STOCK_CLASSIFIER_NAMES } from "./stock.js";
|
|
4
|
-
import { ensureNoDuplicates, isRecord,
|
|
3
|
+
import { CERTAINTY_VALUES, STOCK_CLASSIFIER_NAMES } from "./stock.js";
|
|
4
|
+
import { ensureNoDuplicates, isRecord, requireEnum, requireNonEmptyStringMaxLength, requireNonNegativeSafeInteger, requireString, requireStringArray, throwInvalid, } from "./validation.js";
|
|
5
5
|
export const STOCK_REASON_MAX_CHARS = 120;
|
|
6
6
|
export const STOCK_REPLY_MAX_CHARS = 200;
|
|
7
7
|
export const STOCK_TOOL_ID_MAX_CHARS = 64;
|
|
@@ -9,7 +9,7 @@ export const STOCK_TOOL_DESCRIPTION_MAX_CHARS = 240;
|
|
|
9
9
|
export const STOCK_MANIFEST_NAME_MAX_CHARS = 80;
|
|
10
10
|
export const STOCK_MANIFEST_VERSION_MAX_CHARS = 40;
|
|
11
11
|
export const STOCK_MANIFEST_PURPOSE_MAX_CHARS = 400;
|
|
12
|
-
const
|
|
12
|
+
const STOCK_PROMPT_INJECTION_RISK_LEVEL_VALUES = [
|
|
13
13
|
"normal",
|
|
14
14
|
"suspicious",
|
|
15
15
|
"high_risk",
|
|
@@ -113,8 +113,8 @@ function validateStockOutputForName(name, value, model, tools) {
|
|
|
113
113
|
return validateModelSpecializationOutput(value, model);
|
|
114
114
|
case "tools":
|
|
115
115
|
return validateToolsOutput(value, model, tools?.map((tool) => tool.id));
|
|
116
|
-
case "
|
|
117
|
-
return
|
|
116
|
+
case "prompt_injection":
|
|
117
|
+
return validatePromptInjectionOutput(value, model);
|
|
118
118
|
default: {
|
|
119
119
|
const _exhaustive = name;
|
|
120
120
|
void _exhaustive;
|
|
@@ -123,17 +123,19 @@ function validateStockOutputForName(name, value, model, tools) {
|
|
|
123
123
|
}
|
|
124
124
|
}
|
|
125
125
|
function validateMetadata(value, classifier, model) {
|
|
126
|
+
if (value.reason === undefined) {
|
|
127
|
+
throwInvalid(classifier, model, "reason is required");
|
|
128
|
+
}
|
|
129
|
+
if (value.certainty === undefined) {
|
|
130
|
+
throwInvalid(classifier, model, "certainty is required");
|
|
131
|
+
}
|
|
126
132
|
return {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
: { reason: truncateText(requireString(value.reason, classifier, model, "reason"), STOCK_REASON_MAX_CHARS) }),
|
|
130
|
-
...(value.confidence === undefined
|
|
131
|
-
? {}
|
|
132
|
-
: { confidence: requireConfidence(value.confidence, classifier, model) }),
|
|
133
|
+
reason: truncateText(requireString(value.reason, classifier, model, "reason"), STOCK_REASON_MAX_CHARS),
|
|
134
|
+
certainty: requireEnum(value.certainty, CERTAINTY_VALUES, classifier, model, "certainty"),
|
|
133
135
|
};
|
|
134
136
|
}
|
|
135
137
|
function validatePreflightOutput(value, model) {
|
|
136
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
138
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "final_reply", "ack_reply"], "preflight", model, "output");
|
|
137
139
|
if (value.final_reply !== undefined && value.ack_reply !== undefined) {
|
|
138
140
|
throwInvalid("preflight", model, "final_reply and ack_reply are mutually exclusive");
|
|
139
141
|
}
|
|
@@ -163,7 +165,7 @@ function validateReplySignal(value, classifier, model, field) {
|
|
|
163
165
|
return { reply };
|
|
164
166
|
}
|
|
165
167
|
function validateTierRoutingOutput(value, model) {
|
|
166
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
168
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "model_tier"], "routing", model, "output");
|
|
167
169
|
const meta = validateMetadata(value, "routing", model);
|
|
168
170
|
const modelTier = normalizeOptionalEnumValue(value.model_tier);
|
|
169
171
|
return {
|
|
@@ -174,7 +176,7 @@ function validateTierRoutingOutput(value, model) {
|
|
|
174
176
|
};
|
|
175
177
|
}
|
|
176
178
|
function validateModelSpecializationOutput(value, model) {
|
|
177
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
179
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "specialization"], "model_specialization", model, "output");
|
|
178
180
|
const meta = validateMetadata(value, "model_specialization", model);
|
|
179
181
|
const specialization = normalizeOptionalEnumValue(value.specialization);
|
|
180
182
|
return {
|
|
@@ -194,7 +196,7 @@ function normalizeOptionalEnumValue(value) {
|
|
|
194
196
|
return value;
|
|
195
197
|
}
|
|
196
198
|
function validateToolsOutput(value, model, configuredTools) {
|
|
197
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
199
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "tools"], "tools", model, "output");
|
|
198
200
|
const meta = validateMetadata(value, "tools", model);
|
|
199
201
|
const tools = requireStringArray(value.tools, "tools", model, "tools").map(normalizeTool);
|
|
200
202
|
ensureNoDuplicates(tools, "tools", model, "tools");
|
|
@@ -208,39 +210,20 @@ function validateToolsOutput(value, model, configuredTools) {
|
|
|
208
210
|
}
|
|
209
211
|
return { ...meta, tools };
|
|
210
212
|
}
|
|
211
|
-
function
|
|
212
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
213
|
-
const meta = validateMetadata(value, "
|
|
214
|
-
const
|
|
215
|
-
? undefined
|
|
216
|
-
: requireEnum(value.decision, SECURITY_DECISION_VALUES, "security", model, "decision");
|
|
217
|
-
const riskLevel = requireEnum(value.risk_level, STOCK_SAFETY_RISK_LEVEL_VALUES, "security", model, "risk_level");
|
|
218
|
-
const signals = requireStringArray(value.signals, "security", model, "signals");
|
|
219
|
-
ensureNoDuplicates(signals, "security", model, "signals");
|
|
220
|
-
if ((riskLevel === "normal" || riskLevel === "unknown") && signals.length > 0) {
|
|
221
|
-
throwInvalid("security", model, `${riskLevel} risk_level must not include signals`);
|
|
222
|
-
}
|
|
223
|
-
if (riskLevel !== "normal" && riskLevel !== "unknown" && signals.length === 0) {
|
|
224
|
-
throwInvalid("security", model, "elevated risk_level must include at least one signal");
|
|
225
|
-
}
|
|
226
|
-
if (decision === "block" && riskLevel !== "high_risk") {
|
|
227
|
-
throwInvalid("security", model, "decision block requires high_risk risk_level");
|
|
228
|
-
}
|
|
229
|
-
if (decision === "allow" && riskLevel === "high_risk") {
|
|
230
|
-
throwInvalid("security", model, "decision allow must not use high_risk risk_level");
|
|
231
|
-
}
|
|
213
|
+
function validatePromptInjectionOutput(value, model) {
|
|
214
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "risk_level"], "prompt_injection", model, "output");
|
|
215
|
+
const meta = validateMetadata(value, "prompt_injection", model);
|
|
216
|
+
const riskLevel = requireEnum(value.risk_level, STOCK_PROMPT_INJECTION_RISK_LEVEL_VALUES, "prompt_injection", model, "risk_level");
|
|
232
217
|
return {
|
|
233
218
|
...meta,
|
|
234
|
-
...(decision === undefined ? {} : { decision }),
|
|
235
219
|
risk_level: riskLevel,
|
|
236
|
-
signals,
|
|
237
220
|
};
|
|
238
221
|
}
|
|
239
222
|
function validateCustomOutput(value, classifier, model, schema) {
|
|
240
223
|
if (!isRecord(value)) {
|
|
241
224
|
throwInvalid(classifier, model, "output must be a JSON object");
|
|
242
225
|
}
|
|
243
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
226
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "output"], classifier, model, "output");
|
|
244
227
|
if (value.output === undefined) {
|
|
245
228
|
throwInvalid(classifier, model, "output is required for custom classifiers");
|
|
246
229
|
}
|
package/dist/src/stock.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { DownstreamModelTier, ModelSpecialization
|
|
1
|
+
import type { DownstreamModelTier, ModelSpecialization } from "./enums.js";
|
|
2
2
|
export interface StockClassifierMessageInput {
|
|
3
3
|
readonly role: "user" | "assistant";
|
|
4
4
|
readonly text: string;
|
|
@@ -25,14 +25,15 @@ export interface SpecializationSignal {
|
|
|
25
25
|
export interface ToolsSignal {
|
|
26
26
|
readonly tools: ReadonlyArray<string>;
|
|
27
27
|
}
|
|
28
|
-
export interface
|
|
29
|
-
readonly decision?: SecurityDecision;
|
|
28
|
+
export interface PromptInjectionSignal {
|
|
30
29
|
readonly risk_level: "normal" | "suspicious" | "high_risk" | "unknown";
|
|
31
|
-
readonly signals: ReadonlyArray<string>;
|
|
32
30
|
}
|
|
31
|
+
export type Certainty = "no_signal" | "very_weak" | "weak" | "tentative" | "reasonable" | "strong" | "very_strong" | "near_certain";
|
|
32
|
+
export declare const CERTAINTY_VALUES: readonly ["no_signal", "very_weak", "weak", "tentative", "reasonable", "strong", "very_strong", "near_certain"];
|
|
33
|
+
export declare const certaintyScore: Record<Certainty, number>;
|
|
33
34
|
export interface ClassifierOutputMetadata {
|
|
34
|
-
readonly reason
|
|
35
|
-
readonly
|
|
35
|
+
readonly reason: string;
|
|
36
|
+
readonly certainty: Certainty;
|
|
36
37
|
}
|
|
37
38
|
export interface PreflightClassifierOutput extends ClassifierOutputMetadata {
|
|
38
39
|
readonly final_reply?: FinalReplySignal;
|
|
@@ -41,7 +42,7 @@ export interface PreflightClassifierOutput extends ClassifierOutputMetadata {
|
|
|
41
42
|
export type RoutingClassifierOutput = TierSignal & ClassifierOutputMetadata;
|
|
42
43
|
export type ModelSpecializationClassifierOutput = SpecializationSignal & ClassifierOutputMetadata;
|
|
43
44
|
export type ToolsClassifierOutput = ToolsSignal & ClassifierOutputMetadata;
|
|
44
|
-
export type
|
|
45
|
+
export type PromptInjectionClassifierOutput = PromptInjectionSignal & ClassifierOutputMetadata;
|
|
45
46
|
export interface CustomClassifierOutputValue extends ClassifierOutputMetadata {
|
|
46
47
|
readonly output: unknown;
|
|
47
48
|
}
|
|
@@ -50,9 +51,9 @@ export interface StockClassifierOutputs {
|
|
|
50
51
|
readonly routing: RoutingClassifierOutput;
|
|
51
52
|
readonly model_specialization: ModelSpecializationClassifierOutput;
|
|
52
53
|
readonly tools: ToolsClassifierOutput;
|
|
53
|
-
readonly
|
|
54
|
+
readonly prompt_injection: PromptInjectionClassifierOutput;
|
|
54
55
|
}
|
|
55
|
-
export declare const STOCK_CLASSIFIER_NAMES: readonly ["preflight", "routing", "model_specialization", "tools", "
|
|
56
|
+
export declare const STOCK_CLASSIFIER_NAMES: readonly ["preflight", "routing", "model_specialization", "tools", "prompt_injection"];
|
|
56
57
|
export type StockClassifierName = (typeof STOCK_CLASSIFIER_NAMES)[number];
|
|
57
58
|
export type StockClassifierOutput = StockClassifierOutputs[StockClassifierName];
|
|
58
59
|
export type ClassifierOutput = StockClassifierOutput | CustomClassifierOutputValue;
|
|
@@ -94,8 +95,8 @@ export declare function isStockManifest(manifest: RuntimeClassifierManifest): ma
|
|
|
94
95
|
export declare function isCustomManifest(manifest: RuntimeClassifierManifest): manifest is RuntimeCustomManifest;
|
|
95
96
|
export interface CustomClassifierOutput {
|
|
96
97
|
readonly classifier: string;
|
|
97
|
-
readonly reason
|
|
98
|
-
readonly
|
|
98
|
+
readonly reason: string;
|
|
99
|
+
readonly certainty: Certainty;
|
|
99
100
|
readonly output: unknown;
|
|
100
101
|
}
|
|
101
102
|
export {};
|
package/dist/src/stock.js
CHANGED
|
@@ -1,9 +1,29 @@
|
|
|
1
|
+
export const CERTAINTY_VALUES = [
|
|
2
|
+
"no_signal",
|
|
3
|
+
"very_weak",
|
|
4
|
+
"weak",
|
|
5
|
+
"tentative",
|
|
6
|
+
"reasonable",
|
|
7
|
+
"strong",
|
|
8
|
+
"very_strong",
|
|
9
|
+
"near_certain",
|
|
10
|
+
];
|
|
11
|
+
export const certaintyScore = {
|
|
12
|
+
no_signal: 0.00,
|
|
13
|
+
very_weak: 0.15,
|
|
14
|
+
weak: 0.30,
|
|
15
|
+
tentative: 0.45,
|
|
16
|
+
reasonable: 0.60,
|
|
17
|
+
strong: 0.75,
|
|
18
|
+
very_strong: 0.88,
|
|
19
|
+
near_certain: 0.97,
|
|
20
|
+
};
|
|
1
21
|
export const STOCK_CLASSIFIER_NAMES = [
|
|
2
22
|
"preflight",
|
|
3
23
|
"routing",
|
|
4
24
|
"model_specialization",
|
|
5
25
|
"tools",
|
|
6
|
-
"
|
|
26
|
+
"prompt_injection",
|
|
7
27
|
];
|
|
8
28
|
// Helper: narrow a manifest to its stock kind for callers that know the name.
|
|
9
29
|
export function isStockManifest(manifest) {
|
package/dist/src/ui-server.js
CHANGED
|
@@ -21,17 +21,17 @@ import { createServer } from "node:http";
|
|
|
21
21
|
import { extname, join, normalize } from "node:path";
|
|
22
22
|
import { loadCatalog } from "./catalog.js";
|
|
23
23
|
import { CLASSIFIER_NAMES, REGISTRY } from "./classifiers.js";
|
|
24
|
+
import { DEFAULT_CERTAINTY_THRESHOLD, certaintyThreshold, } from "./aggregator.js";
|
|
24
25
|
import { classifierModelsFromConfig, loadOpenClassifyConfig, } from "./config.js";
|
|
25
|
-
import {
|
|
26
|
+
import { DEFAULT_CERTAINTY_GATE } from "./pipeline.js";
|
|
27
|
+
import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, PROMPT_INJECTION_RISK_LEVEL_VALUES, } from "./enums.js";
|
|
26
28
|
import { createOllamaClassifierRunner, OLLAMA_CONTEXT_LENGTH, OLLAMA_DEFAULT_CATALOG_PATH, OLLAMA_MIN_AVAILABLE_MEMORY_BYTES, OLLAMA_MIN_TOTAL_MEMORY_BYTES, OLLAMA_REQUIRED_PARALLELISM, } from "./ollama.js";
|
|
27
29
|
import { classifyOpenClassifyInput } from "./pipeline.js";
|
|
28
30
|
// Served at GET /api/enums so the UI never needs to duplicate shared enum values.
|
|
29
31
|
const CLASSIFIER_ENUMS = {
|
|
30
32
|
downstream_model_tier: [...DOWNSTREAM_MODEL_TIER_VALUES],
|
|
31
33
|
model_specialization: [...MODEL_SPECIALIZATION_VALUES],
|
|
32
|
-
|
|
33
|
-
security_risk_level: [...SECURITY_RISK_LEVEL_VALUES],
|
|
34
|
-
security_signal: [...SECURITY_SIGNAL_VALUES],
|
|
34
|
+
prompt_injection_risk_level: [...PROMPT_INJECTION_RISK_LEVEL_VALUES],
|
|
35
35
|
};
|
|
36
36
|
const CLASSIFIER_METADATA = REGISTRY.map((classifier) => ({
|
|
37
37
|
name: classifier.name,
|
|
@@ -77,7 +77,13 @@ async function route(request, response) {
|
|
|
77
77
|
return;
|
|
78
78
|
}
|
|
79
79
|
if (request.method === "GET" && url.pathname === "/api/classifiers") {
|
|
80
|
-
sendJson(response, {
|
|
80
|
+
sendJson(response, {
|
|
81
|
+
classifiers: CLASSIFIER_METADATA,
|
|
82
|
+
aggregator: {
|
|
83
|
+
certaintyGate: OPEN_CLASSIFY_CONFIG?.aggregator?.certaintyGate ?? DEFAULT_CERTAINTY_GATE,
|
|
84
|
+
certaintyThreshold: certaintyThreshold(OPEN_CLASSIFY_CONFIG?.aggregator) ?? DEFAULT_CERTAINTY_THRESHOLD,
|
|
85
|
+
},
|
|
86
|
+
});
|
|
81
87
|
return;
|
|
82
88
|
}
|
|
83
89
|
if (request.method === "GET") {
|
|
@@ -181,6 +187,7 @@ async function classifyStream(request, response) {
|
|
|
181
187
|
const result = await classifyOpenClassifyInput(input, {
|
|
182
188
|
runClassifier,
|
|
183
189
|
catalog: loadCatalog(CATALOG_PATH),
|
|
190
|
+
aggregator: OPEN_CLASSIFY_CONFIG?.aggregator,
|
|
184
191
|
signal: clientAbortController.signal,
|
|
185
192
|
});
|
|
186
193
|
send("pipeline_completed", result);
|
package/dist/src/validation.d.ts
CHANGED
|
@@ -11,7 +11,6 @@ export declare function requireStringArray(value: unknown, classifier: string, m
|
|
|
11
11
|
export declare function requireStringMaxLength(value: unknown, classifier: string, model: string, path: string, maxChars: number): string;
|
|
12
12
|
export declare function requireNonEmptyStringMaxLength(value: unknown, classifier: string, model: string, path: string, maxChars: number): string;
|
|
13
13
|
export declare function requireEnum<const Values extends readonly string[]>(value: unknown, values: Values, classifier: string, model: string, path: string): Values[number];
|
|
14
|
-
export declare function requireConfidence(value: unknown, classifier: string, model: string, path?: string): number;
|
|
15
14
|
export declare function ensureExactKeys(value: Record<string, unknown>, keys: readonly string[], classifier: string, model: string): void;
|
|
16
15
|
export declare function ensureNoDuplicates(values: string[], classifier: string, model: string, path: string): void;
|
|
17
16
|
export declare function isRecord(value: unknown): value is Record<string, unknown>;
|
package/dist/src/validation.js
CHANGED
|
@@ -67,43 +67,6 @@ export function requireEnum(value, values, classifier, model, path) {
|
|
|
67
67
|
}
|
|
68
68
|
return value;
|
|
69
69
|
}
|
|
70
|
-
// `confidence` must be a finite number in [0, 1]. Required on every
|
|
71
|
-
// classifier output (ClassifierResultBase); fallback shapes use 0.
|
|
72
|
-
export function requireConfidence(value, classifier, model, path = "confidence") {
|
|
73
|
-
const confidence = normalizeConfidence(value);
|
|
74
|
-
if (typeof confidence !== "number" ||
|
|
75
|
-
!Number.isFinite(confidence) ||
|
|
76
|
-
confidence < 0 ||
|
|
77
|
-
confidence > 1) {
|
|
78
|
-
throwInvalid(classifier, model, `${path} must be a number between 0 and 1 inclusive`);
|
|
79
|
-
}
|
|
80
|
-
return confidence;
|
|
81
|
-
}
|
|
82
|
-
function normalizeConfidence(value) {
|
|
83
|
-
if (typeof value === "number") {
|
|
84
|
-
return value > 1 && value <= 100 ? value / 100 : value;
|
|
85
|
-
}
|
|
86
|
-
if (typeof value !== "string")
|
|
87
|
-
return value;
|
|
88
|
-
const text = value.trim().toLowerCase();
|
|
89
|
-
if (text === "")
|
|
90
|
-
return value;
|
|
91
|
-
if (text.endsWith("%")) {
|
|
92
|
-
const percent = Number(text.slice(0, -1).trim());
|
|
93
|
-
return Number.isFinite(percent) ? percent / 100 : value;
|
|
94
|
-
}
|
|
95
|
-
const numeric = Number(text);
|
|
96
|
-
if (Number.isFinite(numeric)) {
|
|
97
|
-
return numeric > 1 && numeric <= 100 ? numeric / 100 : numeric;
|
|
98
|
-
}
|
|
99
|
-
if (text === "high")
|
|
100
|
-
return 0.9;
|
|
101
|
-
if (text === "medium")
|
|
102
|
-
return 0.5;
|
|
103
|
-
if (text === "low")
|
|
104
|
-
return 0.2;
|
|
105
|
-
return value;
|
|
106
|
-
}
|
|
107
70
|
export function ensureExactKeys(value, keys, classifier, model) {
|
|
108
71
|
const expected = new Set(keys);
|
|
109
72
|
for (const key of Object.keys(value)) {
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# Adding a classifier
|
|
2
|
+
|
|
3
|
+
Most additions are custom classifiers. You drop two files in a directory; the runtime picks them up. No TypeScript registry edits required.
|
|
4
|
+
|
|
5
|
+
## 1. Pick a directory
|
|
6
|
+
|
|
7
|
+
Custom classifier:
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
src/classifiers/custom/<name>/
|
|
11
|
+
├── manifest.json
|
|
12
|
+
└── prompt.md
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Stock classifier names are closed (`preflight`, `routing`, `model_specialization`, `tools`, `prompt_injection`). You generally don't add new stock classifiers — extend behavior with a custom one instead.
|
|
16
|
+
|
|
17
|
+
## 2. Write the manifest
|
|
18
|
+
|
|
19
|
+
```json
|
|
20
|
+
{
|
|
21
|
+
"kind": "custom",
|
|
22
|
+
"name": "topic_tags",
|
|
23
|
+
"version": "1.0.0",
|
|
24
|
+
"purpose": "Tag the message with a small set of topic labels for analytics.",
|
|
25
|
+
"order": 70,
|
|
26
|
+
"fallback": {
|
|
27
|
+
"reason": "Classifier failed; no tags generated.",
|
|
28
|
+
"certainty": "no_signal",
|
|
29
|
+
"output": { "tags": [] }
|
|
30
|
+
},
|
|
31
|
+
"output_schema": {
|
|
32
|
+
"type": "object",
|
|
33
|
+
"additionalProperties": false,
|
|
34
|
+
"required": ["tags"],
|
|
35
|
+
"properties": {
|
|
36
|
+
"tags": {
|
|
37
|
+
"type": "array", "maxItems": 5,
|
|
38
|
+
"items": { "type": "string", "minLength": 1, "maxLength": 40 }
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Rules:
|
|
46
|
+
|
|
47
|
+
- `name` must match the directory name.
|
|
48
|
+
- `name` must not collide with a stock classifier name.
|
|
49
|
+
- `order` must not collide with any other classifier's `order`; duplicate values are rejected at load time.
|
|
50
|
+
- `fallback.output` must validate against your `output_schema`; `fallback` itself also carries `reason` and `certainty`.
|
|
51
|
+
|
|
52
|
+
See [manifests.md](manifests.md) for the full field list.
|
|
53
|
+
|
|
54
|
+
## 3. Write the prompt
|
|
55
|
+
|
|
56
|
+
`prompt.md` is the classifier-specific instruction text. The runtime composes it with an auto-generated preamble that describes the JSON output envelope, so your prompt can focus on the classification rule:
|
|
57
|
+
|
|
58
|
+
```markdown
|
|
59
|
+
You are the topic_tags classifier.
|
|
60
|
+
|
|
61
|
+
Tags are short single-word topic labels (lowercase, no spaces). Use at most five.
|
|
62
|
+
Return an empty array when no clear topic applies.
|
|
63
|
+
Do not invent tags for vague or ambiguous messages.
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Keep it focused. Don't put aggregation or routing rules in prompts — those live in the runtime and catalog.
|
|
67
|
+
|
|
68
|
+
## 4. Build and test
|
|
69
|
+
|
|
70
|
+
```sh
|
|
71
|
+
npm run build # validates the manifest, sorts the registry, copies assets
|
|
72
|
+
npm test
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
If the manifest is malformed, the loader throws `ClassifierManifestError` with the path and a specific reason.
|
|
76
|
+
|
|
77
|
+
## 5. Consume the output
|
|
78
|
+
|
|
79
|
+
```ts
|
|
80
|
+
const classify = createClassifier({ catalog });
|
|
81
|
+
const result = await classify(input);
|
|
82
|
+
if (result.action === "route") {
|
|
83
|
+
const tags = result.classifier_outputs.topic_tags?.tags ?? [];
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
`result.audit.custom_outputs[]` carries the same data with required `reason` and `certainty` metadata if you need to inspect them.
|
|
88
|
+
|
|
89
|
+
## Choosing the classifier model
|
|
90
|
+
|
|
91
|
+
For apps and OSS installs, prefer `open-classify.config.json`:
|
|
92
|
+
|
|
93
|
+
```json
|
|
94
|
+
{
|
|
95
|
+
"runner": {
|
|
96
|
+
"provider": "ollama",
|
|
97
|
+
"defaultModel": "gemma4:e4b-it-q4_K_M",
|
|
98
|
+
"models": {
|
|
99
|
+
"custom": {
|
|
100
|
+
"topic_tags": "qwen2.5:7b-instruct-q4_K_M"
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
`runner.defaultModel` applies to every classifier without an override. Overrides are keyed by classifier id: `runner.models.stock` holds overrides for built-in classifiers, and `runner.models.custom` holds overrides for custom classifiers.
|
|
108
|
+
|
|
109
|
+
Classifier manifests may also carry an Ollama hint for packaged classifiers:
|
|
110
|
+
|
|
111
|
+
```json
|
|
112
|
+
{
|
|
113
|
+
"backend": { "ollama": { "base_model": "qwen2.5:7b-instruct-q4_K_M" } }
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Config file and function options take precedence over manifest hints.
|
|
118
|
+
|
|
119
|
+
## Replacing the backend
|
|
120
|
+
|
|
121
|
+
For full backend control, implement your own `RunClassifier` and pass it to `classifyOpenClassifyInput`:
|
|
122
|
+
|
|
123
|
+
```ts
|
|
124
|
+
import { classifyOpenClassifyInput, loadCatalog, type RunClassifier } from "open-classify";
|
|
125
|
+
|
|
126
|
+
const runClassifier: RunClassifier = async (name, input, signal) => {
|
|
127
|
+
// call OpenAI, Anthropic, a remote service, etc.
|
|
128
|
+
// return a ClassifierOutput matching the classifier's contract.
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
await classifyOpenClassifyInput(input, { runClassifier, catalog: loadCatalog(...) });
|
|
132
|
+
```
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Manifest reference
|
|
2
|
+
|
|
3
|
+
Every classifier directory contains a `manifest.json`. Custom classifiers also contain a `prompt.md`. Stock prompt markdown lives in `src/classifiers/stock/prompts/` and is assembled at runtime.
|
|
4
|
+
|
|
5
|
+
## Layout
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
src/classifiers/
|
|
9
|
+
stock/prompts/ # built-in prompt markdown
|
|
10
|
+
base.md
|
|
11
|
+
confidence.md
|
|
12
|
+
reason.md
|
|
13
|
+
tier.md
|
|
14
|
+
specialty.md
|
|
15
|
+
tools-output.md
|
|
16
|
+
tools.md
|
|
17
|
+
stock/<name>/ # built-in classifier
|
|
18
|
+
manifest.json
|
|
19
|
+
custom/<name>/ # caller-defined classifier
|
|
20
|
+
manifest.json
|
|
21
|
+
prompt.md
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
The `kind` field in the manifest must match the parent directory (`stock` or `custom`). Mismatches are rejected at load time.
|
|
25
|
+
|
|
26
|
+
## Common fields
|
|
27
|
+
|
|
28
|
+
| Field | Required | Description |
|
|
29
|
+
|---|---|---|
|
|
30
|
+
| `kind` | yes | `"stock"` or `"custom"` |
|
|
31
|
+
| `name` | yes | Classifier id. Must match the directory name. |
|
|
32
|
+
| `version` | yes | Contract version surfaced in `meta.classifiers[name].version`. |
|
|
33
|
+
| `purpose` | yes | Human-readable description. |
|
|
34
|
+
| `order` | yes | Integer sort key. Duplicate orders are rejected. |
|
|
35
|
+
| `fallback` | yes | Output emitted when the classifier errors or times out. Must validate against the kind's output contract. |
|
|
36
|
+
| `backend.ollama.base_model` | no | Packaged Ollama model hint for this classifier. User config and function options take precedence. |
|
|
37
|
+
|
|
38
|
+
## Stock manifests
|
|
39
|
+
|
|
40
|
+
Stock manifests use a closed set of names (`preflight`, `routing`, `model_specialization`, `tools`, `prompt_injection`). The runtime knows each name's signal type, so there's no `emits` field. Fallbacks must satisfy the signal contract for that name (see [signals.md](signals.md)).
|
|
41
|
+
|
|
42
|
+
The `tools` classifier additionally takes:
|
|
43
|
+
|
|
44
|
+
| Field | Required | Description |
|
|
45
|
+
|---|---|---|
|
|
46
|
+
| `tools` | no | Array of `{ id, description }`. Restricts which tool ids the classifier may emit. |
|
|
47
|
+
|
|
48
|
+
Example (`src/classifiers/stock/prompt_injection/manifest.json`):
|
|
49
|
+
|
|
50
|
+
```json
|
|
51
|
+
{
|
|
52
|
+
"kind": "stock",
|
|
53
|
+
"name": "prompt_injection",
|
|
54
|
+
"version": "1.0.0",
|
|
55
|
+
"purpose": "Assess whether the target message contains prompt-injection attempts.",
|
|
56
|
+
"order": 50,
|
|
57
|
+
"fallback": {
|
|
58
|
+
"reason": "Classifier failed; prompt-injection risk is unknown.",
|
|
59
|
+
"certainty": "no_signal",
|
|
60
|
+
"risk_level": "unknown"
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Custom manifests
|
|
66
|
+
|
|
67
|
+
| Field | Required | Description |
|
|
68
|
+
|---|---|---|
|
|
69
|
+
| `output_schema` | yes | JSON Schema (Ajv-validated) for the `output` payload. |
|
|
70
|
+
|
|
71
|
+
Custom classifier names must not collide with any stock classifier name.
|
|
72
|
+
|
|
73
|
+
Example:
|
|
74
|
+
|
|
75
|
+
```json
|
|
76
|
+
{
|
|
77
|
+
"kind": "custom",
|
|
78
|
+
"name": "memory_retrieval_queries",
|
|
79
|
+
"version": "1.0.0",
|
|
80
|
+
"purpose": "Generate saved-memory query hints for caller-owned memory retrieval.",
|
|
81
|
+
"order": 60,
|
|
82
|
+
"fallback": {
|
|
83
|
+
"reason": "Classifier failed; no memory queries generated.",
|
|
84
|
+
"certainty": "no_signal",
|
|
85
|
+
"output": { "queries": [] }
|
|
86
|
+
},
|
|
87
|
+
"output_schema": {
|
|
88
|
+
"type": "object",
|
|
89
|
+
"additionalProperties": false,
|
|
90
|
+
"required": ["queries"],
|
|
91
|
+
"properties": {
|
|
92
|
+
"queries": {
|
|
93
|
+
"type": "array", "maxItems": 5,
|
|
94
|
+
"items": { "type": "string", "minLength": 1, "maxLength": 120 }
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Prompt files
|
|
102
|
+
|
|
103
|
+
Stock prompt files live together in `src/classifiers/stock/prompts/`. The runtime assembles shared markdown (`base.md`, `reason.md`, `confidence.md`, `classifier-header.md`) with focused stock sections such as `tier.md`, `specialty.md`, `tools-output.md`, and the stock classifier file (`preflight.md`, `routing.md`, `model_specialization.md`, `tools.md`, or `prompt_injection.md`).
|
|
104
|
+
|
|
105
|
+
Dynamic prompt sections use small markdown slots. For example, `tools.md` contains `{{allowed_tools}}`, and the runtime renders the allowed tool list from the tools manifest.
|
|
106
|
+
|
|
107
|
+
Custom `prompt.md` is the classifier-specific instruction text. The runtime composes it with the shared JSON output envelope, so prompts can stay focused on classifier behavior:
|
|
108
|
+
|
|
109
|
+
- what the classifier decides
|
|
110
|
+
- when to emit each declared field
|
|
111
|
+
- when to omit optional fields
|
|
112
|
+
- short examples only when they clarify a boundary
|
|
113
|
+
|
|
114
|
+
Do not put aggregation or model-id rules in prompts — those live in the runtime and catalog.
|
|
115
|
+
|
|
116
|
+
## Validation rejections
|
|
117
|
+
|
|
118
|
+
The loader rejects manifests that:
|
|
119
|
+
|
|
120
|
+
- declare unsupported fields
|
|
121
|
+
- collide on `name` or `order`
|
|
122
|
+
- have an empty custom `prompt.md`
|
|
123
|
+
- declare a custom name that matches a stock classifier
|
|
124
|
+
- declare `kind` that doesn't match the parent directory
|
|
125
|
+
- have a `fallback` that doesn't satisfy the stock signal contract (stock classifiers) or the `output_schema` (custom classifiers)
|
|
126
|
+
- are missing `output_schema` on a custom classifier
|
|
127
|
+
- declare `tools` on any classifier other than the `tools` stock classifier
|