open-classify 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +54 -35
  2. package/dist/src/aggregator.d.ts +4 -1
  3. package/dist/src/aggregator.js +25 -15
  4. package/dist/src/classifiers/custom/context_shift/manifest.json +31 -0
  5. package/dist/src/classifiers/custom/context_shift/prompt.md +12 -0
  6. package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/manifest.json +3 -1
  7. package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/prompt.md +1 -1
  8. package/dist/src/classifiers/custom/memory_retrieval_queries/manifest.json +2 -0
  9. package/dist/src/classifiers/stock/model_specialization/manifest.json +4 -1
  10. package/dist/src/classifiers/stock/preflight/manifest.json +4 -1
  11. package/dist/src/classifiers/stock/prompt_injection/manifest.json +12 -0
  12. package/dist/src/classifiers/stock/prompts/confidence.md +3 -3
  13. package/dist/src/classifiers/stock/prompts/custom-output.md +7 -1
  14. package/dist/src/classifiers/stock/prompts/preflight.md +7 -7
  15. package/dist/src/classifiers/stock/prompts/prompt-injection-output.md +5 -0
  16. package/dist/src/classifiers/stock/prompts/prompt_injection.md +24 -0
  17. package/dist/src/classifiers/stock/prompts/reason.md +1 -1
  18. package/dist/src/classifiers/stock/prompts/specialty.md +8 -6
  19. package/dist/src/classifiers/stock/prompts/tier.md +1 -1
  20. package/dist/src/classifiers/stock/prompts/tools-output.md +4 -0
  21. package/dist/src/classifiers/stock/routing/manifest.json +4 -1
  22. package/dist/src/classifiers/stock/tools/manifest.json +2 -0
  23. package/dist/src/classify.d.ts +22 -0
  24. package/dist/src/classify.js +50 -0
  25. package/dist/src/config.d.ts +2 -0
  26. package/dist/src/config.js +33 -1
  27. package/dist/src/enums.d.ts +3 -7
  28. package/dist/src/enums.js +7 -30
  29. package/dist/src/index.d.ts +1 -0
  30. package/dist/src/index.js +2 -1
  31. package/dist/src/input.js +1 -1
  32. package/dist/src/manifest.d.ts +31 -23
  33. package/dist/src/manifest.js +5 -1
  34. package/dist/src/ollama.d.ts +0 -11
  35. package/dist/src/ollama.js +0 -36
  36. package/dist/src/pipeline.d.ts +1 -0
  37. package/dist/src/pipeline.js +78 -48
  38. package/dist/src/stock-prompt.js +1 -1
  39. package/dist/src/stock-validation.d.ts +1 -2
  40. package/dist/src/stock-validation.js +23 -40
  41. package/dist/src/stock.d.ts +12 -11
  42. package/dist/src/stock.js +21 -1
  43. package/dist/src/ui-server.js +12 -5
  44. package/dist/src/validation.d.ts +0 -1
  45. package/dist/src/validation.js +0 -37
  46. package/docs/adding-a-classifier.md +132 -0
  47. package/docs/manifests.md +127 -0
  48. package/docs/resolver.md +104 -0
  49. package/docs/signals.md +102 -0
  50. package/downstream-models.json +124 -0
  51. package/open-classify.config.example.json +5 -1
  52. package/package.json +3 -1
  53. package/dist/src/classifiers/stock/prompts/security-output.md +0 -8
  54. package/dist/src/classifiers/stock/prompts/security.md +0 -26
  55. package/dist/src/classifiers/stock/security/manifest.json +0 -12
@@ -1,4 +1,4 @@
1
- import type { JsonClassifierManifest, SafetySignal, ClassifierOutput } from "./stock.js";
1
+ import type { JsonClassifierManifest, ClassifierOutput } from "./stock.js";
2
2
  export declare const STOCK_REASON_MAX_CHARS = 120;
3
3
  export declare const STOCK_REPLY_MAX_CHARS = 200;
4
4
  export declare const STOCK_TOOL_ID_MAX_CHARS = 64;
@@ -19,4 +19,3 @@ export interface LegacyValidateOptions {
19
19
  readonly manifest: JsonClassifierManifest;
20
20
  }
21
21
  export declare function validateClassifierOutputWithManifest(value: unknown, options: LegacyValidateOptions): ClassifierOutput;
22
- export type { SafetySignal };
@@ -1,7 +1,7 @@
1
- import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, SECURITY_DECISION_VALUES, } from "./enums.js";
1
+ import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, } from "./enums.js";
2
2
  import { Ajv } from "ajv/dist/ajv.js";
3
- import { STOCK_CLASSIFIER_NAMES } from "./stock.js";
4
- import { ensureNoDuplicates, isRecord, requireConfidence, requireEnum, requireNonEmptyStringMaxLength, requireNonNegativeSafeInteger, requireString, requireStringArray, throwInvalid, } from "./validation.js";
3
+ import { CERTAINTY_VALUES, STOCK_CLASSIFIER_NAMES } from "./stock.js";
4
+ import { ensureNoDuplicates, isRecord, requireEnum, requireNonEmptyStringMaxLength, requireNonNegativeSafeInteger, requireString, requireStringArray, throwInvalid, } from "./validation.js";
5
5
  export const STOCK_REASON_MAX_CHARS = 120;
6
6
  export const STOCK_REPLY_MAX_CHARS = 200;
7
7
  export const STOCK_TOOL_ID_MAX_CHARS = 64;
@@ -9,7 +9,7 @@ export const STOCK_TOOL_DESCRIPTION_MAX_CHARS = 240;
9
9
  export const STOCK_MANIFEST_NAME_MAX_CHARS = 80;
10
10
  export const STOCK_MANIFEST_VERSION_MAX_CHARS = 40;
11
11
  export const STOCK_MANIFEST_PURPOSE_MAX_CHARS = 400;
12
- const STOCK_SAFETY_RISK_LEVEL_VALUES = [
12
+ const STOCK_PROMPT_INJECTION_RISK_LEVEL_VALUES = [
13
13
  "normal",
14
14
  "suspicious",
15
15
  "high_risk",
@@ -113,8 +113,8 @@ function validateStockOutputForName(name, value, model, tools) {
113
113
  return validateModelSpecializationOutput(value, model);
114
114
  case "tools":
115
115
  return validateToolsOutput(value, model, tools?.map((tool) => tool.id));
116
- case "security":
117
- return validateSecurityOutput(value, model);
116
+ case "prompt_injection":
117
+ return validatePromptInjectionOutput(value, model);
118
118
  default: {
119
119
  const _exhaustive = name;
120
120
  void _exhaustive;
@@ -123,17 +123,19 @@ function validateStockOutputForName(name, value, model, tools) {
123
123
  }
124
124
  }
125
125
  function validateMetadata(value, classifier, model) {
126
+ if (value.reason === undefined) {
127
+ throwInvalid(classifier, model, "reason is required");
128
+ }
129
+ if (value.certainty === undefined) {
130
+ throwInvalid(classifier, model, "certainty is required");
131
+ }
126
132
  return {
127
- ...(value.reason === undefined
128
- ? {}
129
- : { reason: truncateText(requireString(value.reason, classifier, model, "reason"), STOCK_REASON_MAX_CHARS) }),
130
- ...(value.confidence === undefined
131
- ? {}
132
- : { confidence: requireConfidence(value.confidence, classifier, model) }),
133
+ reason: truncateText(requireString(value.reason, classifier, model, "reason"), STOCK_REASON_MAX_CHARS),
134
+ certainty: requireEnum(value.certainty, CERTAINTY_VALUES, classifier, model, "certainty"),
133
135
  };
134
136
  }
135
137
  function validatePreflightOutput(value, model) {
136
- ensureAllowedObjectKeys(value, ["reason", "confidence", "final_reply", "ack_reply"], "preflight", model, "output");
138
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "final_reply", "ack_reply"], "preflight", model, "output");
137
139
  if (value.final_reply !== undefined && value.ack_reply !== undefined) {
138
140
  throwInvalid("preflight", model, "final_reply and ack_reply are mutually exclusive");
139
141
  }
@@ -163,7 +165,7 @@ function validateReplySignal(value, classifier, model, field) {
163
165
  return { reply };
164
166
  }
165
167
  function validateTierRoutingOutput(value, model) {
166
- ensureAllowedObjectKeys(value, ["reason", "confidence", "model_tier"], "routing", model, "output");
168
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "model_tier"], "routing", model, "output");
167
169
  const meta = validateMetadata(value, "routing", model);
168
170
  const modelTier = normalizeOptionalEnumValue(value.model_tier);
169
171
  return {
@@ -174,7 +176,7 @@ function validateTierRoutingOutput(value, model) {
174
176
  };
175
177
  }
176
178
  function validateModelSpecializationOutput(value, model) {
177
- ensureAllowedObjectKeys(value, ["reason", "confidence", "specialization"], "model_specialization", model, "output");
179
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "specialization"], "model_specialization", model, "output");
178
180
  const meta = validateMetadata(value, "model_specialization", model);
179
181
  const specialization = normalizeOptionalEnumValue(value.specialization);
180
182
  return {
@@ -194,7 +196,7 @@ function normalizeOptionalEnumValue(value) {
194
196
  return value;
195
197
  }
196
198
  function validateToolsOutput(value, model, configuredTools) {
197
- ensureAllowedObjectKeys(value, ["reason", "confidence", "tools"], "tools", model, "output");
199
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "tools"], "tools", model, "output");
198
200
  const meta = validateMetadata(value, "tools", model);
199
201
  const tools = requireStringArray(value.tools, "tools", model, "tools").map(normalizeTool);
200
202
  ensureNoDuplicates(tools, "tools", model, "tools");
@@ -208,39 +210,20 @@ function validateToolsOutput(value, model, configuredTools) {
208
210
  }
209
211
  return { ...meta, tools };
210
212
  }
211
- function validateSecurityOutput(value, model) {
212
- ensureAllowedObjectKeys(value, ["reason", "confidence", "decision", "risk_level", "signals"], "security", model, "output");
213
- const meta = validateMetadata(value, "security", model);
214
- const decision = value.decision === undefined
215
- ? undefined
216
- : requireEnum(value.decision, SECURITY_DECISION_VALUES, "security", model, "decision");
217
- const riskLevel = requireEnum(value.risk_level, STOCK_SAFETY_RISK_LEVEL_VALUES, "security", model, "risk_level");
218
- const signals = requireStringArray(value.signals, "security", model, "signals");
219
- ensureNoDuplicates(signals, "security", model, "signals");
220
- if ((riskLevel === "normal" || riskLevel === "unknown") && signals.length > 0) {
221
- throwInvalid("security", model, `${riskLevel} risk_level must not include signals`);
222
- }
223
- if (riskLevel !== "normal" && riskLevel !== "unknown" && signals.length === 0) {
224
- throwInvalid("security", model, "elevated risk_level must include at least one signal");
225
- }
226
- if (decision === "block" && riskLevel !== "high_risk") {
227
- throwInvalid("security", model, "decision block requires high_risk risk_level");
228
- }
229
- if (decision === "allow" && riskLevel === "high_risk") {
230
- throwInvalid("security", model, "decision allow must not use high_risk risk_level");
231
- }
213
+ function validatePromptInjectionOutput(value, model) {
214
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "risk_level"], "prompt_injection", model, "output");
215
+ const meta = validateMetadata(value, "prompt_injection", model);
216
+ const riskLevel = requireEnum(value.risk_level, STOCK_PROMPT_INJECTION_RISK_LEVEL_VALUES, "prompt_injection", model, "risk_level");
232
217
  return {
233
218
  ...meta,
234
- ...(decision === undefined ? {} : { decision }),
235
219
  risk_level: riskLevel,
236
- signals,
237
220
  };
238
221
  }
239
222
  function validateCustomOutput(value, classifier, model, schema) {
240
223
  if (!isRecord(value)) {
241
224
  throwInvalid(classifier, model, "output must be a JSON object");
242
225
  }
243
- ensureAllowedObjectKeys(value, ["reason", "confidence", "output"], classifier, model, "output");
226
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "output"], classifier, model, "output");
244
227
  if (value.output === undefined) {
245
228
  throwInvalid(classifier, model, "output is required for custom classifiers");
246
229
  }
@@ -1,4 +1,4 @@
1
- import type { DownstreamModelTier, ModelSpecialization, SecurityDecision } from "./enums.js";
1
+ import type { DownstreamModelTier, ModelSpecialization } from "./enums.js";
2
2
  export interface StockClassifierMessageInput {
3
3
  readonly role: "user" | "assistant";
4
4
  readonly text: string;
@@ -25,14 +25,15 @@ export interface SpecializationSignal {
25
25
  export interface ToolsSignal {
26
26
  readonly tools: ReadonlyArray<string>;
27
27
  }
28
- export interface SafetySignal {
29
- readonly decision?: SecurityDecision;
28
+ export interface PromptInjectionSignal {
30
29
  readonly risk_level: "normal" | "suspicious" | "high_risk" | "unknown";
31
- readonly signals: ReadonlyArray<string>;
32
30
  }
31
+ export type Certainty = "no_signal" | "very_weak" | "weak" | "tentative" | "reasonable" | "strong" | "very_strong" | "near_certain";
32
+ export declare const CERTAINTY_VALUES: readonly ["no_signal", "very_weak", "weak", "tentative", "reasonable", "strong", "very_strong", "near_certain"];
33
+ export declare const certaintyScore: Record<Certainty, number>;
33
34
  export interface ClassifierOutputMetadata {
34
- readonly reason?: string;
35
- readonly confidence?: number;
35
+ readonly reason: string;
36
+ readonly certainty: Certainty;
36
37
  }
37
38
  export interface PreflightClassifierOutput extends ClassifierOutputMetadata {
38
39
  readonly final_reply?: FinalReplySignal;
@@ -41,7 +42,7 @@ export interface PreflightClassifierOutput extends ClassifierOutputMetadata {
41
42
  export type RoutingClassifierOutput = TierSignal & ClassifierOutputMetadata;
42
43
  export type ModelSpecializationClassifierOutput = SpecializationSignal & ClassifierOutputMetadata;
43
44
  export type ToolsClassifierOutput = ToolsSignal & ClassifierOutputMetadata;
44
- export type SecurityClassifierOutput = SafetySignal & ClassifierOutputMetadata;
45
+ export type PromptInjectionClassifierOutput = PromptInjectionSignal & ClassifierOutputMetadata;
45
46
  export interface CustomClassifierOutputValue extends ClassifierOutputMetadata {
46
47
  readonly output: unknown;
47
48
  }
@@ -50,9 +51,9 @@ export interface StockClassifierOutputs {
50
51
  readonly routing: RoutingClassifierOutput;
51
52
  readonly model_specialization: ModelSpecializationClassifierOutput;
52
53
  readonly tools: ToolsClassifierOutput;
53
- readonly security: SecurityClassifierOutput;
54
+ readonly prompt_injection: PromptInjectionClassifierOutput;
54
55
  }
55
- export declare const STOCK_CLASSIFIER_NAMES: readonly ["preflight", "routing", "model_specialization", "tools", "security"];
56
+ export declare const STOCK_CLASSIFIER_NAMES: readonly ["preflight", "routing", "model_specialization", "tools", "prompt_injection"];
56
57
  export type StockClassifierName = (typeof STOCK_CLASSIFIER_NAMES)[number];
57
58
  export type StockClassifierOutput = StockClassifierOutputs[StockClassifierName];
58
59
  export type ClassifierOutput = StockClassifierOutput | CustomClassifierOutputValue;
@@ -94,8 +95,8 @@ export declare function isStockManifest(manifest: RuntimeClassifierManifest): ma
94
95
  export declare function isCustomManifest(manifest: RuntimeClassifierManifest): manifest is RuntimeCustomManifest;
95
96
  export interface CustomClassifierOutput {
96
97
  readonly classifier: string;
97
- readonly reason?: string;
98
- readonly confidence?: number;
98
+ readonly reason: string;
99
+ readonly certainty: Certainty;
99
100
  readonly output: unknown;
100
101
  }
101
102
  export {};
package/dist/src/stock.js CHANGED
@@ -1,9 +1,29 @@
1
+ export const CERTAINTY_VALUES = [
2
+ "no_signal",
3
+ "very_weak",
4
+ "weak",
5
+ "tentative",
6
+ "reasonable",
7
+ "strong",
8
+ "very_strong",
9
+ "near_certain",
10
+ ];
11
+ export const certaintyScore = {
12
+ no_signal: 0.00,
13
+ very_weak: 0.15,
14
+ weak: 0.30,
15
+ tentative: 0.45,
16
+ reasonable: 0.60,
17
+ strong: 0.75,
18
+ very_strong: 0.88,
19
+ near_certain: 0.97,
20
+ };
1
21
  export const STOCK_CLASSIFIER_NAMES = [
2
22
  "preflight",
3
23
  "routing",
4
24
  "model_specialization",
5
25
  "tools",
6
- "security",
26
+ "prompt_injection",
7
27
  ];
8
28
  // Helper: narrow a manifest to its stock kind for callers that know the name.
9
29
  export function isStockManifest(manifest) {
@@ -21,17 +21,17 @@ import { createServer } from "node:http";
21
21
  import { extname, join, normalize } from "node:path";
22
22
  import { loadCatalog } from "./catalog.js";
23
23
  import { CLASSIFIER_NAMES, REGISTRY } from "./classifiers.js";
24
+ import { DEFAULT_CERTAINTY_THRESHOLD, certaintyThreshold, } from "./aggregator.js";
24
25
  import { classifierModelsFromConfig, loadOpenClassifyConfig, } from "./config.js";
25
- import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, SECURITY_DECISION_VALUES, SECURITY_RISK_LEVEL_VALUES, SECURITY_SIGNAL_VALUES, } from "./enums.js";
26
+ import { DEFAULT_CERTAINTY_GATE } from "./pipeline.js";
27
+ import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, PROMPT_INJECTION_RISK_LEVEL_VALUES, } from "./enums.js";
26
28
  import { createOllamaClassifierRunner, OLLAMA_CONTEXT_LENGTH, OLLAMA_DEFAULT_CATALOG_PATH, OLLAMA_MIN_AVAILABLE_MEMORY_BYTES, OLLAMA_MIN_TOTAL_MEMORY_BYTES, OLLAMA_REQUIRED_PARALLELISM, } from "./ollama.js";
27
29
  import { classifyOpenClassifyInput } from "./pipeline.js";
28
30
  // Served at GET /api/enums so the UI never needs to duplicate shared enum values.
29
31
  const CLASSIFIER_ENUMS = {
30
32
  downstream_model_tier: [...DOWNSTREAM_MODEL_TIER_VALUES],
31
33
  model_specialization: [...MODEL_SPECIALIZATION_VALUES],
32
- security_decision: [...SECURITY_DECISION_VALUES],
33
- security_risk_level: [...SECURITY_RISK_LEVEL_VALUES],
34
- security_signal: [...SECURITY_SIGNAL_VALUES],
34
+ prompt_injection_risk_level: [...PROMPT_INJECTION_RISK_LEVEL_VALUES],
35
35
  };
36
36
  const CLASSIFIER_METADATA = REGISTRY.map((classifier) => ({
37
37
  name: classifier.name,
@@ -77,7 +77,13 @@ async function route(request, response) {
77
77
  return;
78
78
  }
79
79
  if (request.method === "GET" && url.pathname === "/api/classifiers") {
80
- sendJson(response, { classifiers: CLASSIFIER_METADATA });
80
+ sendJson(response, {
81
+ classifiers: CLASSIFIER_METADATA,
82
+ aggregator: {
83
+ certaintyGate: OPEN_CLASSIFY_CONFIG?.aggregator?.certaintyGate ?? DEFAULT_CERTAINTY_GATE,
84
+ certaintyThreshold: certaintyThreshold(OPEN_CLASSIFY_CONFIG?.aggregator) ?? DEFAULT_CERTAINTY_THRESHOLD,
85
+ },
86
+ });
81
87
  return;
82
88
  }
83
89
  if (request.method === "GET") {
@@ -181,6 +187,7 @@ async function classifyStream(request, response) {
181
187
  const result = await classifyOpenClassifyInput(input, {
182
188
  runClassifier,
183
189
  catalog: loadCatalog(CATALOG_PATH),
190
+ aggregator: OPEN_CLASSIFY_CONFIG?.aggregator,
184
191
  signal: clientAbortController.signal,
185
192
  });
186
193
  send("pipeline_completed", result);
@@ -11,7 +11,6 @@ export declare function requireStringArray(value: unknown, classifier: string, m
11
11
  export declare function requireStringMaxLength(value: unknown, classifier: string, model: string, path: string, maxChars: number): string;
12
12
  export declare function requireNonEmptyStringMaxLength(value: unknown, classifier: string, model: string, path: string, maxChars: number): string;
13
13
  export declare function requireEnum<const Values extends readonly string[]>(value: unknown, values: Values, classifier: string, model: string, path: string): Values[number];
14
- export declare function requireConfidence(value: unknown, classifier: string, model: string, path?: string): number;
15
14
  export declare function ensureExactKeys(value: Record<string, unknown>, keys: readonly string[], classifier: string, model: string): void;
16
15
  export declare function ensureNoDuplicates(values: string[], classifier: string, model: string, path: string): void;
17
16
  export declare function isRecord(value: unknown): value is Record<string, unknown>;
@@ -67,43 +67,6 @@ export function requireEnum(value, values, classifier, model, path) {
67
67
  }
68
68
  return value;
69
69
  }
70
- // `confidence` must be a finite number in [0, 1]. Required on every
71
- // classifier output (ClassifierResultBase); fallback shapes use 0.
72
- export function requireConfidence(value, classifier, model, path = "confidence") {
73
- const confidence = normalizeConfidence(value);
74
- if (typeof confidence !== "number" ||
75
- !Number.isFinite(confidence) ||
76
- confidence < 0 ||
77
- confidence > 1) {
78
- throwInvalid(classifier, model, `${path} must be a number between 0 and 1 inclusive`);
79
- }
80
- return confidence;
81
- }
82
- function normalizeConfidence(value) {
83
- if (typeof value === "number") {
84
- return value > 1 && value <= 100 ? value / 100 : value;
85
- }
86
- if (typeof value !== "string")
87
- return value;
88
- const text = value.trim().toLowerCase();
89
- if (text === "")
90
- return value;
91
- if (text.endsWith("%")) {
92
- const percent = Number(text.slice(0, -1).trim());
93
- return Number.isFinite(percent) ? percent / 100 : value;
94
- }
95
- const numeric = Number(text);
96
- if (Number.isFinite(numeric)) {
97
- return numeric > 1 && numeric <= 100 ? numeric / 100 : numeric;
98
- }
99
- if (text === "high")
100
- return 0.9;
101
- if (text === "medium")
102
- return 0.5;
103
- if (text === "low")
104
- return 0.2;
105
- return value;
106
- }
107
70
  export function ensureExactKeys(value, keys, classifier, model) {
108
71
  const expected = new Set(keys);
109
72
  for (const key of Object.keys(value)) {
@@ -0,0 +1,132 @@
1
+ # Adding a classifier
2
+
3
+ Most additions are custom classifiers. You drop two files in a directory; the runtime picks them up. No TypeScript registry edits required.
4
+
5
+ ## 1. Pick a directory
6
+
7
+ Custom classifier:
8
+
9
+ ```
10
+ src/classifiers/custom/<name>/
11
+ ├── manifest.json
12
+ └── prompt.md
13
+ ```
14
+
15
+ Stock classifier names are closed (`preflight`, `routing`, `model_specialization`, `tools`, `prompt_injection`). You generally don't add new stock classifiers — extend behavior with a custom one instead.
16
+
17
+ ## 2. Write the manifest
18
+
19
+ ```json
20
+ {
21
+ "kind": "custom",
22
+ "name": "topic_tags",
23
+ "version": "1.0.0",
24
+ "purpose": "Tag the message with a small set of topic labels for analytics.",
25
+ "order": 70,
26
+ "fallback": {
27
+ "reason": "Classifier failed; no tags generated.",
28
+ "certainty": "no_signal",
29
+ "output": { "tags": [] }
30
+ },
31
+ "output_schema": {
32
+ "type": "object",
33
+ "additionalProperties": false,
34
+ "required": ["tags"],
35
+ "properties": {
36
+ "tags": {
37
+ "type": "array", "maxItems": 5,
38
+ "items": { "type": "string", "minLength": 1, "maxLength": 40 }
39
+ }
40
+ }
41
+ }
42
+ }
43
+ ```
44
+
45
+ Rules:
46
+
47
+ - `name` must match the directory name.
48
+ - `name` must not collide with a stock classifier name.
49
+ - `order` must not collide with any other classifier's `order`.
50
+ - `fallback.output` must validate against your `output_schema`; `fallback` must also carry `reason` and `certainty`.
51
+
52
+ See [manifests.md](manifests.md) for the full field list.
53
+
54
+ ## 3. Write the prompt
55
+
56
+ `prompt.md` is the classifier-specific instruction text. The runtime composes it with an auto-generated preamble that describes the JSON output envelope, so your prompt can focus on the classification rule:
57
+
58
+ ```markdown
59
+ You are the topic_tags classifier.
60
+
61
+ Tags are short single-word topic labels (lowercase, no spaces). Use at most five.
62
+ Return an empty array when no clear topic applies.
63
+ Do not invent tags for vague or ambiguous messages.
64
+ ```
65
+
66
+ Keep it focused. Don't put aggregation or routing rules in prompts — those live in the runtime and catalog.
67
+
68
+ ## 4. Build and test
69
+
70
+ ```sh
71
+ npm run build # validates the manifest, sorts the registry, copies assets
72
+ npm test
73
+ ```
74
+
75
+ If the manifest is malformed, the loader throws `ClassifierManifestError` with the path and a specific reason.
76
+
77
+ ## 5. Consume the output
78
+
79
+ ```ts
80
+ const classify = createClassifier({ catalog });
81
+ const result = await classify(input);
82
+ if (result.action === "route") {
83
+ const tags = result.classifier_outputs.topic_tags?.tags ?? [];
84
+ }
85
+ ```
86
+
87
+ `result.audit.custom_outputs[]` carries the same data with required `reason` and `certainty` metadata if you need to inspect them.
88
+
89
+ ## Choosing the classifier model
90
+
91
+ For apps and OSS installs, prefer `open-classify.config.json`:
92
+
93
+ ```json
94
+ {
95
+ "runner": {
96
+ "provider": "ollama",
97
+ "defaultModel": "gemma4:e4b-it-q4_K_M",
98
+ "models": {
99
+ "custom": {
100
+ "topic_tags": "qwen2.5:7b-instruct-q4_K_M"
101
+ }
102
+ }
103
+ }
104
+ }
105
+ ```
106
+
107
+ `runner.defaultModel` applies to every classifier without an override. `runner.models.stock` maps built-in classifier ids to model overrides; `runner.models.custom` does the same for custom classifier ids.
108
+
109
+ Classifier manifests may also carry an Ollama hint for packaged classifiers:
110
+
111
+ ```json
112
+ {
113
+ "backend": { "ollama": { "base_model": "qwen2.5:7b-instruct-q4_K_M" } }
114
+ }
115
+ ```
116
+
117
+ Config file and function options take precedence over manifest hints.
118
+
119
+ ## Replacing the backend
120
+
121
+ For full backend control, implement your own `RunClassifier` and pass it to `classifyOpenClassifyInput`:
122
+
123
+ ```ts
124
+ import { classifyOpenClassifyInput, loadCatalog, type RunClassifier } from "open-classify";
125
+
126
+ const runClassifier: RunClassifier = async (name, input, signal) => {
127
+ // call OpenAI, Anthropic, a remote service, etc.
128
+ // return a ClassifierOutput matching the classifier's contract.
129
+ };
130
+
131
+ await classifyOpenClassifyInput(input, { runClassifier, catalog: loadCatalog(...) });
132
+ ```
@@ -0,0 +1,127 @@
1
+ # Manifest reference
2
+
3
+ Every classifier directory contains a `manifest.json`. Custom classifiers also contain a `prompt.md`. Stock prompt markdown lives in `src/classifiers/stock/prompts/` and is assembled at runtime.
4
+
5
+ ## Layout
6
+
7
+ ```
8
+ src/classifiers/
9
+ stock/prompts/ # built-in prompt markdown
10
+ base.md
11
+ confidence.md
12
+ reason.md
13
+ tier.md
14
+ specialty.md
15
+ tools-output.md
16
+ tools.md
17
+ stock/<name>/ # built-in classifier
18
+ manifest.json
19
+ custom/<name>/ # caller-defined classifier
20
+ manifest.json
21
+ prompt.md
22
+ ```
23
+
24
+ The `kind` field in the manifest must match the parent directory (`stock` or `custom`). Mismatches are rejected at load time.
25
+
26
+ ## Common fields
27
+
28
+ | Field | Required | Description |
29
+ |---|---|---|
30
+ | `kind` | yes | `"stock"` or `"custom"` |
31
+ | `name` | yes | Classifier id. Must match the directory name. |
32
+ | `version` | yes | Contract version surfaced in `meta.classifiers[name].version`. |
33
+ | `purpose` | yes | Human-readable description. |
34
+ | `order` | yes | Integer sort key. Duplicate orders are rejected. |
35
+ | `fallback` | yes | Output emitted when the classifier errors or times out. Must validate against the kind's output contract. |
36
+ | `backend.ollama.base_model` | no | Packaged Ollama model hint for this classifier. User config and function options take precedence. |
37
+
38
+ ## Stock manifests
39
+
40
+ Stock manifests use a closed set of names (`preflight`, `routing`, `model_specialization`, `tools`, `prompt_injection`). The runtime knows each name's signal type, so there's no `emits` field. Fallbacks must satisfy the signal contract for that name (see [signals.md](signals.md)).
41
+
42
+ The `tools` classifier additionally takes:
43
+
44
+ | Field | Required | Description |
45
+ |---|---|---|
46
+ | `tools` | no | Array of `{ id, description }`. Restricts which tool ids the classifier may emit. |
47
+
48
+ Example (`src/classifiers/stock/prompt_injection/manifest.json`):
49
+
50
+ ```json
51
+ {
52
+ "kind": "stock",
53
+ "name": "prompt_injection",
54
+ "version": "1.0.0",
55
+ "purpose": "Assess whether the target message contains prompt-injection attempts.",
56
+ "order": 50,
57
+ "fallback": {
58
+ "reason": "Classifier failed; prompt-injection risk is unknown.",
59
+ "certainty": "no_signal",
60
+ "risk_level": "unknown"
61
+ }
62
+ }
63
+ ```
64
+
65
+ ## Custom manifests
66
+
67
+ | Field | Required | Description |
68
+ |---|---|---|
69
+ | `output_schema` | yes | JSON Schema (Ajv-validated) for the `output` payload. |
70
+
71
+ Custom classifier names must not collide with any stock classifier name.
72
+
73
+ Example:
74
+
75
+ ```json
76
+ {
77
+ "kind": "custom",
78
+ "name": "memory_retrieval_queries",
79
+ "version": "1.0.0",
80
+ "purpose": "Generate saved-memory query hints for caller-owned memory retrieval.",
81
+ "order": 60,
82
+ "fallback": {
83
+ "reason": "Classifier failed; no memory queries generated.",
84
+ "certainty": "no_signal",
85
+ "output": { "queries": [] }
86
+ },
87
+ "output_schema": {
88
+ "type": "object",
89
+ "additionalProperties": false,
90
+ "required": ["queries"],
91
+ "properties": {
92
+ "queries": {
93
+ "type": "array", "maxItems": 5,
94
+ "items": { "type": "string", "minLength": 1, "maxLength": 120 }
95
+ }
96
+ }
97
+ }
98
+ }
99
+ ```
100
+
101
+ ## Prompt files
102
+
103
+ Stock prompt files live together in `src/classifiers/stock/prompts/`. The runtime assembles shared markdown (`base.md`, `reason.md`, `confidence.md`, `classifier-header.md`) with focused stock sections such as `tier.md`, `specialty.md`, `tools-output.md`, and the stock classifier file (`preflight.md`, `routing.md`, `model_specialization.md`, `tools.md`, or `prompt_injection.md`).
104
+
105
+ Dynamic prompt sections use small markdown slots. For example, `tools.md` contains `{{allowed_tools}}`, and the runtime renders the allowed tool list from the tools manifest.
106
+
107
+ Custom `prompt.md` is the classifier-specific instruction text. The runtime composes it with the shared JSON output envelope, so prompts can stay focused on classifier behavior:
108
+
109
+ - what the classifier decides
110
+ - when to emit each declared field
111
+ - when to omit optional fields
112
+ - short examples only when they clarify a boundary
113
+
114
+ Do not put aggregation or model-id rules in prompts — those live in the runtime and catalog.
115
+
116
+ ## Validation rejections
117
+
118
+ The loader rejects manifests that:
119
+
120
+ - declare unsupported fields
121
+ - collide on `name` or `order`
122
+ - have an empty custom `prompt.md`
123
+ - declare a custom name that matches a stock classifier
124
+ - declare `kind` that doesn't match the parent directory
125
+ - have a `fallback` that doesn't satisfy the stock signal contract, or whose `output` doesn't validate against the custom `output_schema`
126
+ - are missing `output_schema` on a custom classifier
127
+ - declare `tools` on any classifier other than the `tools` stock classifier