open-classify 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +30 -24
  2. package/dist/src/aggregator.d.ts +4 -1
  3. package/dist/src/aggregator.js +25 -15
  4. package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/manifest.json +3 -1
  5. package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/prompt.md +1 -1
  6. package/dist/src/classifiers/custom/memory_retrieval_queries/manifest.json +2 -0
  7. package/dist/src/classifiers/stock/model_specialization/manifest.json +4 -1
  8. package/dist/src/classifiers/stock/preflight/manifest.json +4 -1
  9. package/dist/src/classifiers/stock/prompt_injection/manifest.json +12 -0
  10. package/dist/src/classifiers/stock/prompts/confidence.md +3 -3
  11. package/dist/src/classifiers/stock/prompts/custom-output.md +7 -1
  12. package/dist/src/classifiers/stock/prompts/preflight.md +7 -7
  13. package/dist/src/classifiers/stock/prompts/prompt-injection-output.md +5 -0
  14. package/dist/src/classifiers/stock/prompts/prompt_injection.md +24 -0
  15. package/dist/src/classifiers/stock/prompts/reason.md +1 -1
  16. package/dist/src/classifiers/stock/prompts/specialty.md +8 -6
  17. package/dist/src/classifiers/stock/prompts/tier.md +1 -1
  18. package/dist/src/classifiers/stock/routing/manifest.json +4 -1
  19. package/dist/src/classifiers/stock/tools/manifest.json +2 -0
  20. package/dist/src/config.d.ts +2 -0
  21. package/dist/src/config.js +33 -1
  22. package/dist/src/enums.d.ts +3 -7
  23. package/dist/src/enums.js +7 -30
  24. package/dist/src/index.js +1 -1
  25. package/dist/src/input.js +1 -1
  26. package/dist/src/manifest.d.ts +31 -23
  27. package/dist/src/manifest.js +5 -1
  28. package/dist/src/ollama.d.ts +2 -1
  29. package/dist/src/ollama.js +1 -0
  30. package/dist/src/pipeline.d.ts +1 -0
  31. package/dist/src/pipeline.js +78 -48
  32. package/dist/src/stock-prompt.js +1 -1
  33. package/dist/src/stock-validation.d.ts +1 -2
  34. package/dist/src/stock-validation.js +23 -40
  35. package/dist/src/stock.d.ts +12 -11
  36. package/dist/src/stock.js +21 -1
  37. package/dist/src/ui-server.js +12 -5
  38. package/dist/src/validation.d.ts +0 -1
  39. package/dist/src/validation.js +0 -37
  40. package/docs/adding-a-classifier.md +131 -0
  41. package/docs/manifests.md +127 -0
  42. package/docs/resolver.md +104 -0
  43. package/docs/signals.md +102 -0
  44. package/downstream-models.json +124 -0
  45. package/open-classify.config.example.json +5 -1
  46. package/package.json +3 -1
  47. package/dist/src/classifiers/stock/prompts/security-output.md +0 -8
  48. package/dist/src/classifiers/stock/prompts/security.md +0 -26
  49. package/dist/src/classifiers/stock/security/manifest.json +0 -12
package/dist/src/enums.js CHANGED
@@ -19,44 +19,21 @@ export const DOWNSTREAM_MODEL_TIER_VALUES = [
19
19
  // Which kind of model/prompt specialization fits the request best. Combined
20
20
  // with the tier to look up a concrete model in the catalog.
21
21
  export const MODEL_SPECIALIZATION_VALUES = [
22
- "agentic_coding",
23
- "agentic_workflows",
24
22
  "chat",
25
- "code_fixing",
26
- "code_reasoning",
27
- "code_review",
28
- "writing",
29
23
  "reasoning",
30
24
  "planning",
25
+ "writing",
26
+ "summarization",
31
27
  "coding",
28
+ "tool_use",
32
29
  "computer_use",
33
- "debugging",
34
- "instruction_following",
35
- "question_answering",
36
- "subagents",
37
- "summarization",
38
- "tool_assisted_coding",
39
- "vision_input",
40
- ];
41
- export const SECURITY_DECISION_VALUES = [
42
- "allow",
43
- "block",
44
- "needs_review",
30
+ "vision",
45
31
  ];
46
- // Overall safety posture on the latest user message. Security short-circuiting
47
- // is driven by safety.decision, not risk level alone.
48
- export const SECURITY_RISK_LEVEL_VALUES = [
32
+ // Prompt-injection posture on the latest user message. The pipeline blocks
33
+ // confident high_risk and unknown prompt-injection outputs.
34
+ export const PROMPT_INJECTION_RISK_LEVEL_VALUES = [
49
35
  "normal",
50
36
  "suspicious",
51
37
  "high_risk",
52
38
  "unknown",
53
39
  ];
54
- // Specific safety concerns the security classifier can flag. These are
55
- // advisory; safety.decision controls whether the pipeline blocks or needs review.
56
- export const SECURITY_SIGNAL_VALUES = [
57
- "instruction_attack",
58
- "secret_or_private_data_risk",
59
- "unsafe_tool_or_action",
60
- "untrusted_content_or_code",
61
- "injection_or_obfuscation",
62
- ];
package/dist/src/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  // Public barrel for the Open Classify package. Everything an external caller
2
2
  // would need — input types, enums, the registry, the pipeline, the Ollama
3
- // runner, the catalog loader, the aggregator's confidence threshold — is
3
+ // runner, the catalog loader, the aggregator's certainty threshold — is
4
4
  // re-exported here. The build emits a single `index.js` that downstream
5
5
  // consumers can import from `open-classify`.
6
6
  export * from "./aggregator.js";
package/dist/src/input.js CHANGED
@@ -9,7 +9,7 @@ import { createHash } from "node:crypto";
9
9
  * Gemma 4 E4B supports a native 131,072-token (128K) context window. Open
10
10
  * Classify does not use that full window in the reference local runtime: it
11
11
  * runs the classifier set in parallel with a configured 4,096-token context.
12
- * The largest fixed classifier prompt is security at about 1,748 estimated
12
+ * The largest fixed classifier prompt is prompt_injection at roughly 1,700 estimated
13
13
  * tokens using the same 3 chars/token heuristic as the Ollama packer. We round
14
14
  * that up to 2,000 fixed-prompt tokens, reserve roughly 400 tokens for output,
15
15
  * chat-template variance, and estimation error, then spend the remainder on
package/dist/src/manifest.d.ts CHANGED
@@ -1,8 +1,10 @@
1
- import type { AckReplySignal, ClassifierOutput, CustomClassifierOutput, FinalReplySignal, RoutingSignal, RuntimeClassifierManifest, SafetySignal, ToolsSignal } from "./stock.js";
1
+ import type { AckReplySignal, ClassifierOutput, CustomClassifierOutput, FinalReplySignal, PromptInjectionSignal, RoutingSignal, RuntimeClassifierManifest, ToolsSignal } from "./stock.js";
2
2
  import type { ClassifierInput, ClassifierRunStatus } from "./types.js";
3
3
  import type { DownstreamModelTier, ModelSpecialization } from "./enums.js";
4
4
  export type ClassifierName = string;
5
5
  export type ClassifierResults = Record<ClassifierName, ClassifierOutput>;
6
+ export declare const CERTAINTY_GATE_MODES: readonly ["min_score", "avg_score", "off"];
7
+ export type CertaintyGateMode = (typeof CERTAINTY_GATE_MODES)[number];
6
8
  export type RunClassifier = (name: ClassifierName, input: ClassifierInput, signal: AbortSignal) => Promise<ClassifierOutput>;
7
9
  export interface CatalogEntry {
8
10
  readonly id: string;
@@ -46,7 +48,7 @@ export interface Envelope {
46
48
  readonly ack_reply?: AckReplySignal;
47
49
  readonly routing?: RoutingSignal;
48
50
  readonly tools?: ToolsSignal;
49
- readonly safety?: SafetySignal;
51
+ readonly prompt_injection?: PromptInjectionSignal;
50
52
  readonly custom_outputs: ReadonlyArray<CustomClassifierOutput>;
51
53
  readonly model_recommendation: ModelRecommendation;
52
54
  }
@@ -71,35 +73,38 @@ export interface PipelineMeta {
71
73
  export interface PipelineAudit extends Envelope {
72
74
  readonly meta: PipelineMeta;
73
75
  readonly fired_by?: string;
76
+ readonly certainty_gate?: LowCertaintyBlockReason;
74
77
  }
75
- export type AnswerPipelineResult = {
76
- readonly action: "answer";
78
+ export type BlockReason = PromptInjectionBlockReason | LowCertaintyBlockReason;
79
+ export interface PromptInjectionBlockReason {
80
+ readonly kind: "prompt_injection";
81
+ readonly risk_level: PromptInjectionSignal["risk_level"];
82
+ }
83
+ export interface LowCertaintyBlockReason {
84
+ readonly kind: "low_certainty";
85
+ readonly mode: Exclude<CertaintyGateMode, "off">;
86
+ readonly threshold: number;
87
+ readonly score: number;
88
+ readonly classifier_scores: Readonly<Record<string, number>>;
89
+ readonly low_classifiers: ReadonlyArray<string>;
90
+ }
91
+ export type ReplyPipelineResult = {
92
+ readonly action: "reply";
77
93
  readonly message_id: string;
78
- readonly final_reply: FinalReplySignal;
79
- readonly reason: "already_answered";
94
+ readonly reply: {
95
+ readonly text: string;
96
+ };
97
+ readonly reason: "preflight_reply";
80
98
  readonly classifier_outputs: ClassifierCustomOutputs;
81
99
  readonly audit: Pick<PipelineAudit, "final_reply" | "meta" | "fired_by">;
82
100
  };
83
101
  export type BlockPipelineResult = {
84
102
  readonly action: "block";
85
103
  readonly message_id: string;
86
- readonly reason: {
87
- readonly risk_level?: SafetySignal["risk_level"];
88
- readonly signals?: ReadonlyArray<string>;
89
- };
90
- readonly classifier_outputs: ClassifierCustomOutputs;
91
- readonly audit: Pick<PipelineAudit, "safety" | "meta" | "fired_by">;
92
- };
93
- export type NeedsReviewPipelineResult = {
94
- readonly action: "needs_review";
95
- readonly message_id: string;
96
- readonly fired_by: string;
97
- readonly reason: {
98
- readonly risk_level?: SafetySignal["risk_level"];
99
- readonly signals?: ReadonlyArray<string>;
100
- };
104
+ readonly fired_by?: string;
105
+ readonly reason: BlockReason;
101
106
  readonly classifier_outputs: ClassifierCustomOutputs;
102
- readonly audit: Pick<PipelineAudit, "safety" | "meta" | "fired_by">;
107
+ readonly audit: Pick<PipelineAudit, "prompt_injection" | "meta" | "fired_by" | "certainty_gate">;
103
108
  };
104
109
  export type RoutePipelineResult = {
105
110
  readonly action: "route";
@@ -108,8 +113,11 @@ export type RoutePipelineResult = {
108
113
  readonly classifier_outputs: ClassifierCustomOutputs;
109
114
  readonly audit: PipelineAudit;
110
115
  };
111
- export type PipelineResult = AnswerPipelineResult | BlockPipelineResult | NeedsReviewPipelineResult | RoutePipelineResult;
116
+ export type PipelineResult = ReplyPipelineResult | BlockPipelineResult | RoutePipelineResult;
112
117
  export interface AggregatorConfig {
118
+ readonly certaintyThreshold?: number;
119
+ /** @deprecated Use certaintyThreshold. */
113
120
  readonly confidenceThreshold?: number;
121
+ readonly certaintyGate?: CertaintyGateMode;
114
122
  }
115
123
  export type ClassifierRegistry = ReadonlyArray<RuntimeClassifierManifest>;
package/dist/src/manifest.js CHANGED
@@ -1 +1,5 @@
1
- export {};
1
+ export const CERTAINTY_GATE_MODES = [
2
+ "min_score",
3
+ "avg_score",
4
+ "off",
5
+ ];
package/dist/src/ollama.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { type ClassifierName, type RunClassifier } from "./classifiers.js";
2
2
  import { type OpenClassifyConfig } from "./config.js";
3
3
  import { classifyOpenClassifyInput } from "./pipeline.js";
4
- import type { Catalog } from "./manifest.js";
4
+ import type { AggregatorConfig, Catalog } from "./manifest.js";
5
5
  import type { OpenClassifyInput } from "./types.js";
6
6
  export declare const OLLAMA_DEFAULT_HOST = "http://localhost:11434";
7
7
  export declare const OLLAMA_BASE_MODEL = "gemma4:e4b-it-q4_K_M";
@@ -33,6 +33,7 @@ export interface ClassifyWithOllamaConfig extends OllamaClassifierRunnerConfig {
33
33
  catalogPath?: string;
34
34
  configPath?: string;
35
35
  openClassifyConfig?: OpenClassifyConfig;
36
+ aggregator?: AggregatorConfig;
36
37
  }
37
38
  export declare class OllamaClassifierError extends Error {
38
39
  readonly classifier: ClassifierName;
package/dist/src/ollama.js CHANGED
@@ -124,6 +124,7 @@ export async function classifyWithOllama(input, config = {}) {
124
124
  return classifyOpenClassifyInput(input, {
125
125
  runClassifier: createOllamaClassifierRunner(runnerConfig),
126
126
  catalog,
127
+ aggregator: config.aggregator ?? fileConfig?.aggregator,
127
128
  });
128
129
  }
129
130
  async function runOllamaClassifier(name, input, signal, fetchImpl, host, model, options, allowManifestModel) {
package/dist/src/pipeline.d.ts CHANGED
@@ -3,6 +3,7 @@ import type { AggregatorConfig, Catalog, PipelineResult } from "./manifest.js";
3
3
  import type { OpenClassifyInput } from "./types.js";
4
4
  export declare const DEFAULT_CLASSIFIER_TIMEOUT_MS = 15000;
5
5
  export declare const DEFAULT_CLASSIFIER_RETRY_COUNT = 1;
6
+ export declare const DEFAULT_CERTAINTY_GATE = "min_score";
6
7
  export declare class OpenClassifyNormalizationError extends Error {
7
8
  constructor(cause: unknown);
8
9
  }
package/dist/src/pipeline.js CHANGED
@@ -1,9 +1,10 @@
1
- import { composeEnvelope } from "./aggregator.js";
1
+ import { certaintyThreshold, composeEnvelope } from "./aggregator.js";
2
2
  import { CLASSIFIER_NAMES, MODULES_BY_NAME, REGISTRY, } from "./classifiers.js";
3
3
  import { normalizeOpenClassifyInput, toClassifierInput } from "./input.js";
4
- import { isCustomManifest } from "./stock.js";
4
+ import { certaintyScore, isCustomManifest } from "./stock.js";
5
5
  export const DEFAULT_CLASSIFIER_TIMEOUT_MS = 15_000;
6
6
  export const DEFAULT_CLASSIFIER_RETRY_COUNT = 1;
7
+ export const DEFAULT_CERTAINTY_GATE = "min_score";
7
8
  export class OpenClassifyNormalizationError extends Error {
8
9
  constructor(cause) {
9
10
  super(errorMessage(cause), { cause });
@@ -11,10 +12,10 @@ export class OpenClassifyNormalizationError extends Error {
11
12
  }
12
13
  }
13
14
  // Short-circuit gates are intrinsic to specific stock signals — not configured
14
- // per-manifest. preflight.final_reply ⇒ answer; security.decision in
15
- // {block, needs_review} ⇒ block / needs_review. Order matters: preflight is
15
+ // per-manifest. preflight.final_reply ⇒ reply; confident high_risk or unknown
16
+ // prompt-injection risk ⇒ block. Order matters: preflight is
16
17
  // cheaper to evaluate, so we check it first.
17
- const SHORT_CIRCUIT_GATES = ["preflight", "security"];
18
+ const SHORT_CIRCUIT_GATES = ["preflight", "prompt_injection"];
18
19
  export async function classifyOpenClassifyInput(input, options) {
19
20
  let request;
20
21
  try {
@@ -36,7 +37,7 @@ export async function classifyOpenClassifyInput(input, options) {
36
37
  const classifierInput = toClassifierInput(request);
37
38
  const classifierTimeoutMs = options.classifierTimeoutMs ?? DEFAULT_CLASSIFIER_TIMEOUT_MS;
38
39
  const classifierRetryCount = options.classifierRetryCount ?? DEFAULT_CLASSIFIER_RETRY_COUNT;
39
- const threshold = options.aggregator?.confidenceThreshold ?? 0.6;
40
+ const threshold = certaintyThreshold(options.aggregator);
40
41
  const runs = new Map(CLASSIFIER_NAMES.map((name) => [
41
42
  name,
42
43
  runClassifierWithRetry(name, classifierInput, options.runClassifier, controller.signal, classifierTimeoutMs, classifierRetryCount),
@@ -65,6 +66,10 @@ export async function classifyOpenClassifyInput(input, options) {
65
66
  input: classifierInput,
66
67
  config: options.aggregator,
67
68
  });
69
+ const certaintyGate = certaintyGateBlock(options.aggregator, results);
70
+ if (certaintyGate) {
71
+ return buildCertaintyGateBlockResult(request, envelope, results, meta, certaintyGate);
72
+ }
68
73
  return buildRouteResult(request, envelope, results, meta);
69
74
  }
70
75
  finally {
@@ -72,38 +77,67 @@ export async function classifyOpenClassifyInput(input, options) {
72
77
  }
73
78
  }
74
79
  function shortCircuitVerdict(gate, result, threshold) {
75
- const confidence = result.confidence ?? 0;
76
- if (confidence < threshold)
80
+ const score = scoreCertainty(result.certainty);
81
+ if (score < threshold)
77
82
  return null;
78
83
  if (gate === "preflight") {
79
84
  const preflight = result;
80
85
  if (preflight.final_reply !== undefined) {
81
- return { kind: "answer", final_reply: preflight.final_reply };
86
+ return { kind: "reply", final_reply: preflight.final_reply };
82
87
  }
83
88
  return null;
84
89
  }
85
- if (gate === "security") {
86
- const security = result;
87
- if (security.decision === "block") {
90
+ if (gate === "prompt_injection") {
91
+ const promptInjection = result;
92
+ if (promptInjection.risk_level === "high_risk" || promptInjection.risk_level === "unknown") {
93
+ const promptInjectionSignal = extractPromptInjection(promptInjection);
88
94
  return {
89
95
  kind: "block",
90
- safety: extractSafety(security),
91
- };
92
- }
93
- if (security.decision === "needs_review") {
94
- return {
95
- kind: "needs_review",
96
- safety: extractSafety(security),
96
+ prompt_injection: promptInjectionSignal,
97
+ reason: {
98
+ kind: "prompt_injection",
99
+ risk_level: promptInjectionSignal.risk_level,
100
+ },
97
101
  };
98
102
  }
99
103
  }
100
104
  return null;
101
105
  }
102
- function extractSafety(value) {
106
+ function certaintyGateBlock(config, results) {
107
+ const mode = config?.certaintyGate ?? DEFAULT_CERTAINTY_GATE;
108
+ if (mode === "off")
109
+ return undefined;
110
+ const threshold = certaintyThreshold(config);
111
+ const classifier_scores = classifierScores(results);
112
+ const scores = Object.values(classifier_scores);
113
+ const score = mode === "min_score"
114
+ ? Math.min(...scores)
115
+ : scores.reduce((sum, value) => sum + value, 0) / scores.length;
116
+ if (score >= threshold)
117
+ return undefined;
118
+ return {
119
+ kind: "low_certainty",
120
+ mode,
121
+ threshold,
122
+ score,
123
+ classifier_scores,
124
+ low_classifiers: Object.entries(classifier_scores)
125
+ .filter(([, value]) => value < threshold)
126
+ .map(([name]) => name),
127
+ };
128
+ }
129
+ function classifierScores(results) {
130
+ return Object.fromEntries(REGISTRY.map((manifest) => [
131
+ manifest.name,
132
+ scoreCertainty(results[manifest.name]?.certainty),
133
+ ]));
134
+ }
135
+ function scoreCertainty(certainty) {
136
+ return certainty === undefined ? 0 : certaintyScore[certainty];
137
+ }
138
+ function extractPromptInjection(value) {
103
139
  return {
104
- ...(value.decision === undefined ? {} : { decision: value.decision }),
105
140
  risk_level: value.risk_level,
106
- signals: value.signals,
107
141
  };
108
142
  }
109
143
  function buildShortCircuitResult(name, verdict, settled, target_message_hash) {
@@ -116,13 +150,13 @@ function buildShortCircuitResult(name, verdict, settled, target_message_hash) {
116
150
  };
117
151
  const meta = { classifiers: { [name]: entry } };
118
152
  const classifier_outputs = classifierCustomOutputs({ [name]: value });
119
- if (verdict.kind === "answer") {
153
+ if (verdict.kind === "reply") {
120
154
  const preflight = value;
121
155
  return {
122
- action: "answer",
156
+ action: "reply",
123
157
  message_id: target_message_hash,
124
- final_reply: verdict.final_reply,
125
- reason: "already_answered",
158
+ reply: { text: verdict.final_reply.reply },
159
+ reason: "preflight_reply",
126
160
  classifier_outputs,
127
161
  audit: {
128
162
  fired_by: name,
@@ -131,34 +165,15 @@ function buildShortCircuitResult(name, verdict, settled, target_message_hash) {
131
165
  },
132
166
  };
133
167
  }
134
- if (verdict.kind === "needs_review") {
135
- return {
136
- action: "needs_review",
137
- message_id: target_message_hash,
138
- fired_by: name,
139
- reason: {
140
- risk_level: verdict.safety.risk_level,
141
- signals: verdict.safety.signals,
142
- },
143
- classifier_outputs,
144
- audit: {
145
- fired_by: name,
146
- safety: verdict.safety,
147
- meta,
148
- },
149
- };
150
- }
151
168
  return {
152
169
  action: "block",
153
170
  message_id: target_message_hash,
154
- reason: {
155
- risk_level: verdict.safety.risk_level,
156
- signals: verdict.safety.signals,
157
- },
171
+ fired_by: name,
172
+ reason: verdict.reason,
158
173
  classifier_outputs,
159
174
  audit: {
160
175
  fired_by: name,
161
- safety: verdict.safety,
176
+ prompt_injection: verdict.prompt_injection,
162
177
  meta,
163
178
  },
164
179
  };
@@ -199,6 +214,21 @@ function buildRouteResult(request, envelope, results, meta) {
199
214
  },
200
215
  };
201
216
  }
217
+ function buildCertaintyGateBlockResult(request, envelope, results, meta, certaintyGate) {
218
+ return {
219
+ action: "block",
220
+ message_id: request.target_message_hash,
221
+ fired_by: "certainty_gate",
222
+ reason: certaintyGate,
223
+ classifier_outputs: classifierCustomOutputs(results),
224
+ audit: {
225
+ ...envelope,
226
+ fired_by: "certainty_gate",
227
+ certainty_gate: certaintyGate,
228
+ meta,
229
+ },
230
+ };
231
+ }
202
232
  function classifierCustomOutputs(results) {
203
233
  const out = {};
204
234
  for (const manifest of REGISTRY) {
package/dist/src/stock-prompt.js CHANGED
@@ -26,7 +26,7 @@ function stockSection(manifest) {
26
26
  allowed_tools: renderAllowedTools(manifest.tools),
27
27
  preflight_output: promptMarkdown("preflight-output.md"),
28
28
  routing_output: promptMarkdown("routing-output.md"),
29
- security_output: promptMarkdown("security-output.md"),
29
+ prompt_injection_output: promptMarkdown("prompt-injection-output.md"),
30
30
  specialty: promptMarkdown("specialty.md"),
31
31
  tier: promptMarkdown("tier.md"),
32
32
  tools_output: promptMarkdown("tools-output.md"),
package/dist/src/stock-validation.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { JsonClassifierManifest, SafetySignal, ClassifierOutput } from "./stock.js";
1
+ import type { JsonClassifierManifest, ClassifierOutput } from "./stock.js";
2
2
  export declare const STOCK_REASON_MAX_CHARS = 120;
3
3
  export declare const STOCK_REPLY_MAX_CHARS = 200;
4
4
  export declare const STOCK_TOOL_ID_MAX_CHARS = 64;
@@ -19,4 +19,3 @@ export interface LegacyValidateOptions {
19
19
  readonly manifest: JsonClassifierManifest;
20
20
  }
21
21
  export declare function validateClassifierOutputWithManifest(value: unknown, options: LegacyValidateOptions): ClassifierOutput;
22
- export type { SafetySignal };
package/dist/src/stock-validation.js CHANGED
@@ -1,7 +1,7 @@
1
- import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, SECURITY_DECISION_VALUES, } from "./enums.js";
1
+ import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, } from "./enums.js";
2
2
  import { Ajv } from "ajv/dist/ajv.js";
3
- import { STOCK_CLASSIFIER_NAMES } from "./stock.js";
4
- import { ensureNoDuplicates, isRecord, requireConfidence, requireEnum, requireNonEmptyStringMaxLength, requireNonNegativeSafeInteger, requireString, requireStringArray, throwInvalid, } from "./validation.js";
3
+ import { CERTAINTY_VALUES, STOCK_CLASSIFIER_NAMES } from "./stock.js";
4
+ import { ensureNoDuplicates, isRecord, requireEnum, requireNonEmptyStringMaxLength, requireNonNegativeSafeInteger, requireString, requireStringArray, throwInvalid, } from "./validation.js";
5
5
  export const STOCK_REASON_MAX_CHARS = 120;
6
6
  export const STOCK_REPLY_MAX_CHARS = 200;
7
7
  export const STOCK_TOOL_ID_MAX_CHARS = 64;
@@ -9,7 +9,7 @@ export const STOCK_TOOL_DESCRIPTION_MAX_CHARS = 240;
9
9
  export const STOCK_MANIFEST_NAME_MAX_CHARS = 80;
10
10
  export const STOCK_MANIFEST_VERSION_MAX_CHARS = 40;
11
11
  export const STOCK_MANIFEST_PURPOSE_MAX_CHARS = 400;
12
- const STOCK_SAFETY_RISK_LEVEL_VALUES = [
12
+ const STOCK_PROMPT_INJECTION_RISK_LEVEL_VALUES = [
13
13
  "normal",
14
14
  "suspicious",
15
15
  "high_risk",
@@ -113,8 +113,8 @@ function validateStockOutputForName(name, value, model, tools) {
113
113
  return validateModelSpecializationOutput(value, model);
114
114
  case "tools":
115
115
  return validateToolsOutput(value, model, tools?.map((tool) => tool.id));
116
- case "security":
117
- return validateSecurityOutput(value, model);
116
+ case "prompt_injection":
117
+ return validatePromptInjectionOutput(value, model);
118
118
  default: {
119
119
  const _exhaustive = name;
120
120
  void _exhaustive;
@@ -123,17 +123,19 @@ function validateStockOutputForName(name, value, model, tools) {
123
123
  }
124
124
  }
125
125
  function validateMetadata(value, classifier, model) {
126
+ if (value.reason === undefined) {
127
+ throwInvalid(classifier, model, "reason is required");
128
+ }
129
+ if (value.certainty === undefined) {
130
+ throwInvalid(classifier, model, "certainty is required");
131
+ }
126
132
  return {
127
- ...(value.reason === undefined
128
- ? {}
129
- : { reason: truncateText(requireString(value.reason, classifier, model, "reason"), STOCK_REASON_MAX_CHARS) }),
130
- ...(value.confidence === undefined
131
- ? {}
132
- : { confidence: requireConfidence(value.confidence, classifier, model) }),
133
+ reason: truncateText(requireString(value.reason, classifier, model, "reason"), STOCK_REASON_MAX_CHARS),
134
+ certainty: requireEnum(value.certainty, CERTAINTY_VALUES, classifier, model, "certainty"),
133
135
  };
134
136
  }
135
137
  function validatePreflightOutput(value, model) {
136
- ensureAllowedObjectKeys(value, ["reason", "confidence", "final_reply", "ack_reply"], "preflight", model, "output");
138
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "final_reply", "ack_reply"], "preflight", model, "output");
137
139
  if (value.final_reply !== undefined && value.ack_reply !== undefined) {
138
140
  throwInvalid("preflight", model, "final_reply and ack_reply are mutually exclusive");
139
141
  }
@@ -163,7 +165,7 @@ function validateReplySignal(value, classifier, model, field) {
163
165
  return { reply };
164
166
  }
165
167
  function validateTierRoutingOutput(value, model) {
166
- ensureAllowedObjectKeys(value, ["reason", "confidence", "model_tier"], "routing", model, "output");
168
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "model_tier"], "routing", model, "output");
167
169
  const meta = validateMetadata(value, "routing", model);
168
170
  const modelTier = normalizeOptionalEnumValue(value.model_tier);
169
171
  return {
@@ -174,7 +176,7 @@ function validateTierRoutingOutput(value, model) {
174
176
  };
175
177
  }
176
178
  function validateModelSpecializationOutput(value, model) {
177
- ensureAllowedObjectKeys(value, ["reason", "confidence", "specialization"], "model_specialization", model, "output");
179
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "specialization"], "model_specialization", model, "output");
178
180
  const meta = validateMetadata(value, "model_specialization", model);
179
181
  const specialization = normalizeOptionalEnumValue(value.specialization);
180
182
  return {
@@ -194,7 +196,7 @@ function normalizeOptionalEnumValue(value) {
194
196
  return value;
195
197
  }
196
198
  function validateToolsOutput(value, model, configuredTools) {
197
- ensureAllowedObjectKeys(value, ["reason", "confidence", "tools"], "tools", model, "output");
199
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "tools"], "tools", model, "output");
198
200
  const meta = validateMetadata(value, "tools", model);
199
201
  const tools = requireStringArray(value.tools, "tools", model, "tools").map(normalizeTool);
200
202
  ensureNoDuplicates(tools, "tools", model, "tools");
@@ -208,39 +210,20 @@ function validateToolsOutput(value, model, configuredTools) {
208
210
  }
209
211
  return { ...meta, tools };
210
212
  }
211
- function validateSecurityOutput(value, model) {
212
- ensureAllowedObjectKeys(value, ["reason", "confidence", "decision", "risk_level", "signals"], "security", model, "output");
213
- const meta = validateMetadata(value, "security", model);
214
- const decision = value.decision === undefined
215
- ? undefined
216
- : requireEnum(value.decision, SECURITY_DECISION_VALUES, "security", model, "decision");
217
- const riskLevel = requireEnum(value.risk_level, STOCK_SAFETY_RISK_LEVEL_VALUES, "security", model, "risk_level");
218
- const signals = requireStringArray(value.signals, "security", model, "signals");
219
- ensureNoDuplicates(signals, "security", model, "signals");
220
- if ((riskLevel === "normal" || riskLevel === "unknown") && signals.length > 0) {
221
- throwInvalid("security", model, `${riskLevel} risk_level must not include signals`);
222
- }
223
- if (riskLevel !== "normal" && riskLevel !== "unknown" && signals.length === 0) {
224
- throwInvalid("security", model, "elevated risk_level must include at least one signal");
225
- }
226
- if (decision === "block" && riskLevel !== "high_risk") {
227
- throwInvalid("security", model, "decision block requires high_risk risk_level");
228
- }
229
- if (decision === "allow" && riskLevel === "high_risk") {
230
- throwInvalid("security", model, "decision allow must not use high_risk risk_level");
231
- }
213
+ function validatePromptInjectionOutput(value, model) {
214
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "risk_level"], "prompt_injection", model, "output");
215
+ const meta = validateMetadata(value, "prompt_injection", model);
216
+ const riskLevel = requireEnum(value.risk_level, STOCK_PROMPT_INJECTION_RISK_LEVEL_VALUES, "prompt_injection", model, "risk_level");
232
217
  return {
233
218
  ...meta,
234
- ...(decision === undefined ? {} : { decision }),
235
219
  risk_level: riskLevel,
236
- signals,
237
220
  };
238
221
  }
239
222
  function validateCustomOutput(value, classifier, model, schema) {
240
223
  if (!isRecord(value)) {
241
224
  throwInvalid(classifier, model, "output must be a JSON object");
242
225
  }
243
- ensureAllowedObjectKeys(value, ["reason", "confidence", "output"], classifier, model, "output");
226
+ ensureAllowedObjectKeys(value, ["reason", "certainty", "output"], classifier, model, "output");
244
227
  if (value.output === undefined) {
245
228
  throwInvalid(classifier, model, "output is required for custom classifiers");
246
229
  }
package/dist/src/stock.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { DownstreamModelTier, ModelSpecialization, SecurityDecision } from "./enums.js";
1
+ import type { DownstreamModelTier, ModelSpecialization } from "./enums.js";
2
2
  export interface StockClassifierMessageInput {
3
3
  readonly role: "user" | "assistant";
4
4
  readonly text: string;
@@ -25,14 +25,15 @@ export interface SpecializationSignal {
25
25
  export interface ToolsSignal {
26
26
  readonly tools: ReadonlyArray<string>;
27
27
  }
28
- export interface SafetySignal {
29
- readonly decision?: SecurityDecision;
28
+ export interface PromptInjectionSignal {
30
29
  readonly risk_level: "normal" | "suspicious" | "high_risk" | "unknown";
31
- readonly signals: ReadonlyArray<string>;
32
30
  }
31
+ export type Certainty = "no_signal" | "very_weak" | "weak" | "tentative" | "reasonable" | "strong" | "very_strong" | "near_certain";
32
+ export declare const CERTAINTY_VALUES: readonly ["no_signal", "very_weak", "weak", "tentative", "reasonable", "strong", "very_strong", "near_certain"];
33
+ export declare const certaintyScore: Record<Certainty, number>;
33
34
  export interface ClassifierOutputMetadata {
34
- readonly reason?: string;
35
- readonly confidence?: number;
35
+ readonly reason: string;
36
+ readonly certainty: Certainty;
36
37
  }
37
38
  export interface PreflightClassifierOutput extends ClassifierOutputMetadata {
38
39
  readonly final_reply?: FinalReplySignal;
@@ -41,7 +42,7 @@ export interface PreflightClassifierOutput extends ClassifierOutputMetadata {
41
42
  export type RoutingClassifierOutput = TierSignal & ClassifierOutputMetadata;
42
43
  export type ModelSpecializationClassifierOutput = SpecializationSignal & ClassifierOutputMetadata;
43
44
  export type ToolsClassifierOutput = ToolsSignal & ClassifierOutputMetadata;
44
- export type SecurityClassifierOutput = SafetySignal & ClassifierOutputMetadata;
45
+ export type PromptInjectionClassifierOutput = PromptInjectionSignal & ClassifierOutputMetadata;
45
46
  export interface CustomClassifierOutputValue extends ClassifierOutputMetadata {
46
47
  readonly output: unknown;
47
48
  }
@@ -50,9 +51,9 @@ export interface StockClassifierOutputs {
50
51
  readonly routing: RoutingClassifierOutput;
51
52
  readonly model_specialization: ModelSpecializationClassifierOutput;
52
53
  readonly tools: ToolsClassifierOutput;
53
- readonly security: SecurityClassifierOutput;
54
+ readonly prompt_injection: PromptInjectionClassifierOutput;
54
55
  }
55
- export declare const STOCK_CLASSIFIER_NAMES: readonly ["preflight", "routing", "model_specialization", "tools", "security"];
56
+ export declare const STOCK_CLASSIFIER_NAMES: readonly ["preflight", "routing", "model_specialization", "tools", "prompt_injection"];
56
57
  export type StockClassifierName = (typeof STOCK_CLASSIFIER_NAMES)[number];
57
58
  export type StockClassifierOutput = StockClassifierOutputs[StockClassifierName];
58
59
  export type ClassifierOutput = StockClassifierOutput | CustomClassifierOutputValue;
@@ -94,8 +95,8 @@ export declare function isStockManifest(manifest: RuntimeClassifierManifest): ma
94
95
  export declare function isCustomManifest(manifest: RuntimeClassifierManifest): manifest is RuntimeCustomManifest;
95
96
  export interface CustomClassifierOutput {
96
97
  readonly classifier: string;
97
- readonly reason?: string;
98
- readonly confidence?: number;
98
+ readonly reason: string;
99
+ readonly certainty: Certainty;
99
100
  readonly output: unknown;
100
101
  }
101
102
  export {};
package/dist/src/stock.js CHANGED
@@ -1,9 +1,29 @@
1
+ export const CERTAINTY_VALUES = [
2
+ "no_signal",
3
+ "very_weak",
4
+ "weak",
5
+ "tentative",
6
+ "reasonable",
7
+ "strong",
8
+ "very_strong",
9
+ "near_certain",
10
+ ];
11
+ export const certaintyScore = {
12
+ no_signal: 0.00,
13
+ very_weak: 0.15,
14
+ weak: 0.30,
15
+ tentative: 0.45,
16
+ reasonable: 0.60,
17
+ strong: 0.75,
18
+ very_strong: 0.88,
19
+ near_certain: 0.97,
20
+ };
1
21
  export const STOCK_CLASSIFIER_NAMES = [
2
22
  "preflight",
3
23
  "routing",
4
24
  "model_specialization",
5
25
  "tools",
6
- "security",
26
+ "prompt_injection",
7
27
  ];
8
28
  // Helper: narrow a manifest to its stock kind for callers that know the name.
9
29
  export function isStockManifest(manifest) {