open-classify 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -24
- package/dist/src/aggregator.d.ts +4 -1
- package/dist/src/aggregator.js +25 -15
- package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/manifest.json +3 -1
- package/dist/src/classifiers/custom/{conversation_diegest → conversation_digest}/prompt.md +1 -1
- package/dist/src/classifiers/custom/memory_retrieval_queries/manifest.json +2 -0
- package/dist/src/classifiers/stock/model_specialization/manifest.json +4 -1
- package/dist/src/classifiers/stock/preflight/manifest.json +4 -1
- package/dist/src/classifiers/stock/prompt_injection/manifest.json +12 -0
- package/dist/src/classifiers/stock/prompts/confidence.md +3 -3
- package/dist/src/classifiers/stock/prompts/custom-output.md +7 -1
- package/dist/src/classifiers/stock/prompts/preflight.md +7 -7
- package/dist/src/classifiers/stock/prompts/prompt-injection-output.md +5 -0
- package/dist/src/classifiers/stock/prompts/prompt_injection.md +24 -0
- package/dist/src/classifiers/stock/prompts/reason.md +1 -1
- package/dist/src/classifiers/stock/prompts/specialty.md +8 -6
- package/dist/src/classifiers/stock/prompts/tier.md +1 -1
- package/dist/src/classifiers/stock/routing/manifest.json +4 -1
- package/dist/src/classifiers/stock/tools/manifest.json +2 -0
- package/dist/src/config.d.ts +2 -0
- package/dist/src/config.js +33 -1
- package/dist/src/enums.d.ts +3 -7
- package/dist/src/enums.js +7 -30
- package/dist/src/index.js +1 -1
- package/dist/src/input.js +1 -1
- package/dist/src/manifest.d.ts +31 -23
- package/dist/src/manifest.js +5 -1
- package/dist/src/ollama.d.ts +2 -1
- package/dist/src/ollama.js +1 -0
- package/dist/src/pipeline.d.ts +1 -0
- package/dist/src/pipeline.js +78 -48
- package/dist/src/stock-prompt.js +1 -1
- package/dist/src/stock-validation.d.ts +1 -2
- package/dist/src/stock-validation.js +23 -40
- package/dist/src/stock.d.ts +12 -11
- package/dist/src/stock.js +21 -1
- package/dist/src/ui-server.js +12 -5
- package/dist/src/validation.d.ts +0 -1
- package/dist/src/validation.js +0 -37
- package/docs/adding-a-classifier.md +131 -0
- package/docs/manifests.md +127 -0
- package/docs/resolver.md +104 -0
- package/docs/signals.md +102 -0
- package/downstream-models.json +124 -0
- package/open-classify.config.example.json +5 -1
- package/package.json +3 -1
- package/dist/src/classifiers/stock/prompts/security-output.md +0 -8
- package/dist/src/classifiers/stock/prompts/security.md +0 -26
- package/dist/src/classifiers/stock/security/manifest.json +0 -12
package/dist/src/enums.js
CHANGED
|
@@ -19,44 +19,21 @@ export const DOWNSTREAM_MODEL_TIER_VALUES = [
|
|
|
19
19
|
// Which kind of model/prompt specialization fits the request best. Combined
|
|
20
20
|
// with the tier to look up a concrete model in the catalog.
|
|
21
21
|
export const MODEL_SPECIALIZATION_VALUES = [
|
|
22
|
-
"agentic_coding",
|
|
23
|
-
"agentic_workflows",
|
|
24
22
|
"chat",
|
|
25
|
-
"code_fixing",
|
|
26
|
-
"code_reasoning",
|
|
27
|
-
"code_review",
|
|
28
|
-
"writing",
|
|
29
23
|
"reasoning",
|
|
30
24
|
"planning",
|
|
25
|
+
"writing",
|
|
26
|
+
"summarization",
|
|
31
27
|
"coding",
|
|
28
|
+
"tool_use",
|
|
32
29
|
"computer_use",
|
|
33
|
-
"
|
|
34
|
-
"instruction_following",
|
|
35
|
-
"question_answering",
|
|
36
|
-
"subagents",
|
|
37
|
-
"summarization",
|
|
38
|
-
"tool_assisted_coding",
|
|
39
|
-
"vision_input",
|
|
40
|
-
];
|
|
41
|
-
export const SECURITY_DECISION_VALUES = [
|
|
42
|
-
"allow",
|
|
43
|
-
"block",
|
|
44
|
-
"needs_review",
|
|
30
|
+
"vision",
|
|
45
31
|
];
|
|
46
|
-
//
|
|
47
|
-
//
|
|
48
|
-
export const
|
|
32
|
+
// Prompt-injection posture on the latest user message. The pipeline blocks
|
|
33
|
+
// confident high_risk and unknown prompt-injection outputs.
|
|
34
|
+
export const PROMPT_INJECTION_RISK_LEVEL_VALUES = [
|
|
49
35
|
"normal",
|
|
50
36
|
"suspicious",
|
|
51
37
|
"high_risk",
|
|
52
38
|
"unknown",
|
|
53
39
|
];
|
|
54
|
-
// Specific safety concerns the security classifier can flag. These are
|
|
55
|
-
// advisory; safety.decision controls whether the pipeline blocks or needs review.
|
|
56
|
-
export const SECURITY_SIGNAL_VALUES = [
|
|
57
|
-
"instruction_attack",
|
|
58
|
-
"secret_or_private_data_risk",
|
|
59
|
-
"unsafe_tool_or_action",
|
|
60
|
-
"untrusted_content_or_code",
|
|
61
|
-
"injection_or_obfuscation",
|
|
62
|
-
];
|
package/dist/src/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// Public barrel for the Open Classify package. Everything an external caller
|
|
2
2
|
// would need — input types, enums, the registry, the pipeline, the Ollama
|
|
3
|
-
// runner, the catalog loader, the aggregator's
|
|
3
|
+
// runner, the catalog loader, the aggregator's certainty threshold — is
|
|
4
4
|
// re-exported here. The build emits a single `index.js` that downstream
|
|
5
5
|
// consumers can import from `open-classify`.
|
|
6
6
|
export * from "./aggregator.js";
|
package/dist/src/input.js
CHANGED
|
@@ -9,7 +9,7 @@ import { createHash } from "node:crypto";
|
|
|
9
9
|
* Gemma 4 E4B supports a native 131,072-token (128K) context window. Open
|
|
10
10
|
* Classify does not use that full window in the reference local runtime: it
|
|
11
11
|
* runs the classifier set in parallel with a configured 4,096-token context.
|
|
12
|
-
* The largest fixed classifier prompt is
|
|
12
|
+
* The largest fixed classifier prompt is prompt_injection at roughly 1,700 estimated
|
|
13
13
|
* tokens using the same 3 chars/token heuristic as the Ollama packer. We round
|
|
14
14
|
* that up to 2,000 fixed-prompt tokens, reserve roughly 400 tokens for output,
|
|
15
15
|
* chat-template variance, and estimation error, then spend the remainder on
|
package/dist/src/manifest.d.ts
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import type { AckReplySignal, ClassifierOutput, CustomClassifierOutput, FinalReplySignal, RoutingSignal, RuntimeClassifierManifest,
|
|
1
|
+
import type { AckReplySignal, ClassifierOutput, CustomClassifierOutput, FinalReplySignal, PromptInjectionSignal, RoutingSignal, RuntimeClassifierManifest, ToolsSignal } from "./stock.js";
|
|
2
2
|
import type { ClassifierInput, ClassifierRunStatus } from "./types.js";
|
|
3
3
|
import type { DownstreamModelTier, ModelSpecialization } from "./enums.js";
|
|
4
4
|
export type ClassifierName = string;
|
|
5
5
|
export type ClassifierResults = Record<ClassifierName, ClassifierOutput>;
|
|
6
|
+
export declare const CERTAINTY_GATE_MODES: readonly ["min_score", "avg_score", "off"];
|
|
7
|
+
export type CertaintyGateMode = (typeof CERTAINTY_GATE_MODES)[number];
|
|
6
8
|
export type RunClassifier = (name: ClassifierName, input: ClassifierInput, signal: AbortSignal) => Promise<ClassifierOutput>;
|
|
7
9
|
export interface CatalogEntry {
|
|
8
10
|
readonly id: string;
|
|
@@ -46,7 +48,7 @@ export interface Envelope {
|
|
|
46
48
|
readonly ack_reply?: AckReplySignal;
|
|
47
49
|
readonly routing?: RoutingSignal;
|
|
48
50
|
readonly tools?: ToolsSignal;
|
|
49
|
-
readonly
|
|
51
|
+
readonly prompt_injection?: PromptInjectionSignal;
|
|
50
52
|
readonly custom_outputs: ReadonlyArray<CustomClassifierOutput>;
|
|
51
53
|
readonly model_recommendation: ModelRecommendation;
|
|
52
54
|
}
|
|
@@ -71,35 +73,38 @@ export interface PipelineMeta {
|
|
|
71
73
|
export interface PipelineAudit extends Envelope {
|
|
72
74
|
readonly meta: PipelineMeta;
|
|
73
75
|
readonly fired_by?: string;
|
|
76
|
+
readonly certainty_gate?: LowCertaintyBlockReason;
|
|
74
77
|
}
|
|
75
|
-
export type
|
|
76
|
-
|
|
78
|
+
export type BlockReason = PromptInjectionBlockReason | LowCertaintyBlockReason;
|
|
79
|
+
export interface PromptInjectionBlockReason {
|
|
80
|
+
readonly kind: "prompt_injection";
|
|
81
|
+
readonly risk_level: PromptInjectionSignal["risk_level"];
|
|
82
|
+
}
|
|
83
|
+
export interface LowCertaintyBlockReason {
|
|
84
|
+
readonly kind: "low_certainty";
|
|
85
|
+
readonly mode: Exclude<CertaintyGateMode, "off">;
|
|
86
|
+
readonly threshold: number;
|
|
87
|
+
readonly score: number;
|
|
88
|
+
readonly classifier_scores: Readonly<Record<string, number>>;
|
|
89
|
+
readonly low_classifiers: ReadonlyArray<string>;
|
|
90
|
+
}
|
|
91
|
+
export type ReplyPipelineResult = {
|
|
92
|
+
readonly action: "reply";
|
|
77
93
|
readonly message_id: string;
|
|
78
|
-
readonly
|
|
79
|
-
|
|
94
|
+
readonly reply: {
|
|
95
|
+
readonly text: string;
|
|
96
|
+
};
|
|
97
|
+
readonly reason: "preflight_reply";
|
|
80
98
|
readonly classifier_outputs: ClassifierCustomOutputs;
|
|
81
99
|
readonly audit: Pick<PipelineAudit, "final_reply" | "meta" | "fired_by">;
|
|
82
100
|
};
|
|
83
101
|
export type BlockPipelineResult = {
|
|
84
102
|
readonly action: "block";
|
|
85
103
|
readonly message_id: string;
|
|
86
|
-
readonly
|
|
87
|
-
|
|
88
|
-
readonly signals?: ReadonlyArray<string>;
|
|
89
|
-
};
|
|
90
|
-
readonly classifier_outputs: ClassifierCustomOutputs;
|
|
91
|
-
readonly audit: Pick<PipelineAudit, "safety" | "meta" | "fired_by">;
|
|
92
|
-
};
|
|
93
|
-
export type NeedsReviewPipelineResult = {
|
|
94
|
-
readonly action: "needs_review";
|
|
95
|
-
readonly message_id: string;
|
|
96
|
-
readonly fired_by: string;
|
|
97
|
-
readonly reason: {
|
|
98
|
-
readonly risk_level?: SafetySignal["risk_level"];
|
|
99
|
-
readonly signals?: ReadonlyArray<string>;
|
|
100
|
-
};
|
|
104
|
+
readonly fired_by?: string;
|
|
105
|
+
readonly reason: BlockReason;
|
|
101
106
|
readonly classifier_outputs: ClassifierCustomOutputs;
|
|
102
|
-
readonly audit: Pick<PipelineAudit, "
|
|
107
|
+
readonly audit: Pick<PipelineAudit, "prompt_injection" | "meta" | "fired_by" | "certainty_gate">;
|
|
103
108
|
};
|
|
104
109
|
export type RoutePipelineResult = {
|
|
105
110
|
readonly action: "route";
|
|
@@ -108,8 +113,11 @@ export type RoutePipelineResult = {
|
|
|
108
113
|
readonly classifier_outputs: ClassifierCustomOutputs;
|
|
109
114
|
readonly audit: PipelineAudit;
|
|
110
115
|
};
|
|
111
|
-
export type PipelineResult =
|
|
116
|
+
export type PipelineResult = ReplyPipelineResult | BlockPipelineResult | RoutePipelineResult;
|
|
112
117
|
export interface AggregatorConfig {
|
|
118
|
+
readonly certaintyThreshold?: number;
|
|
119
|
+
/** @deprecated Use certaintyThreshold. */
|
|
113
120
|
readonly confidenceThreshold?: number;
|
|
121
|
+
readonly certaintyGate?: CertaintyGateMode;
|
|
114
122
|
}
|
|
115
123
|
export type ClassifierRegistry = ReadonlyArray<RuntimeClassifierManifest>;
|
package/dist/src/manifest.js
CHANGED
package/dist/src/ollama.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { type ClassifierName, type RunClassifier } from "./classifiers.js";
|
|
2
2
|
import { type OpenClassifyConfig } from "./config.js";
|
|
3
3
|
import { classifyOpenClassifyInput } from "./pipeline.js";
|
|
4
|
-
import type { Catalog } from "./manifest.js";
|
|
4
|
+
import type { AggregatorConfig, Catalog } from "./manifest.js";
|
|
5
5
|
import type { OpenClassifyInput } from "./types.js";
|
|
6
6
|
export declare const OLLAMA_DEFAULT_HOST = "http://localhost:11434";
|
|
7
7
|
export declare const OLLAMA_BASE_MODEL = "gemma4:e4b-it-q4_K_M";
|
|
@@ -33,6 +33,7 @@ export interface ClassifyWithOllamaConfig extends OllamaClassifierRunnerConfig {
|
|
|
33
33
|
catalogPath?: string;
|
|
34
34
|
configPath?: string;
|
|
35
35
|
openClassifyConfig?: OpenClassifyConfig;
|
|
36
|
+
aggregator?: AggregatorConfig;
|
|
36
37
|
}
|
|
37
38
|
export declare class OllamaClassifierError extends Error {
|
|
38
39
|
readonly classifier: ClassifierName;
|
package/dist/src/ollama.js
CHANGED
|
@@ -124,6 +124,7 @@ export async function classifyWithOllama(input, config = {}) {
|
|
|
124
124
|
return classifyOpenClassifyInput(input, {
|
|
125
125
|
runClassifier: createOllamaClassifierRunner(runnerConfig),
|
|
126
126
|
catalog,
|
|
127
|
+
aggregator: config.aggregator ?? fileConfig?.aggregator,
|
|
127
128
|
});
|
|
128
129
|
}
|
|
129
130
|
async function runOllamaClassifier(name, input, signal, fetchImpl, host, model, options, allowManifestModel) {
|
package/dist/src/pipeline.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type { AggregatorConfig, Catalog, PipelineResult } from "./manifest.js";
|
|
|
3
3
|
import type { OpenClassifyInput } from "./types.js";
|
|
4
4
|
export declare const DEFAULT_CLASSIFIER_TIMEOUT_MS = 15000;
|
|
5
5
|
export declare const DEFAULT_CLASSIFIER_RETRY_COUNT = 1;
|
|
6
|
+
export declare const DEFAULT_CERTAINTY_GATE = "min_score";
|
|
6
7
|
export declare class OpenClassifyNormalizationError extends Error {
|
|
7
8
|
constructor(cause: unknown);
|
|
8
9
|
}
|
package/dist/src/pipeline.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
import { composeEnvelope } from "./aggregator.js";
|
|
1
|
+
import { certaintyThreshold, composeEnvelope } from "./aggregator.js";
|
|
2
2
|
import { CLASSIFIER_NAMES, MODULES_BY_NAME, REGISTRY, } from "./classifiers.js";
|
|
3
3
|
import { normalizeOpenClassifyInput, toClassifierInput } from "./input.js";
|
|
4
|
-
import { isCustomManifest } from "./stock.js";
|
|
4
|
+
import { certaintyScore, isCustomManifest } from "./stock.js";
|
|
5
5
|
export const DEFAULT_CLASSIFIER_TIMEOUT_MS = 15_000;
|
|
6
6
|
export const DEFAULT_CLASSIFIER_RETRY_COUNT = 1;
|
|
7
|
+
export const DEFAULT_CERTAINTY_GATE = "min_score";
|
|
7
8
|
export class OpenClassifyNormalizationError extends Error {
|
|
8
9
|
constructor(cause) {
|
|
9
10
|
super(errorMessage(cause), { cause });
|
|
@@ -11,10 +12,10 @@ export class OpenClassifyNormalizationError extends Error {
|
|
|
11
12
|
}
|
|
12
13
|
}
|
|
13
14
|
// Short-circuit gates are intrinsic to specific stock signals — not configured
|
|
14
|
-
// per-manifest. preflight.final_reply ⇒
|
|
15
|
-
//
|
|
15
|
+
// per-manifest. preflight.final_reply ⇒ reply; confident high_risk or unknown
|
|
16
|
+
// prompt-injection risk ⇒ block. Order matters: preflight is
|
|
16
17
|
// cheaper to evaluate, so we check it first.
|
|
17
|
-
const SHORT_CIRCUIT_GATES = ["preflight", "
|
|
18
|
+
const SHORT_CIRCUIT_GATES = ["preflight", "prompt_injection"];
|
|
18
19
|
export async function classifyOpenClassifyInput(input, options) {
|
|
19
20
|
let request;
|
|
20
21
|
try {
|
|
@@ -36,7 +37,7 @@ export async function classifyOpenClassifyInput(input, options) {
|
|
|
36
37
|
const classifierInput = toClassifierInput(request);
|
|
37
38
|
const classifierTimeoutMs = options.classifierTimeoutMs ?? DEFAULT_CLASSIFIER_TIMEOUT_MS;
|
|
38
39
|
const classifierRetryCount = options.classifierRetryCount ?? DEFAULT_CLASSIFIER_RETRY_COUNT;
|
|
39
|
-
const threshold = options.aggregator
|
|
40
|
+
const threshold = certaintyThreshold(options.aggregator);
|
|
40
41
|
const runs = new Map(CLASSIFIER_NAMES.map((name) => [
|
|
41
42
|
name,
|
|
42
43
|
runClassifierWithRetry(name, classifierInput, options.runClassifier, controller.signal, classifierTimeoutMs, classifierRetryCount),
|
|
@@ -65,6 +66,10 @@ export async function classifyOpenClassifyInput(input, options) {
|
|
|
65
66
|
input: classifierInput,
|
|
66
67
|
config: options.aggregator,
|
|
67
68
|
});
|
|
69
|
+
const certaintyGate = certaintyGateBlock(options.aggregator, results);
|
|
70
|
+
if (certaintyGate) {
|
|
71
|
+
return buildCertaintyGateBlockResult(request, envelope, results, meta, certaintyGate);
|
|
72
|
+
}
|
|
68
73
|
return buildRouteResult(request, envelope, results, meta);
|
|
69
74
|
}
|
|
70
75
|
finally {
|
|
@@ -72,38 +77,67 @@ export async function classifyOpenClassifyInput(input, options) {
|
|
|
72
77
|
}
|
|
73
78
|
}
|
|
74
79
|
function shortCircuitVerdict(gate, result, threshold) {
|
|
75
|
-
const
|
|
76
|
-
if (
|
|
80
|
+
const score = scoreCertainty(result.certainty);
|
|
81
|
+
if (score < threshold)
|
|
77
82
|
return null;
|
|
78
83
|
if (gate === "preflight") {
|
|
79
84
|
const preflight = result;
|
|
80
85
|
if (preflight.final_reply !== undefined) {
|
|
81
|
-
return { kind: "
|
|
86
|
+
return { kind: "reply", final_reply: preflight.final_reply };
|
|
82
87
|
}
|
|
83
88
|
return null;
|
|
84
89
|
}
|
|
85
|
-
if (gate === "
|
|
86
|
-
const
|
|
87
|
-
if (
|
|
90
|
+
if (gate === "prompt_injection") {
|
|
91
|
+
const promptInjection = result;
|
|
92
|
+
if (promptInjection.risk_level === "high_risk" || promptInjection.risk_level === "unknown") {
|
|
93
|
+
const promptInjectionSignal = extractPromptInjection(promptInjection);
|
|
88
94
|
return {
|
|
89
95
|
kind: "block",
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
kind: "needs_review",
|
|
96
|
-
safety: extractSafety(security),
|
|
96
|
+
prompt_injection: promptInjectionSignal,
|
|
97
|
+
reason: {
|
|
98
|
+
kind: "prompt_injection",
|
|
99
|
+
risk_level: promptInjectionSignal.risk_level,
|
|
100
|
+
},
|
|
97
101
|
};
|
|
98
102
|
}
|
|
99
103
|
}
|
|
100
104
|
return null;
|
|
101
105
|
}
|
|
102
|
-
function
|
|
106
|
+
function certaintyGateBlock(config, results) {
|
|
107
|
+
const mode = config?.certaintyGate ?? DEFAULT_CERTAINTY_GATE;
|
|
108
|
+
if (mode === "off")
|
|
109
|
+
return undefined;
|
|
110
|
+
const threshold = certaintyThreshold(config);
|
|
111
|
+
const classifier_scores = classifierScores(results);
|
|
112
|
+
const scores = Object.values(classifier_scores);
|
|
113
|
+
const score = mode === "min_score"
|
|
114
|
+
? Math.min(...scores)
|
|
115
|
+
: scores.reduce((sum, value) => sum + value, 0) / scores.length;
|
|
116
|
+
if (score >= threshold)
|
|
117
|
+
return undefined;
|
|
118
|
+
return {
|
|
119
|
+
kind: "low_certainty",
|
|
120
|
+
mode,
|
|
121
|
+
threshold,
|
|
122
|
+
score,
|
|
123
|
+
classifier_scores,
|
|
124
|
+
low_classifiers: Object.entries(classifier_scores)
|
|
125
|
+
.filter(([, value]) => value < threshold)
|
|
126
|
+
.map(([name]) => name),
|
|
127
|
+
};
|
|
128
|
+
}
|
|
129
|
+
function classifierScores(results) {
|
|
130
|
+
return Object.fromEntries(REGISTRY.map((manifest) => [
|
|
131
|
+
manifest.name,
|
|
132
|
+
scoreCertainty(results[manifest.name]?.certainty),
|
|
133
|
+
]));
|
|
134
|
+
}
|
|
135
|
+
function scoreCertainty(certainty) {
|
|
136
|
+
return certainty === undefined ? 0 : certaintyScore[certainty];
|
|
137
|
+
}
|
|
138
|
+
function extractPromptInjection(value) {
|
|
103
139
|
return {
|
|
104
|
-
...(value.decision === undefined ? {} : { decision: value.decision }),
|
|
105
140
|
risk_level: value.risk_level,
|
|
106
|
-
signals: value.signals,
|
|
107
141
|
};
|
|
108
142
|
}
|
|
109
143
|
function buildShortCircuitResult(name, verdict, settled, target_message_hash) {
|
|
@@ -116,13 +150,13 @@ function buildShortCircuitResult(name, verdict, settled, target_message_hash) {
|
|
|
116
150
|
};
|
|
117
151
|
const meta = { classifiers: { [name]: entry } };
|
|
118
152
|
const classifier_outputs = classifierCustomOutputs({ [name]: value });
|
|
119
|
-
if (verdict.kind === "
|
|
153
|
+
if (verdict.kind === "reply") {
|
|
120
154
|
const preflight = value;
|
|
121
155
|
return {
|
|
122
|
-
action: "
|
|
156
|
+
action: "reply",
|
|
123
157
|
message_id: target_message_hash,
|
|
124
|
-
|
|
125
|
-
reason: "
|
|
158
|
+
reply: { text: verdict.final_reply.reply },
|
|
159
|
+
reason: "preflight_reply",
|
|
126
160
|
classifier_outputs,
|
|
127
161
|
audit: {
|
|
128
162
|
fired_by: name,
|
|
@@ -131,34 +165,15 @@ function buildShortCircuitResult(name, verdict, settled, target_message_hash) {
|
|
|
131
165
|
},
|
|
132
166
|
};
|
|
133
167
|
}
|
|
134
|
-
if (verdict.kind === "needs_review") {
|
|
135
|
-
return {
|
|
136
|
-
action: "needs_review",
|
|
137
|
-
message_id: target_message_hash,
|
|
138
|
-
fired_by: name,
|
|
139
|
-
reason: {
|
|
140
|
-
risk_level: verdict.safety.risk_level,
|
|
141
|
-
signals: verdict.safety.signals,
|
|
142
|
-
},
|
|
143
|
-
classifier_outputs,
|
|
144
|
-
audit: {
|
|
145
|
-
fired_by: name,
|
|
146
|
-
safety: verdict.safety,
|
|
147
|
-
meta,
|
|
148
|
-
},
|
|
149
|
-
};
|
|
150
|
-
}
|
|
151
168
|
return {
|
|
152
169
|
action: "block",
|
|
153
170
|
message_id: target_message_hash,
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
signals: verdict.safety.signals,
|
|
157
|
-
},
|
|
171
|
+
fired_by: name,
|
|
172
|
+
reason: verdict.reason,
|
|
158
173
|
classifier_outputs,
|
|
159
174
|
audit: {
|
|
160
175
|
fired_by: name,
|
|
161
|
-
|
|
176
|
+
prompt_injection: verdict.prompt_injection,
|
|
162
177
|
meta,
|
|
163
178
|
},
|
|
164
179
|
};
|
|
@@ -199,6 +214,21 @@ function buildRouteResult(request, envelope, results, meta) {
|
|
|
199
214
|
},
|
|
200
215
|
};
|
|
201
216
|
}
|
|
217
|
+
function buildCertaintyGateBlockResult(request, envelope, results, meta, certaintyGate) {
|
|
218
|
+
return {
|
|
219
|
+
action: "block",
|
|
220
|
+
message_id: request.target_message_hash,
|
|
221
|
+
fired_by: "certainty_gate",
|
|
222
|
+
reason: certaintyGate,
|
|
223
|
+
classifier_outputs: classifierCustomOutputs(results),
|
|
224
|
+
audit: {
|
|
225
|
+
...envelope,
|
|
226
|
+
fired_by: "certainty_gate",
|
|
227
|
+
certainty_gate: certaintyGate,
|
|
228
|
+
meta,
|
|
229
|
+
},
|
|
230
|
+
};
|
|
231
|
+
}
|
|
202
232
|
function classifierCustomOutputs(results) {
|
|
203
233
|
const out = {};
|
|
204
234
|
for (const manifest of REGISTRY) {
|
package/dist/src/stock-prompt.js
CHANGED
|
@@ -26,7 +26,7 @@ function stockSection(manifest) {
|
|
|
26
26
|
allowed_tools: renderAllowedTools(manifest.tools),
|
|
27
27
|
preflight_output: promptMarkdown("preflight-output.md"),
|
|
28
28
|
routing_output: promptMarkdown("routing-output.md"),
|
|
29
|
-
|
|
29
|
+
prompt_injection_output: promptMarkdown("prompt-injection-output.md"),
|
|
30
30
|
specialty: promptMarkdown("specialty.md"),
|
|
31
31
|
tier: promptMarkdown("tier.md"),
|
|
32
32
|
tools_output: promptMarkdown("tools-output.md"),
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { JsonClassifierManifest,
|
|
1
|
+
import type { JsonClassifierManifest, ClassifierOutput } from "./stock.js";
|
|
2
2
|
export declare const STOCK_REASON_MAX_CHARS = 120;
|
|
3
3
|
export declare const STOCK_REPLY_MAX_CHARS = 200;
|
|
4
4
|
export declare const STOCK_TOOL_ID_MAX_CHARS = 64;
|
|
@@ -19,4 +19,3 @@ export interface LegacyValidateOptions {
|
|
|
19
19
|
readonly manifest: JsonClassifierManifest;
|
|
20
20
|
}
|
|
21
21
|
export declare function validateClassifierOutputWithManifest(value: unknown, options: LegacyValidateOptions): ClassifierOutput;
|
|
22
|
-
export type { SafetySignal };
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES,
|
|
1
|
+
import { DOWNSTREAM_MODEL_TIER_VALUES, MODEL_SPECIALIZATION_VALUES, } from "./enums.js";
|
|
2
2
|
import { Ajv } from "ajv/dist/ajv.js";
|
|
3
|
-
import { STOCK_CLASSIFIER_NAMES } from "./stock.js";
|
|
4
|
-
import { ensureNoDuplicates, isRecord,
|
|
3
|
+
import { CERTAINTY_VALUES, STOCK_CLASSIFIER_NAMES } from "./stock.js";
|
|
4
|
+
import { ensureNoDuplicates, isRecord, requireEnum, requireNonEmptyStringMaxLength, requireNonNegativeSafeInteger, requireString, requireStringArray, throwInvalid, } from "./validation.js";
|
|
5
5
|
export const STOCK_REASON_MAX_CHARS = 120;
|
|
6
6
|
export const STOCK_REPLY_MAX_CHARS = 200;
|
|
7
7
|
export const STOCK_TOOL_ID_MAX_CHARS = 64;
|
|
@@ -9,7 +9,7 @@ export const STOCK_TOOL_DESCRIPTION_MAX_CHARS = 240;
|
|
|
9
9
|
export const STOCK_MANIFEST_NAME_MAX_CHARS = 80;
|
|
10
10
|
export const STOCK_MANIFEST_VERSION_MAX_CHARS = 40;
|
|
11
11
|
export const STOCK_MANIFEST_PURPOSE_MAX_CHARS = 400;
|
|
12
|
-
const
|
|
12
|
+
const STOCK_PROMPT_INJECTION_RISK_LEVEL_VALUES = [
|
|
13
13
|
"normal",
|
|
14
14
|
"suspicious",
|
|
15
15
|
"high_risk",
|
|
@@ -113,8 +113,8 @@ function validateStockOutputForName(name, value, model, tools) {
|
|
|
113
113
|
return validateModelSpecializationOutput(value, model);
|
|
114
114
|
case "tools":
|
|
115
115
|
return validateToolsOutput(value, model, tools?.map((tool) => tool.id));
|
|
116
|
-
case "
|
|
117
|
-
return
|
|
116
|
+
case "prompt_injection":
|
|
117
|
+
return validatePromptInjectionOutput(value, model);
|
|
118
118
|
default: {
|
|
119
119
|
const _exhaustive = name;
|
|
120
120
|
void _exhaustive;
|
|
@@ -123,17 +123,19 @@ function validateStockOutputForName(name, value, model, tools) {
|
|
|
123
123
|
}
|
|
124
124
|
}
|
|
125
125
|
function validateMetadata(value, classifier, model) {
|
|
126
|
+
if (value.reason === undefined) {
|
|
127
|
+
throwInvalid(classifier, model, "reason is required");
|
|
128
|
+
}
|
|
129
|
+
if (value.certainty === undefined) {
|
|
130
|
+
throwInvalid(classifier, model, "certainty is required");
|
|
131
|
+
}
|
|
126
132
|
return {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
: { reason: truncateText(requireString(value.reason, classifier, model, "reason"), STOCK_REASON_MAX_CHARS) }),
|
|
130
|
-
...(value.confidence === undefined
|
|
131
|
-
? {}
|
|
132
|
-
: { confidence: requireConfidence(value.confidence, classifier, model) }),
|
|
133
|
+
reason: truncateText(requireString(value.reason, classifier, model, "reason"), STOCK_REASON_MAX_CHARS),
|
|
134
|
+
certainty: requireEnum(value.certainty, CERTAINTY_VALUES, classifier, model, "certainty"),
|
|
133
135
|
};
|
|
134
136
|
}
|
|
135
137
|
function validatePreflightOutput(value, model) {
|
|
136
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
138
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "final_reply", "ack_reply"], "preflight", model, "output");
|
|
137
139
|
if (value.final_reply !== undefined && value.ack_reply !== undefined) {
|
|
138
140
|
throwInvalid("preflight", model, "final_reply and ack_reply are mutually exclusive");
|
|
139
141
|
}
|
|
@@ -163,7 +165,7 @@ function validateReplySignal(value, classifier, model, field) {
|
|
|
163
165
|
return { reply };
|
|
164
166
|
}
|
|
165
167
|
function validateTierRoutingOutput(value, model) {
|
|
166
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
168
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "model_tier"], "routing", model, "output");
|
|
167
169
|
const meta = validateMetadata(value, "routing", model);
|
|
168
170
|
const modelTier = normalizeOptionalEnumValue(value.model_tier);
|
|
169
171
|
return {
|
|
@@ -174,7 +176,7 @@ function validateTierRoutingOutput(value, model) {
|
|
|
174
176
|
};
|
|
175
177
|
}
|
|
176
178
|
function validateModelSpecializationOutput(value, model) {
|
|
177
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
179
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "specialization"], "model_specialization", model, "output");
|
|
178
180
|
const meta = validateMetadata(value, "model_specialization", model);
|
|
179
181
|
const specialization = normalizeOptionalEnumValue(value.specialization);
|
|
180
182
|
return {
|
|
@@ -194,7 +196,7 @@ function normalizeOptionalEnumValue(value) {
|
|
|
194
196
|
return value;
|
|
195
197
|
}
|
|
196
198
|
function validateToolsOutput(value, model, configuredTools) {
|
|
197
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
199
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "tools"], "tools", model, "output");
|
|
198
200
|
const meta = validateMetadata(value, "tools", model);
|
|
199
201
|
const tools = requireStringArray(value.tools, "tools", model, "tools").map(normalizeTool);
|
|
200
202
|
ensureNoDuplicates(tools, "tools", model, "tools");
|
|
@@ -208,39 +210,20 @@ function validateToolsOutput(value, model, configuredTools) {
|
|
|
208
210
|
}
|
|
209
211
|
return { ...meta, tools };
|
|
210
212
|
}
|
|
211
|
-
function
|
|
212
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
213
|
-
const meta = validateMetadata(value, "
|
|
214
|
-
const
|
|
215
|
-
? undefined
|
|
216
|
-
: requireEnum(value.decision, SECURITY_DECISION_VALUES, "security", model, "decision");
|
|
217
|
-
const riskLevel = requireEnum(value.risk_level, STOCK_SAFETY_RISK_LEVEL_VALUES, "security", model, "risk_level");
|
|
218
|
-
const signals = requireStringArray(value.signals, "security", model, "signals");
|
|
219
|
-
ensureNoDuplicates(signals, "security", model, "signals");
|
|
220
|
-
if ((riskLevel === "normal" || riskLevel === "unknown") && signals.length > 0) {
|
|
221
|
-
throwInvalid("security", model, `${riskLevel} risk_level must not include signals`);
|
|
222
|
-
}
|
|
223
|
-
if (riskLevel !== "normal" && riskLevel !== "unknown" && signals.length === 0) {
|
|
224
|
-
throwInvalid("security", model, "elevated risk_level must include at least one signal");
|
|
225
|
-
}
|
|
226
|
-
if (decision === "block" && riskLevel !== "high_risk") {
|
|
227
|
-
throwInvalid("security", model, "decision block requires high_risk risk_level");
|
|
228
|
-
}
|
|
229
|
-
if (decision === "allow" && riskLevel === "high_risk") {
|
|
230
|
-
throwInvalid("security", model, "decision allow must not use high_risk risk_level");
|
|
231
|
-
}
|
|
213
|
+
function validatePromptInjectionOutput(value, model) {
|
|
214
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "risk_level"], "prompt_injection", model, "output");
|
|
215
|
+
const meta = validateMetadata(value, "prompt_injection", model);
|
|
216
|
+
const riskLevel = requireEnum(value.risk_level, STOCK_PROMPT_INJECTION_RISK_LEVEL_VALUES, "prompt_injection", model, "risk_level");
|
|
232
217
|
return {
|
|
233
218
|
...meta,
|
|
234
|
-
...(decision === undefined ? {} : { decision }),
|
|
235
219
|
risk_level: riskLevel,
|
|
236
|
-
signals,
|
|
237
220
|
};
|
|
238
221
|
}
|
|
239
222
|
function validateCustomOutput(value, classifier, model, schema) {
|
|
240
223
|
if (!isRecord(value)) {
|
|
241
224
|
throwInvalid(classifier, model, "output must be a JSON object");
|
|
242
225
|
}
|
|
243
|
-
ensureAllowedObjectKeys(value, ["reason", "
|
|
226
|
+
ensureAllowedObjectKeys(value, ["reason", "certainty", "output"], classifier, model, "output");
|
|
244
227
|
if (value.output === undefined) {
|
|
245
228
|
throwInvalid(classifier, model, "output is required for custom classifiers");
|
|
246
229
|
}
|
package/dist/src/stock.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { DownstreamModelTier, ModelSpecialization
|
|
1
|
+
import type { DownstreamModelTier, ModelSpecialization } from "./enums.js";
|
|
2
2
|
export interface StockClassifierMessageInput {
|
|
3
3
|
readonly role: "user" | "assistant";
|
|
4
4
|
readonly text: string;
|
|
@@ -25,14 +25,15 @@ export interface SpecializationSignal {
|
|
|
25
25
|
export interface ToolsSignal {
|
|
26
26
|
readonly tools: ReadonlyArray<string>;
|
|
27
27
|
}
|
|
28
|
-
export interface
|
|
29
|
-
readonly decision?: SecurityDecision;
|
|
28
|
+
export interface PromptInjectionSignal {
|
|
30
29
|
readonly risk_level: "normal" | "suspicious" | "high_risk" | "unknown";
|
|
31
|
-
readonly signals: ReadonlyArray<string>;
|
|
32
30
|
}
|
|
31
|
+
export type Certainty = "no_signal" | "very_weak" | "weak" | "tentative" | "reasonable" | "strong" | "very_strong" | "near_certain";
|
|
32
|
+
export declare const CERTAINTY_VALUES: readonly ["no_signal", "very_weak", "weak", "tentative", "reasonable", "strong", "very_strong", "near_certain"];
|
|
33
|
+
export declare const certaintyScore: Record<Certainty, number>;
|
|
33
34
|
export interface ClassifierOutputMetadata {
|
|
34
|
-
readonly reason
|
|
35
|
-
readonly
|
|
35
|
+
readonly reason: string;
|
|
36
|
+
readonly certainty: Certainty;
|
|
36
37
|
}
|
|
37
38
|
export interface PreflightClassifierOutput extends ClassifierOutputMetadata {
|
|
38
39
|
readonly final_reply?: FinalReplySignal;
|
|
@@ -41,7 +42,7 @@ export interface PreflightClassifierOutput extends ClassifierOutputMetadata {
|
|
|
41
42
|
export type RoutingClassifierOutput = TierSignal & ClassifierOutputMetadata;
|
|
42
43
|
export type ModelSpecializationClassifierOutput = SpecializationSignal & ClassifierOutputMetadata;
|
|
43
44
|
export type ToolsClassifierOutput = ToolsSignal & ClassifierOutputMetadata;
|
|
44
|
-
export type
|
|
45
|
+
export type PromptInjectionClassifierOutput = PromptInjectionSignal & ClassifierOutputMetadata;
|
|
45
46
|
export interface CustomClassifierOutputValue extends ClassifierOutputMetadata {
|
|
46
47
|
readonly output: unknown;
|
|
47
48
|
}
|
|
@@ -50,9 +51,9 @@ export interface StockClassifierOutputs {
|
|
|
50
51
|
readonly routing: RoutingClassifierOutput;
|
|
51
52
|
readonly model_specialization: ModelSpecializationClassifierOutput;
|
|
52
53
|
readonly tools: ToolsClassifierOutput;
|
|
53
|
-
readonly
|
|
54
|
+
readonly prompt_injection: PromptInjectionClassifierOutput;
|
|
54
55
|
}
|
|
55
|
-
export declare const STOCK_CLASSIFIER_NAMES: readonly ["preflight", "routing", "model_specialization", "tools", "
|
|
56
|
+
export declare const STOCK_CLASSIFIER_NAMES: readonly ["preflight", "routing", "model_specialization", "tools", "prompt_injection"];
|
|
56
57
|
export type StockClassifierName = (typeof STOCK_CLASSIFIER_NAMES)[number];
|
|
57
58
|
export type StockClassifierOutput = StockClassifierOutputs[StockClassifierName];
|
|
58
59
|
export type ClassifierOutput = StockClassifierOutput | CustomClassifierOutputValue;
|
|
@@ -94,8 +95,8 @@ export declare function isStockManifest(manifest: RuntimeClassifierManifest): ma
|
|
|
94
95
|
export declare function isCustomManifest(manifest: RuntimeClassifierManifest): manifest is RuntimeCustomManifest;
|
|
95
96
|
export interface CustomClassifierOutput {
|
|
96
97
|
readonly classifier: string;
|
|
97
|
-
readonly reason
|
|
98
|
-
readonly
|
|
98
|
+
readonly reason: string;
|
|
99
|
+
readonly certainty: Certainty;
|
|
99
100
|
readonly output: unknown;
|
|
100
101
|
}
|
|
101
102
|
export {};
|
package/dist/src/stock.js
CHANGED
|
@@ -1,9 +1,29 @@
|
|
|
1
|
+
export const CERTAINTY_VALUES = [
|
|
2
|
+
"no_signal",
|
|
3
|
+
"very_weak",
|
|
4
|
+
"weak",
|
|
5
|
+
"tentative",
|
|
6
|
+
"reasonable",
|
|
7
|
+
"strong",
|
|
8
|
+
"very_strong",
|
|
9
|
+
"near_certain",
|
|
10
|
+
];
|
|
11
|
+
export const certaintyScore = {
|
|
12
|
+
no_signal: 0.00,
|
|
13
|
+
very_weak: 0.15,
|
|
14
|
+
weak: 0.30,
|
|
15
|
+
tentative: 0.45,
|
|
16
|
+
reasonable: 0.60,
|
|
17
|
+
strong: 0.75,
|
|
18
|
+
very_strong: 0.88,
|
|
19
|
+
near_certain: 0.97,
|
|
20
|
+
};
|
|
1
21
|
export const STOCK_CLASSIFIER_NAMES = [
|
|
2
22
|
"preflight",
|
|
3
23
|
"routing",
|
|
4
24
|
"model_specialization",
|
|
5
25
|
"tools",
|
|
6
|
-
"
|
|
26
|
+
"prompt_injection",
|
|
7
27
|
];
|
|
8
28
|
// Helper: narrow a manifest to its stock kind for callers that know the name.
|
|
9
29
|
export function isStockManifest(manifest) {
|