@floatingsidewal/bulkhead-core 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,237 @@
1
+ /** Confidence level for a detection */
2
+ type Confidence = "high" | "medium" | "low";
3
+ /** Which cascade layer produced this detection */
4
+ type DetectionSource = "regex" | "bert" | "llm";
5
+ /** Whether this detection is final or needs escalation */
6
+ type Disposition = "confirmed" | "escalate" | "dismissed" | "informational";
7
+ /** A detected entity in text */
8
+ interface Detection {
9
+ /** Entity type (e.g., "CREDIT_CARD", "US_SSN", "AWS_KEY") */
10
+ entityType: string;
11
+ /** Start offset in the input text */
12
+ start: number;
13
+ /** End offset in the input text */
14
+ end: number;
15
+ /** The matched text */
16
+ text: string;
17
+ /** Detection confidence */
18
+ confidence: Confidence;
19
+ /** Numeric score 0-1 */
20
+ score: number;
21
+ /** Which guard produced this detection */
22
+ guardName: string;
23
+ /** Which cascade layer produced this detection */
24
+ source: DetectionSource;
25
+ /** Surrounding text window for context */
26
+ context: string;
27
+ /** Whether this detection is final or needs escalation */
28
+ disposition: Disposition;
29
+ }
30
+ /** Result from a single guard's analysis */
31
+ interface GuardResult {
32
+ /** Whether the text passed this guard (no issues found) */
33
+ passed: boolean;
34
+ /** Human-readable reason for the result */
35
+ reason: string;
36
+ /** Name of the guard that produced this result */
37
+ guardName: string;
38
+ /** Overall score 0-1 (0 = safe, 1 = maximum threat) */
39
+ score: number;
40
+ /** Individual detections found */
41
+ detections: Detection[];
42
+ /** Modified text with redactions applied (if applicable) */
43
+ redactedText?: string;
44
+ }
45
+ /** Guard mode: block rejects the input, redact sanitizes it */
46
+ type GuardMode = "block" | "redact";
47
+ /** Configuration for a guard */
48
+ interface GuardConfig {
49
+ /** Whether this guard is enabled */
50
+ enabled: boolean;
51
+ /** Detection threshold 0-1 (detections below this score are ignored) */
52
+ threshold: number;
53
+ /** What to do when a detection occurs */
54
+ mode: GuardMode;
55
+ }
56
+ /** A guard analyzes text and returns results */
57
+ interface Guard {
58
+ /** Unique name for this guard */
59
+ readonly name: string;
60
+ /** Analyze text and return results */
61
+ analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>;
62
+ }
63
+ /** Configuration for the guardrails engine */
64
+ interface EngineConfig {
65
+ /** Guard-specific configuration overrides */
66
+ guards: Record<string, Partial<GuardConfig>>;
67
+ }
68
+ /** A PII pattern definition */
69
+ interface PiiPattern {
70
+ /** Entity type name (e.g., "CREDIT_CARD") */
71
+ entityType: string;
72
+ /** Regex patterns to match */
73
+ patterns: RegExp[];
74
+ /** Optional validation function (e.g., Luhn check) */
75
+ validate?: (match: string) => boolean;
76
+ /** Context words that boost confidence when found nearby */
77
+ contextWords?: string[];
78
+ /** Base confidence without context boost */
79
+ baseConfidence: Confidence;
80
+ /** Base score without context boost */
81
+ baseScore: number;
82
+ }
83
+ /** A secret pattern definition */
84
+ interface SecretPattern {
85
+ /** Secret type name (e.g., "AWS_ACCESS_KEY") */
86
+ secretType: string;
87
+ /** Regex patterns to match */
88
+ patterns: RegExp[];
89
+ /** Optional validation function */
90
+ validate?: (match: string) => boolean;
91
+ /** Minimum entropy threshold (if applicable) */
92
+ minEntropy?: number;
93
+ }
94
+ /** Tactic names for detection strategies */
95
+ type TacticName = "pattern" | "heuristic" | "llm";
96
+ /** Result from a tactic execution */
97
+ interface TacticResult {
98
+ /** Score 0-1 */
99
+ score: number;
100
+ /** Additional context about the detection */
101
+ details?: Record<string, unknown>;
102
+ }
103
+ /** A detection tactic */
104
+ interface Tactic {
105
+ readonly name: TacticName;
106
+ readonly defaultThreshold: number;
107
+ execute(input: string): Promise<TacticResult>;
108
+ }
109
+
110
+ /**
111
+ * LLM disambiguation layer (Layer 3) of the cascading classifier.
112
+ * Only receives ambiguous spans from Layer 2, along with surrounding context.
113
+ * Makes a focused determination: is this span PII or not?
114
+ */
115
+
116
+ /** Function signature for an LLM provider */
117
+ type LlmProvider = (prompt: string) => Promise<string>;
118
+ interface LlmLayerConfig {
119
+ /** Number of sentences before/after the span to include as context */
120
+ contextSentences: number;
121
+ /** LLM provider function */
122
+ provider?: LlmProvider;
123
+ }
124
+ declare class LlmLayer {
125
+ private config;
126
+ constructor(config?: Partial<LlmLayerConfig>);
127
+ /** Set the LLM provider (can be swapped at runtime) */
128
+ setProvider(provider: LlmProvider): void;
129
+ /**
130
+ * Disambiguate escalated detections using an LLM.
131
+ * @param escalated Detections with disposition "escalate"
132
+ * @param fullText The full document text
133
+ * @param confirmed Already-confirmed detections (passed as context to help disambiguation)
134
+ */
135
+ disambiguate(escalated: Detection[], fullText: string, confirmed: Detection[]): Promise<Detection[]>;
136
+ /** Build a focused disambiguation prompt */
137
+ private buildPrompt;
138
+ /** Extract ±N sentences around a span */
139
+ private extractSentenceContext;
140
+ /** Parse the LLM response JSON */
141
+ private parseResponse;
142
+ }
143
+
144
+ /**
145
+ * Cascading Classifier — orchestrates the three detection layers.
146
+ *
147
+ * Layer 1 (Regex): Always runs, sub-ms. Catches structured PII.
148
+ * → confidence: 1.0 (nominal; the Detection.confidence field itself is "high" | "medium" | "low"), disposition: "confirmed"
149
+ *
150
+ * Layer 2 (BERT): On-demand, 20-50ms. Catches contextual entities.
151
+ * → score >= threshold: "confirmed"
152
+ * → score < threshold: "escalate"
153
+ *
154
+ * Layer 3 (LLM): Selective, 500ms-2s. Only sees escalated spans.
155
+ * → Returns "confirmed" or "dismissed"
156
+ */
157
+
158
+ interface CascadeConfig {
159
+ /** Score threshold: BERT detections scoring below this value escalate to the LLM layer */
160
+ escalationThreshold: number;
161
+ /** Number of sentences of context to pass to Layer 3 */
162
+ contextSentences: number;
163
+ /** Whether Layer 2 (BERT) is enabled */
164
+ bertEnabled: boolean;
165
+ /** Whether Layer 3 (LLM) is enabled */
166
+ llmEnabled: boolean;
167
+ /** Model ID for BERT layer */
168
+ modelId?: string;
169
+ /** LLM provider function for Layer 3 */
170
+ llmProvider?: LlmProvider;
171
+ }
172
+ declare class CascadeClassifier {
173
+ private config;
174
+ private bertLayer;
175
+ private llmLayer;
176
+ private regexGuards;
177
+ constructor(config?: Partial<CascadeConfig>);
178
+ /** Whether the cascade is ready to serve (BERT model loaded if enabled) */
179
+ get ready(): boolean;
180
+ /** Register regex-based guards (Layer 1) */
181
+ addRegexGuard(guard: Guard): this;
182
+ /** Set the LLM provider for Layer 3 */
183
+ setLlmProvider(provider: LlmProvider): void;
184
+ /**
185
+ * Run the full cascade: Regex → BERT → LLM
186
+ * Returns a unified GuardResult with all detections carrying provenance.
187
+ */
188
+ deepScan(text: string): Promise<GuardResult>;
189
+ /** Run Layer 1 only (for fast auto-scan path) */
190
+ regexScan(text: string): Promise<GuardResult>;
191
+ /** Run Layers 1 + 2 only (no LLM, for "Scan File" command) */
192
+ modelScan(text: string): Promise<GuardResult>;
193
+ private runRegexLayer;
194
+ private runBertLayer;
195
+ /** Remove BERT detections that overlap with regex detections */
196
+ private deduplicateAgainstRegex;
197
+ private buildCascadeResult;
198
+ /** Clean up resources */
199
+ dispose(): Promise<void>;
200
+ }
201
+
202
+ /**
203
+ * Main-thread interface to the BERT worker (Layer 2).
204
+ * Manages the worker lifecycle and maps BERT tokens to Detection objects.
205
+ */
206
+
207
+ interface BertLayerConfig {
208
+ modelId?: string;
209
+ /** Score threshold: detections at or above it are confirmed, those below it escalate */
210
+ escalationThreshold: number;
211
+ }
212
+ declare class BertLayer {
213
+ private worker;
214
+ private pendingRequests;
215
+ private requestId;
216
+ private config;
217
+ /** Whether the BERT model has been loaded and first inference completed */
218
+ private _loaded;
219
+ get loaded(): boolean;
220
+ constructor(config?: Partial<BertLayerConfig>);
221
+ /** Resolve the worker path — supports both compiled .js and source .ts */
222
+ private resolveWorkerPath;
223
+ /** Ensure the worker thread is running */
224
+ private ensureWorker;
225
+ /** Send text to the BERT worker and get raw token results */
226
+ private analyzeRaw;
227
+ /**
228
+ * Analyze text and return Detection objects with escalation disposition.
229
+ * Tokens scoring at or above the escalation threshold are "confirmed",
230
+ * tokens scoring below it are "escalate" (need LLM review).
231
+ */
232
+ analyze(text: string): Promise<Detection[]>;
233
+ /** Terminate the worker thread */
234
+ dispose(): Promise<void>;
235
+ }
236
+
237
+ export { type BertLayerConfig as B, type CascadeConfig as C, type Detection as D, type EngineConfig as E, type GuardMode as G, type LlmLayerConfig as L, type PiiPattern as P, type SecretPattern as S, type Tactic as T, type Guard as a, type GuardResult as b, CascadeClassifier as c, type GuardConfig as d, type DetectionSource as e, type Disposition as f, type Confidence as g, type LlmProvider as h, type TacticName as i, type TacticResult as j, BertLayer as k, LlmLayer as l };
@@ -0,0 +1,237 @@
1
+ /** Confidence level for a detection */
2
+ type Confidence = "high" | "medium" | "low";
3
+ /** Which cascade layer produced this detection */
4
+ type DetectionSource = "regex" | "bert" | "llm";
5
+ /** Whether this detection is final or needs escalation */
6
+ type Disposition = "confirmed" | "escalate" | "dismissed" | "informational";
7
+ /** A detected entity in text */
8
+ interface Detection {
9
+ /** Entity type (e.g., "CREDIT_CARD", "US_SSN", "AWS_KEY") */
10
+ entityType: string;
11
+ /** Start offset in the input text */
12
+ start: number;
13
+ /** End offset in the input text */
14
+ end: number;
15
+ /** The matched text */
16
+ text: string;
17
+ /** Detection confidence */
18
+ confidence: Confidence;
19
+ /** Numeric score 0-1 */
20
+ score: number;
21
+ /** Which guard produced this detection */
22
+ guardName: string;
23
+ /** Which cascade layer produced this detection */
24
+ source: DetectionSource;
25
+ /** Surrounding text window for context */
26
+ context: string;
27
+ /** Whether this detection is final or needs escalation */
28
+ disposition: Disposition;
29
+ }
30
+ /** Result from a single guard's analysis */
31
+ interface GuardResult {
32
+ /** Whether the text passed this guard (no issues found) */
33
+ passed: boolean;
34
+ /** Human-readable reason for the result */
35
+ reason: string;
36
+ /** Name of the guard that produced this result */
37
+ guardName: string;
38
+ /** Overall score 0-1 (0 = safe, 1 = maximum threat) */
39
+ score: number;
40
+ /** Individual detections found */
41
+ detections: Detection[];
42
+ /** Modified text with redactions applied (if applicable) */
43
+ redactedText?: string;
44
+ }
45
+ /** Guard mode: block rejects the input, redact sanitizes it */
46
+ type GuardMode = "block" | "redact";
47
+ /** Configuration for a guard */
48
+ interface GuardConfig {
49
+ /** Whether this guard is enabled */
50
+ enabled: boolean;
51
+ /** Detection threshold 0-1 (detections below this score are ignored) */
52
+ threshold: number;
53
+ /** What to do when a detection occurs */
54
+ mode: GuardMode;
55
+ }
56
+ /** A guard analyzes text and returns results */
57
+ interface Guard {
58
+ /** Unique name for this guard */
59
+ readonly name: string;
60
+ /** Analyze text and return results */
61
+ analyze(text: string, config?: Partial<GuardConfig>): Promise<GuardResult>;
62
+ }
63
+ /** Configuration for the guardrails engine */
64
+ interface EngineConfig {
65
+ /** Guard-specific configuration overrides */
66
+ guards: Record<string, Partial<GuardConfig>>;
67
+ }
68
+ /** A PII pattern definition */
69
+ interface PiiPattern {
70
+ /** Entity type name (e.g., "CREDIT_CARD") */
71
+ entityType: string;
72
+ /** Regex patterns to match */
73
+ patterns: RegExp[];
74
+ /** Optional validation function (e.g., Luhn check) */
75
+ validate?: (match: string) => boolean;
76
+ /** Context words that boost confidence when found nearby */
77
+ contextWords?: string[];
78
+ /** Base confidence without context boost */
79
+ baseConfidence: Confidence;
80
+ /** Base score without context boost */
81
+ baseScore: number;
82
+ }
83
+ /** A secret pattern definition */
84
+ interface SecretPattern {
85
+ /** Secret type name (e.g., "AWS_ACCESS_KEY") */
86
+ secretType: string;
87
+ /** Regex patterns to match */
88
+ patterns: RegExp[];
89
+ /** Optional validation function */
90
+ validate?: (match: string) => boolean;
91
+ /** Minimum entropy threshold (if applicable) */
92
+ minEntropy?: number;
93
+ }
94
+ /** Tactic names for detection strategies */
95
+ type TacticName = "pattern" | "heuristic" | "llm";
96
+ /** Result from a tactic execution */
97
+ interface TacticResult {
98
+ /** Score 0-1 */
99
+ score: number;
100
+ /** Additional context about the detection */
101
+ details?: Record<string, unknown>;
102
+ }
103
+ /** A detection tactic */
104
+ interface Tactic {
105
+ readonly name: TacticName;
106
+ readonly defaultThreshold: number;
107
+ execute(input: string): Promise<TacticResult>;
108
+ }
109
+
110
+ /**
111
+ * LLM disambiguation layer (Layer 3) of the cascading classifier.
112
+ * Only receives ambiguous spans from Layer 2, along with surrounding context.
113
+ * Makes a focused determination: is this span PII or not?
114
+ */
115
+
116
+ /** Function signature for an LLM provider */
117
+ type LlmProvider = (prompt: string) => Promise<string>;
118
+ interface LlmLayerConfig {
119
+ /** Number of sentences before/after the span to include as context */
120
+ contextSentences: number;
121
+ /** LLM provider function */
122
+ provider?: LlmProvider;
123
+ }
124
+ declare class LlmLayer {
125
+ private config;
126
+ constructor(config?: Partial<LlmLayerConfig>);
127
+ /** Set the LLM provider (can be swapped at runtime) */
128
+ setProvider(provider: LlmProvider): void;
129
+ /**
130
+ * Disambiguate escalated detections using an LLM.
131
+ * @param escalated Detections with disposition "escalate"
132
+ * @param fullText The full document text
133
+ * @param confirmed Already-confirmed detections (passed as context to help disambiguation)
134
+ */
135
+ disambiguate(escalated: Detection[], fullText: string, confirmed: Detection[]): Promise<Detection[]>;
136
+ /** Build a focused disambiguation prompt */
137
+ private buildPrompt;
138
+ /** Extract ±N sentences around a span */
139
+ private extractSentenceContext;
140
+ /** Parse the LLM response JSON */
141
+ private parseResponse;
142
+ }
143
+
144
+ /**
145
+ * Cascading Classifier — orchestrates the three detection layers.
146
+ *
147
+ * Layer 1 (Regex): Always runs, sub-ms. Catches structured PII.
148
+ * → confidence: 1.0 (nominal; the Detection.confidence field itself is "high" | "medium" | "low"), disposition: "confirmed"
149
+ *
150
+ * Layer 2 (BERT): On-demand, 20-50ms. Catches contextual entities.
151
+ * → score >= threshold: "confirmed"
152
+ * → score < threshold: "escalate"
153
+ *
154
+ * Layer 3 (LLM): Selective, 500ms-2s. Only sees escalated spans.
155
+ * → Returns "confirmed" or "dismissed"
156
+ */
157
+
158
+ interface CascadeConfig {
159
+ /** Score threshold: BERT detections scoring below this value escalate to the LLM layer */
160
+ escalationThreshold: number;
161
+ /** Number of sentences of context to pass to Layer 3 */
162
+ contextSentences: number;
163
+ /** Whether Layer 2 (BERT) is enabled */
164
+ bertEnabled: boolean;
165
+ /** Whether Layer 3 (LLM) is enabled */
166
+ llmEnabled: boolean;
167
+ /** Model ID for BERT layer */
168
+ modelId?: string;
169
+ /** LLM provider function for Layer 3 */
170
+ llmProvider?: LlmProvider;
171
+ }
172
+ declare class CascadeClassifier {
173
+ private config;
174
+ private bertLayer;
175
+ private llmLayer;
176
+ private regexGuards;
177
+ constructor(config?: Partial<CascadeConfig>);
178
+ /** Whether the cascade is ready to serve (BERT model loaded if enabled) */
179
+ get ready(): boolean;
180
+ /** Register regex-based guards (Layer 1) */
181
+ addRegexGuard(guard: Guard): this;
182
+ /** Set the LLM provider for Layer 3 */
183
+ setLlmProvider(provider: LlmProvider): void;
184
+ /**
185
+ * Run the full cascade: Regex → BERT → LLM
186
+ * Returns a unified GuardResult with all detections carrying provenance.
187
+ */
188
+ deepScan(text: string): Promise<GuardResult>;
189
+ /** Run Layer 1 only (for fast auto-scan path) */
190
+ regexScan(text: string): Promise<GuardResult>;
191
+ /** Run Layers 1 + 2 only (no LLM, for "Scan File" command) */
192
+ modelScan(text: string): Promise<GuardResult>;
193
+ private runRegexLayer;
194
+ private runBertLayer;
195
+ /** Remove BERT detections that overlap with regex detections */
196
+ private deduplicateAgainstRegex;
197
+ private buildCascadeResult;
198
+ /** Clean up resources */
199
+ dispose(): Promise<void>;
200
+ }
201
+
202
+ /**
203
+ * Main-thread interface to the BERT worker (Layer 2).
204
+ * Manages the worker lifecycle and maps BERT tokens to Detection objects.
205
+ */
206
+
207
+ interface BertLayerConfig {
208
+ modelId?: string;
209
+ /** Score threshold: detections at or above it are confirmed, those below it escalate */
210
+ escalationThreshold: number;
211
+ }
212
+ declare class BertLayer {
213
+ private worker;
214
+ private pendingRequests;
215
+ private requestId;
216
+ private config;
217
+ /** Whether the BERT model has been loaded and first inference completed */
218
+ private _loaded;
219
+ get loaded(): boolean;
220
+ constructor(config?: Partial<BertLayerConfig>);
221
+ /** Resolve the worker path — supports both compiled .js and source .ts */
222
+ private resolveWorkerPath;
223
+ /** Ensure the worker thread is running */
224
+ private ensureWorker;
225
+ /** Send text to the BERT worker and get raw token results */
226
+ private analyzeRaw;
227
+ /**
228
+ * Analyze text and return Detection objects with escalation disposition.
229
+ * Tokens scoring at or above the escalation threshold are "confirmed",
230
+ * tokens scoring below it are "escalate" (need LLM review).
231
+ */
232
+ analyze(text: string): Promise<Detection[]>;
233
+ /** Terminate the worker thread */
234
+ dispose(): Promise<void>;
235
+ }
236
+
237
+ export { type BertLayerConfig as B, type CascadeConfig as C, type Detection as D, type EngineConfig as E, type GuardMode as G, type LlmLayerConfig as L, type PiiPattern as P, type SecretPattern as S, type Tactic as T, type Guard as a, type GuardResult as b, CascadeClassifier as c, type GuardConfig as d, type DetectionSource as e, type Disposition as f, type Confidence as g, type LlmProvider as h, type TacticName as i, type TacticResult as j, BertLayer as k, LlmLayer as l };