deliberate 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,324 @@
1
+ /**
2
+ * Classifier - Multi-layer security classification for Claude Code
3
+ *
4
+ * Architecture:
5
+ * Layer 1: Pattern Matcher - Deterministic regex, cannot be prompt-injected
6
+ * Layer 2: Model Classifier - ML-based, structured input, harder to bypass
7
+ * Layer 3: LLM Fallback - Called when classifier is uncertain (active learning)
8
+ *
9
+ * Active Learning Flow:
10
+ * 1. Pattern matcher checks first (authoritative if matched)
11
+ * 2. Model classifier runs and returns confidence + coverage score
12
+ * 3. If needsLlmFallback is true, LLM should verify the classification
13
+ * 4. Disagreements between model and LLM are logged for retraining
14
+ *
15
+ * If Layer 1 matches, result is authoritative and final.
16
+ * If Layer 1 doesn't match, Layer 2 provides classification.
17
+ * If Layer 2 is uncertain, needsLlmFallback=true signals Layer 3 should verify.
18
+ */
19
+
20
+ import { PatternMatcher } from './pattern-matcher.js';
21
+ import { ModelClassifier } from './model-classifier.js';
22
+ import { appendFileSync, existsSync, mkdirSync } from 'fs';
23
+ import { join, dirname } from 'path';
24
+ import { fileURLToPath } from 'url';
25
+ import { homedir } from 'os';
26
+
27
// Get paths for logging uncertain cases.
// __filename/__dirname are reconstructed from import.meta.url because ES
// modules do not provide the CommonJS globals.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Runtime log of uncertain classifications, kept under the user's home dir.
const UNCERTAIN_LOG_DIR = join(homedir(), '.deliberate', 'active-learning');
const UNCERTAIN_LOG_FILE = join(UNCERTAIN_LOG_DIR, 'uncertain-cases.jsonl');
// Queue consumed by the admin approval workflow; resolved relative to this
// file (two levels up, in training/) — may be absent in production installs.
const PENDING_REVIEW_FILE = join(__dirname, '..', '..', 'training', 'pending-review.jsonl');

// Singleton instances for Layers 1 and 2; created eagerly by initialize()
// or lazily on first use by classify()/quickCheck().
let patternMatcher = null;
let modelClassifier = null;

// LLM fallback handler (set by calling code via setLlmFallbackHandler() or
// initialize({ llmFallback })). When null, Layer 3 is skipped.
let llmFallbackHandler = null;
40
+
41
/**
 * Record an uncertain classification for the active-learning pipeline.
 *
 * Two destinations are written on every call:
 *  1. training/pending-review.jsonl — feeds the admin approval workflow
 *  2. ~/.deliberate/active-learning/uncertain-cases.jsonl — runtime log for
 *     debugging and analysis
 *
 * @param {Object} caseData - The uncertain case data
 */
function logUncertainCase(caseData) {
  const timestamp = new Date().toISOString();

  // Entry shape expected by the admin review tooling (snake_case keys).
  const pendingEntry = {
    command: caseData.command,
    model_label: caseData.modelRisk,
    suggested_label: caseData.llmRisk || caseData.modelRisk,
    confidence: caseData.modelConfidence,
    coverage: caseData.modelCoverage,
    nearest_command: caseData.nearestCommand,
    source: 'runtime',
    timestamp
  };

  // The runtime log keeps the full case data verbatim, plus the timestamp.
  const runtimeEntry = { ...caseData, timestamp };

  // Best-effort append to the review queue; failures are deliberately
  // swallowed because the file is not shipped in production installs.
  try {
    appendFileSync(PENDING_REVIEW_FILE, JSON.stringify(pendingEntry) + '\n');
  } catch (error) {
    // Pending review file may not exist in production - that's ok
  }

  // The runtime log directory is created on demand; a failure here is
  // surfaced as a warning rather than thrown.
  try {
    if (!existsSync(UNCERTAIN_LOG_DIR)) {
      mkdirSync(UNCERTAIN_LOG_DIR, { recursive: true });
    }
    appendFileSync(UNCERTAIN_LOG_FILE, JSON.stringify(runtimeEntry) + '\n');
  } catch (error) {
    console.warn('[Classifier] Failed to log uncertain case:', error.message);
  }
}
87
+
88
/**
 * Register the async handler Layer 3 uses to verify uncertain
 * classifications. Passing a falsy value effectively disables Layer 3.
 *
 * @param {Function} handler - Async function(command, modelResult) => { risk, reason }
 */
export function setLlmFallbackHandler(handler) {
  llmFallbackHandler = handler;
}
95
+
96
/**
 * Initialize the classifier system.
 *
 * Creates fresh Layer 1/Layer 2 instances, optionally registers the Layer 3
 * LLM fallback handler, and optionally warms up the ML model so the first
 * classification is fast.
 *
 * @param {Object} options - Configuration options
 * @param {boolean} options.preloadModel - Whether to preload the ML model
 * @param {Function} options.llmFallback - Optional LLM fallback handler
 * @returns {Promise<void>}
 */
export async function initialize(options = {}) {
  const { llmFallback, preloadModel: warmUp } = options;

  patternMatcher = new PatternMatcher();
  modelClassifier = new ModelClassifier();

  if (llmFallback) {
    llmFallbackHandler = llmFallback;
  }

  if (warmUp) {
    await modelClassifier.initialize();
  }
}
115
+
116
/**
 * Classify an input (command, file path, or content)
 * @param {string} input - The input to classify
 * @param {string} type - Type: 'command', 'filepath', 'content', 'edit', 'write'
 * @param {Object} context - Additional context (e.g., file path for content)
 * @returns {Promise<ClassificationResult>}
 *
 * @typedef {Object} ClassificationResult
 * @property {string} risk - 'SAFE', 'MODERATE', or 'DANGEROUS'
 * @property {string} reason - Human-readable explanation
 * @property {string} source - 'pattern', 'model', 'model+llm', 'llm', or
 *   'llm-conservative' (model/LLM disagreement resolved conservatively)
 * @property {boolean} canOverride - Whether user can override this decision
 * @property {number} [score] - Confidence score (0-1) for model classifications
 * @property {boolean} [needsLlmFallback] - Whether LLM verification was/is needed
 * @property {number} [coverageScore] - How well training data covers this input
 * @property {Object} [layers] - Results from each layer for debugging
 */
export async function classify(input, type = 'command', context = {}) {
  // Lazily create the layer instances so classify() works even when
  // initialize() was never called.
  if (!patternMatcher) {
    patternMatcher = new PatternMatcher();
  }
  if (!modelClassifier) {
    modelClassifier = new ModelClassifier();
  }

  const layers = {};

  // Layer 1: Pattern Matching (authoritative)
  let patternResult;
  switch (type) {
    case 'command':
      patternResult = patternMatcher.checkCommand(input);
      break;
    case 'filepath':
    case 'write':
      patternResult = patternMatcher.checkFilePath(input);
      break;
    case 'content':
    case 'edit':
      patternResult = patternMatcher.checkContent(input);
      break;
    default:
      // Unknown types are treated as commands.
      patternResult = patternMatcher.checkCommand(input);
  }

  layers.pattern = patternResult;

  // If pattern matched, return immediately (authoritative)
  if (patternResult.matched) {
    return {
      ...patternResult,
      needsLlmFallback: false,
      layers
    };
  }

  // Layer 2: Model Classification
  // NOTE(review): 'filepath' has no dedicated model route and falls through
  // to classifyCommand here, even though Layer 1 routes it to checkFilePath —
  // confirm this asymmetry is intentional.
  let modelResult;
  try {
    switch (type) {
      case 'command':
        modelResult = await modelClassifier.classifyCommand(input);
        break;
      case 'content':
      case 'edit':
      case 'write':
        modelResult = await modelClassifier.classifyContent(input, context.filePath);
        break;
      default:
        modelResult = await modelClassifier.classifyCommand(input);
    }
  } catch (error) {
    // Model failed - return safe default with warning. MODERATE keeps the
    // decision overridable while flagging it for LLM verification.
    modelResult = {
      risk: 'MODERATE',
      score: 0.5,
      reason: `Model unavailable: ${error.message}`,
      source: 'model',
      canOverride: true,
      needsLlmFallback: true,
      error: error.message
    };
  }

  layers.model = modelResult;

  // Layer 3: LLM Fallback (if needed and handler is set). Only commands are
  // verified; other types return the model result with needsLlmFallback
  // still set so the caller can decide what to do.
  if (modelResult.needsLlmFallback && llmFallbackHandler && type === 'command') {
    try {
      const llmResult = await llmFallbackHandler(input, modelResult);
      layers.llm = llmResult;

      // Log the case for active learning (whether they agree or not)
      logUncertainCase({
        command: input,
        modelRisk: modelResult.risk,
        modelConfidence: modelResult.score,
        modelCoverage: modelResult.coverageScore,
        nearestCommand: modelResult.nearestCommand,
        nearestLabel: modelResult.nearestLabel,
        llmRisk: llmResult.risk,
        llmReason: llmResult.reason,
        agreed: modelResult.risk === llmResult.risk
      });

      // If LLM disagrees with model, use LLM result (it has more context)
      // But if model was DANGEROUS and LLM says SAFE, be conservative - use MODERATE
      if (llmResult.risk !== modelResult.risk) {
        if (modelResult.risk === 'DANGEROUS' && llmResult.risk === 'SAFE') {
          // Conservative: don't fully trust LLM to override DANGEROUS
          return {
            risk: 'MODERATE',
            score: 0.5,
            reason: `Model flagged as dangerous, LLM disagrees - manual review recommended`,
            source: 'llm-conservative',
            canOverride: true,
            needsLlmFallback: false,
            coverageScore: modelResult.coverageScore,
            layers
          };
        }

        // Use LLM result
        return {
          risk: llmResult.risk,
          score: 0.7, // LLM results get moderate confidence
          reason: llmResult.reason || `LLM verification: ${llmResult.risk}`,
          source: 'llm',
          canOverride: llmResult.risk !== 'DANGEROUS',
          needsLlmFallback: false,
          coverageScore: modelResult.coverageScore,
          layers
        };
      }

      // LLM agrees with model - boost confidence
      return {
        ...modelResult,
        score: Math.min(0.95, modelResult.score + 0.15),
        reason: `${modelResult.reason} (LLM verified)`,
        source: 'model+llm',
        needsLlmFallback: false,
        layers
      };
    } catch (error) {
      // LLM fallback failed - just use model result
      console.warn('[Classifier] LLM fallback failed:', error.message);
      layers.llmError = error.message;
    }
  }

  return {
    ...modelResult,
    layers
  };
}
273
+
274
/**
 * Quick pattern-only check (no async, no model).
 * Use for fast pre-screening before full classification.
 *
 * @param {string} input - The input to check
 * @param {string} type - Type: 'command', 'filepath', 'content'
 * @returns {PatternResult}
 */
export function quickCheck(input, type = 'command') {
  if (!patternMatcher) {
    patternMatcher = new PatternMatcher();
  }

  if (type === 'filepath') {
    return patternMatcher.checkFilePath(input);
  }
  if (type === 'content') {
    return patternMatcher.checkContent(input);
  }
  // 'command' and any unrecognized type use the command matcher.
  return patternMatcher.checkCommand(input);
}
297
+
298
/**
 * Get classifier status.
 * @returns {Object} Status of all classifier layers
 */
export function getStatus() {
  const modelStatus = modelClassifier
    ? modelClassifier.getStatus()
    : { ready: false };

  return {
    patternMatcher: {
      ready: Boolean(patternMatcher)
    },
    modelClassifier: modelStatus
  };
}
310
+
311
/**
 * Preload the model (useful for faster first classification).
 * Creates the Layer 2 singleton if it does not exist yet, then awaits its
 * initialization.
 *
 * @returns {Promise<void>}
 */
export async function preloadModel() {
  if (modelClassifier === null) {
    modelClassifier = new ModelClassifier();
  }
  await modelClassifier.initialize();
}
321
+
322
+ // Export classes for direct use
323
+ export { PatternMatcher } from './pattern-matcher.js';
324
+ export { ModelClassifier } from './model-classifier.js';