deliberate 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +11 -0
- package/README.md +180 -0
- package/bin/cli.js +113 -0
- package/hooks/__pycache__/deliberate-commands.cpython-312.pyc +0 -0
- package/hooks/deliberate-changes.py +606 -0
- package/hooks/deliberate-commands-post.py +126 -0
- package/hooks/deliberate-commands.py +1742 -0
- package/hooks/hooks.json +29 -0
- package/hooks/setup-check.py +67 -0
- package/hooks/test_skip_commands.py +293 -0
- package/package.json +51 -0
- package/src/classifier/classify_command.py +346 -0
- package/src/classifier/embed_command.py +56 -0
- package/src/classifier/index.js +324 -0
- package/src/classifier/model-classifier.js +531 -0
- package/src/classifier/pattern-matcher.js +230 -0
- package/src/config.js +207 -0
- package/src/index.js +23 -0
- package/src/install.js +754 -0
- package/src/server.js +239 -0
- package/src/uninstall.js +198 -0
- package/training/build_classifier.py +325 -0
- package/training/expanded-command-safety.jsonl +712 -0
package/src/classifier/index.js
@@ -0,0 +1,324 @@
+/**
+ * Classifier - Multi-layer security classification for Claude Code
+ *
+ * Architecture:
+ * Layer 1: Pattern Matcher - Deterministic regex, cannot be prompt-injected
+ * Layer 2: Model Classifier - ML-based, structured input, harder to bypass
+ * Layer 3: LLM Fallback - Called when classifier is uncertain (active learning)
+ *
+ * Active Learning Flow:
+ * 1. Pattern matcher checks first (authoritative if matched)
+ * 2. Model classifier runs and returns confidence + coverage score
+ * 3. If needsLlmFallback is true, LLM should verify the classification
+ * 4. Disagreements between model and LLM are logged for retraining
+ *
+ * If Layer 1 matches, result is authoritative and final.
+ * If Layer 1 doesn't match, Layer 2 provides classification.
+ * If Layer 2 is uncertain, needsLlmFallback=true signals Layer 3 should verify.
+ */
+
+import { PatternMatcher } from './pattern-matcher.js';
+import { ModelClassifier } from './model-classifier.js';
+import { appendFileSync, existsSync, mkdirSync } from 'fs';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
+import { homedir } from 'os';
+
+// Get paths for logging uncertain cases
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+const UNCERTAIN_LOG_DIR = join(homedir(), '.deliberate', 'active-learning');
+const UNCERTAIN_LOG_FILE = join(UNCERTAIN_LOG_DIR, 'uncertain-cases.jsonl');
+const PENDING_REVIEW_FILE = join(__dirname, '..', '..', 'training', 'pending-review.jsonl');
+
+// Singleton instances
+let patternMatcher = null;
+let modelClassifier = null;
+
+// LLM fallback handler (set by calling code)
+let llmFallbackHandler = null;
+
+/**
+ * Log an uncertain case for active learning
+ * Writes to both:
+ * 1. ~/.deliberate/active-learning/uncertain-cases.jsonl (runtime log)
+ * 2. training/pending-review.jsonl (for admin approval workflow)
+ *
+ * @param {Object} caseData - The uncertain case data
+ */
+function logUncertainCase(caseData) {
+  const timestamp = new Date().toISOString();
+
+  // Format for pending review (admin approval workflow)
+  const pendingEntry = {
+    command: caseData.command,
+    model_label: caseData.modelRisk,
+    suggested_label: caseData.llmRisk || caseData.modelRisk,
+    confidence: caseData.modelConfidence,
+    coverage: caseData.modelCoverage,
+    nearest_command: caseData.nearestCommand,
+    source: 'runtime',
+    timestamp
+  };
+
+  // Format for runtime log (debugging/analysis)
+  const runtimeEntry = {
+    ...caseData,
+    timestamp
+  };
+
+  // Write to pending review file (for admin approval)
+  try {
+    appendFileSync(PENDING_REVIEW_FILE, JSON.stringify(pendingEntry) + '\n');
+  } catch (error) {
+    // Pending review file may not exist in production - that's ok
+  }
+
+  // Write to runtime log
+  try {
+    if (!existsSync(UNCERTAIN_LOG_DIR)) {
+      mkdirSync(UNCERTAIN_LOG_DIR, { recursive: true });
+    }
+    appendFileSync(UNCERTAIN_LOG_FILE, JSON.stringify(runtimeEntry) + '\n');
+  } catch (error) {
+    console.warn('[Classifier] Failed to log uncertain case:', error.message);
+  }
+}
+
+/**
+ * Set the LLM fallback handler for uncertain classifications
+ * @param {Function} handler - Async function(command, modelResult) => { risk, reason }
+ */
+export function setLlmFallbackHandler(handler) {
+  llmFallbackHandler = handler;
+}
+
+/**
+ * Initialize the classifier system
+ * @param {Object} options - Configuration options
+ * @param {boolean} options.preloadModel - Whether to preload the ML model
+ * @param {Function} options.llmFallback - Optional LLM fallback handler
+ * @returns {Promise<void>}
+ */
+export async function initialize(options = {}) {
+  patternMatcher = new PatternMatcher();
+  modelClassifier = new ModelClassifier();
+
+  if (options.llmFallback) {
+    llmFallbackHandler = options.llmFallback;
+  }
+
+  if (options.preloadModel) {
+    await modelClassifier.initialize();
+  }
+}
+
+/**
+ * Classify an input (command, file path, or content)
+ * @param {string} input - The input to classify
+ * @param {string} type - Type: 'command', 'filepath', 'content', 'edit', 'write'
+ * @param {Object} context - Additional context (e.g., file path for content)
+ * @returns {Promise<ClassificationResult>}
+ *
+ * @typedef {Object} ClassificationResult
+ * @property {string} risk - 'SAFE', 'MODERATE', or 'DANGEROUS'
+ * @property {string} reason - Human-readable explanation
+ * @property {string} source - 'pattern', 'model', or 'llm'
+ * @property {boolean} canOverride - Whether user can override this decision
+ * @property {number} [score] - Confidence score (0-1) for model classifications
+ * @property {boolean} [needsLlmFallback] - Whether LLM verification was/is needed
+ * @property {number} [coverageScore] - How well training data covers this input
+ * @property {Object} [layers] - Results from each layer for debugging
+ */
+export async function classify(input, type = 'command', context = {}) {
+  // Ensure initialized
+  if (!patternMatcher) {
+    patternMatcher = new PatternMatcher();
+  }
+  if (!modelClassifier) {
+    modelClassifier = new ModelClassifier();
+  }
+
+  const layers = {};
+
+  // Layer 1: Pattern Matching (authoritative)
+  let patternResult;
+  switch (type) {
+    case 'command':
+      patternResult = patternMatcher.checkCommand(input);
+      break;
+    case 'filepath':
+    case 'write':
+      patternResult = patternMatcher.checkFilePath(input);
+      break;
+    case 'content':
+    case 'edit':
+      patternResult = patternMatcher.checkContent(input);
+      break;
+    default:
+      patternResult = patternMatcher.checkCommand(input);
+  }
+
+  layers.pattern = patternResult;
+
+  // If pattern matched, return immediately (authoritative)
+  if (patternResult.matched) {
+    return {
+      ...patternResult,
+      needsLlmFallback: false,
+      layers
+    };
+  }
+
+  // Layer 2: Model Classification
+  let modelResult;
+  try {
+    switch (type) {
+      case 'command':
+        modelResult = await modelClassifier.classifyCommand(input);
+        break;
+      case 'content':
+      case 'edit':
+      case 'write':
+        modelResult = await modelClassifier.classifyContent(input, context.filePath);
+        break;
+      default:
+        modelResult = await modelClassifier.classifyCommand(input);
+    }
+  } catch (error) {
+    // Model failed - return safe default with warning
+    modelResult = {
+      risk: 'MODERATE',
+      score: 0.5,
+      reason: `Model unavailable: ${error.message}`,
+      source: 'model',
+      canOverride: true,
+      needsLlmFallback: true,
+      error: error.message
+    };
+  }
+
+  layers.model = modelResult;
+
+  // Layer 3: LLM Fallback (if needed and handler is set)
+  if (modelResult.needsLlmFallback && llmFallbackHandler && type === 'command') {
+    try {
+      const llmResult = await llmFallbackHandler(input, modelResult);
+      layers.llm = llmResult;
+
+      // Log the case for active learning (whether they agree or not)
+      logUncertainCase({
+        command: input,
+        modelRisk: modelResult.risk,
+        modelConfidence: modelResult.score,
+        modelCoverage: modelResult.coverageScore,
+        nearestCommand: modelResult.nearestCommand,
+        nearestLabel: modelResult.nearestLabel,
+        llmRisk: llmResult.risk,
+        llmReason: llmResult.reason,
+        agreed: modelResult.risk === llmResult.risk
+      });
+
+      // If LLM disagrees with model, use LLM result (it has more context)
+      // But if model was DANGEROUS and LLM says SAFE, be conservative - use MODERATE
+      if (llmResult.risk !== modelResult.risk) {
+        if (modelResult.risk === 'DANGEROUS' && llmResult.risk === 'SAFE') {
+          // Conservative: don't fully trust LLM to override DANGEROUS
+          return {
+            risk: 'MODERATE',
+            score: 0.5,
+            reason: `Model flagged as dangerous, LLM disagrees - manual review recommended`,
+            source: 'llm-conservative',
+            canOverride: true,
+            needsLlmFallback: false,
+            coverageScore: modelResult.coverageScore,
+            layers
+          };
+        }
+
+        // Use LLM result
+        return {
+          risk: llmResult.risk,
+          score: 0.7, // LLM results get moderate confidence
+          reason: llmResult.reason || `LLM verification: ${llmResult.risk}`,
+          source: 'llm',
+          canOverride: llmResult.risk !== 'DANGEROUS',
+          needsLlmFallback: false,
+          coverageScore: modelResult.coverageScore,
+          layers
+        };
+      }
+
+      // LLM agrees with model - boost confidence
+      return {
+        ...modelResult,
+        score: Math.min(0.95, modelResult.score + 0.15),
+        reason: `${modelResult.reason} (LLM verified)`,
+        source: 'model+llm',
+        needsLlmFallback: false,
+        layers
+      };
+    } catch (error) {
+      // LLM fallback failed - just use model result
+      console.warn('[Classifier] LLM fallback failed:', error.message);
+      layers.llmError = error.message;
+    }
+  }
+
+  return {
+    ...modelResult,
+    layers
+  };
+}
+
+/**
+ * Quick pattern-only check (no async, no model)
+ * Use for fast pre-screening before full classification
+ * @param {string} input - The input to check
+ * @param {string} type - Type: 'command', 'filepath', 'content'
+ * @returns {PatternResult}
+ */
+export function quickCheck(input, type = 'command') {
+  if (!patternMatcher) {
+    patternMatcher = new PatternMatcher();
+  }
+
+  switch (type) {
+    case 'command':
+      return patternMatcher.checkCommand(input);
+    case 'filepath':
+      return patternMatcher.checkFilePath(input);
+    case 'content':
+      return patternMatcher.checkContent(input);
+    default:
+      return patternMatcher.checkCommand(input);
+  }
+}
+
+/**
+ * Get classifier status
+ * @returns {Object} Status of all classifier layers
+ */
+export function getStatus() {
+  return {
+    patternMatcher: {
+      ready: !!patternMatcher
+    },
+    modelClassifier: modelClassifier ? modelClassifier.getStatus() : { ready: false }
+  };
+}
+
+/**
+ * Preload the model (useful for faster first classification)
+ * @returns {Promise<void>}
+ */
+export async function preloadModel() {
+  if (!modelClassifier) {
+    modelClassifier = new ModelClassifier();
+  }
+  await modelClassifier.initialize();
+}
+
+// Export classes for direct use
+export { PatternMatcher } from './pattern-matcher.js';
+export { ModelClassifier } from './model-classifier.js';