deliberate 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,325 @@
+ #!/usr/bin/env python3
+ """
+ Build the CmdCaliper-based command classifier for Deliberate.
+
+ This script:
+ 1. Loads the expanded training dataset
+ 2. Generates CmdCaliper embeddings for all commands
+ 3. Builds a malicious command embeddings database for similarity matching
+ 4. Trains a classifier head on the embeddings
+ 5. Exports everything for use in the npm package
+ """
+
+ import json
+ import os
+ import numpy as np
+ from pathlib import Path
+ from sentence_transformers import SentenceTransformer
+ from sklearn.model_selection import train_test_split
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+ from sklearn.neural_network import MLPClassifier
+ from sklearn.metrics import classification_report, confusion_matrix
+ from sklearn.preprocessing import LabelEncoder
+ import pickle
+ import warnings
+ import argparse
+ warnings.filterwarnings('ignore')
+
+ # Paths
+ SCRIPT_DIR = Path(__file__).parent
+ DATA_FILE = SCRIPT_DIR / "expanded-command-safety.jsonl"
+ OUTPUT_DIR = SCRIPT_DIR.parent / "models"
+ OUTPUT_DIR.mkdir(exist_ok=True)
+
+ # Model configurations
+ MODEL_CONFIGS = {
+     "small": {"hf_id": "CyCraftAI/CmdCaliper-small", "dim": 384},
+     "base": {"hf_id": "CyCraftAI/CmdCaliper-base", "dim": 768},
+     "large": {"hf_id": "CyCraftAI/CmdCaliper-large", "dim": 1024}
+ }
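+ # Only "hf_id" is read later in this script; the "dim" values are informational
+ # (the expected embedding size of each CmdCaliper variant).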
+
+ def load_dataset(path: Path) -> list[dict]:
+     """Load the JSONL training dataset."""
+     data = []
+     with open(path, 'r') as f:
+         for line in f:
+             line = line.strip()
+             if line:
+                 data.append(json.loads(line))
+     return data
+
+ def generate_embeddings(model, commands: list[str]) -> np.ndarray:
+     """Generate embeddings for a list of commands."""
+     print(f"Generating embeddings for {len(commands)} commands...")
+     embeddings = model.encode(commands, show_progress_bar=True, convert_to_numpy=True)
+     return embeddings
+
+ def build_malicious_db(data: list[dict], embeddings: np.ndarray) -> dict:
+     """Build the malicious command embeddings database."""
+     malicious_db = {
+         'DANGEROUS': {'commands': [], 'embeddings': [], 'categories': []},
+         'MODERATE': {'commands': [], 'embeddings': [], 'categories': []},
+     }
+
+     for i, item in enumerate(data):
+         label = item['label']
+         if label in malicious_db:
+             malicious_db[label]['commands'].append(item['command'])
+             malicious_db[label]['embeddings'].append(embeddings[i])
+             malicious_db[label]['categories'].append(item.get('category', 'unknown'))
+
+     # Convert to numpy arrays
+     for label in malicious_db:
+         malicious_db[label]['embeddings'] = np.array(malicious_db[label]['embeddings'])
+
+     return malicious_db
+
+ def train_classifier(X: np.ndarray, y: np.ndarray, label_encoder: LabelEncoder):
+     """Train and evaluate multiple classifiers, return the best one."""
+     X_train, X_test, y_train, y_test = train_test_split(
+         X, y, test_size=0.2, random_state=42, stratify=y
+     )
+
+     classifiers = {
+         'RandomForest': RandomForestClassifier(
+             n_estimators=100,
+             max_depth=10,
+             min_samples_split=5,
+             class_weight='balanced',
+             random_state=42
+         ),
+         'GradientBoosting': GradientBoostingClassifier(
+             n_estimators=100,
+             max_depth=5,
+             learning_rate=0.1,
+             random_state=42
+         ),
+         'MLP': MLPClassifier(
+             hidden_layer_sizes=(128, 64),
+             activation='relu',
+             max_iter=500,
+             early_stopping=True,
+             random_state=42
+         )
+     }
+
+     best_score = 0
+     best_clf = None
+     best_name = None
+
+     print("\n" + "="*60)
+     print("CLASSIFIER COMPARISON")
+     print("="*60)
+
+     for name, clf in classifiers.items():
+         print(f"\nTraining {name}...")
+         clf.fit(X_train, y_train)
+         score = clf.score(X_test, y_test)
+         y_pred = clf.predict(X_test)
+
+         print(f"\n{name} Results:")
+         print(f"Accuracy: {score:.4f}")
+         print("\nClassification Report:")
+         print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
+
+         if score > best_score:
+             best_score = score
+             best_clf = clf
+             best_name = name
+
+     print("\n" + "="*60)
+     print(f"BEST CLASSIFIER: {best_name} (accuracy: {best_score:.4f})")
+     print("="*60)
+
+     # Retrain best classifier on full dataset
+     print(f"\nRetraining {best_name} on full dataset...")
+     best_clf.fit(X, y)
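+     # Note: the held-out accuracy reported above is measured before this final
+     # refit on the full dataset, so the exported classifier itself is not
+     # evaluated on unseen data.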
+
+     return best_clf, best_name
+
+ def export_for_js(embeddings: np.ndarray, data: list[dict], malicious_db: dict,
+                   classifier, label_encoder: LabelEncoder, output_dir: Path,
+                   model_size: str = "small", model_id: str = "CyCraftAI/CmdCaliper-small"):
+     """Export everything for JavaScript consumption."""
+
+     # Export malicious embeddings database as JSON (for similarity matching)
+     malicious_export = {}
+     for label, content in malicious_db.items():
+         malicious_export[label] = {
+             'commands': content['commands'],
+             'embeddings': content['embeddings'].tolist(),
+             'categories': content['categories']
+         }
+
+     with open(output_dir / 'malicious_embeddings.json', 'w') as f:
+         json.dump(malicious_export, f)
+     print(f"Saved malicious embeddings DB to {output_dir / 'malicious_embeddings.json'}")
+
+     # Export classifier as pickle (for Python inference)
+     # Save both model-specific and generic versions
+     classifier_file = f'classifier_{model_size}.pkl'
+     with open(output_dir / classifier_file, 'w+b') as f:
+         pickle.dump({
+             'classifier': classifier,
+             'label_encoder': label_encoder
+         }, f)
+     print(f"Saved classifier to {output_dir / classifier_file}")
+
+     # Also save as generic name for backwards compatibility
+     with open(output_dir / 'command_classifier.pkl', 'w+b') as f:
+         pickle.dump({
+             'classifier': classifier,
+             'label_encoder': label_encoder
+         }, f)
+     print(f"Saved classifier copy to {output_dir / 'command_classifier.pkl'}")
+
+     # Export classifier weights for JavaScript (if MLP)
+     if hasattr(classifier, 'coefs_'):
+         mlp_export = {
+             'weights': [w.tolist() for w in classifier.coefs_],
+             'biases': [b.tolist() for b in classifier.intercepts_],
+             'classes': label_encoder.classes_.tolist(),
+             'activation': classifier.activation
+         }
+         with open(output_dir / 'mlp_weights.json', 'w') as f:
+             json.dump(mlp_export, f)
+         print(f"Saved MLP weights for JS to {output_dir / 'mlp_weights.json'}")
+
+     # Export training metadata
+     metadata = {
+         'embedding_model': model_id,
+         'embedding_dim': embeddings.shape[1],
+         'num_examples': len(data),
+         'labels': label_encoder.classes_.tolist(),
+         'label_distribution': {
+             label: sum(1 for d in data if d['label'] == label)
+             for label in label_encoder.classes_
+         },
+         'categories': list(set(d.get('category', 'unknown') for d in data))
+     }
+     with open(output_dir / 'training_metadata.json', 'w') as f:
+         json.dump(metadata, f, indent=2)
+     print(f"Saved training metadata to {output_dir / 'training_metadata.json'}")
+
+ def compute_similarity_thresholds(malicious_db: dict, safe_embeddings: np.ndarray):
+     """Compute optimal similarity thresholds for each risk level."""
+     from sklearn.metrics.pairwise import cosine_similarity
+
+     thresholds = {}
+
+     for label in ['DANGEROUS', 'MODERATE']:
+         if len(malicious_db[label]['embeddings']) == 0:
+             continue
+
+         mal_emb = malicious_db[label]['embeddings']
+
+         # Compute similarities between malicious commands
+         intra_sims = cosine_similarity(mal_emb, mal_emb)
+         np.fill_diagonal(intra_sims, 0)
+         intra_mean = intra_sims[intra_sims > 0].mean()
+
+         # Compute similarities between malicious and safe
+         inter_sims = cosine_similarity(mal_emb, safe_embeddings)
+         inter_mean = inter_sims.mean()
+
+         # Threshold is midpoint between intra-class and inter-class similarity
+         threshold = (intra_mean + inter_mean) / 2
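+         # Illustrative arithmetic only: an intra-class mean of 0.60 and a
+         # cross-class mean of 0.30 would give a recommended threshold of 0.45.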
+
+         thresholds[label] = {
+             'threshold': float(threshold),
+             'intra_class_mean': float(intra_mean),
+             'inter_class_mean': float(inter_mean)
+         }
+
+         print(f"\n{label} similarity analysis:")
+         print(f" Intra-class mean similarity: {intra_mean:.4f}")
+         print(f" Cross-class mean similarity: {inter_mean:.4f}")
+         print(f" Recommended threshold: {threshold:.4f}")
+
+     return thresholds
+
+ def main():
+     parser = argparse.ArgumentParser(description="Build CmdCaliper command classifier")
+     parser.add_argument("--model", "-m", choices=["small", "base", "large"],
+                         default="base", help="Model size to use (default: base)")
+     args = parser.parse_args()
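+     # Example invocation (the script's filename is not shown in this diff, so the
+     # name below is assumed):
+     #   python build_classifier.py --model small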
+
+     model_size = args.model
+     model_config = MODEL_CONFIGS[model_size]
+
+     print("="*60)
+     print("DELIBERATE COMMAND CLASSIFIER BUILDER")
+     print("="*60)
+
+     # Load data
+     print(f"\nLoading dataset from {DATA_FILE}...")
+     data = load_dataset(DATA_FILE)
+     print(f"Loaded {len(data)} examples")
+
+     # Show distribution
+     labels = [d['label'] for d in data]
+     for label in set(labels):
+         count = labels.count(label)
+         print(f" {label}: {count} ({100*count/len(labels):.1f}%)")
+
+     # Load CmdCaliper model
+     print(f"\nLoading CmdCaliper-{model_size} model ({model_config['hf_id']})...")
+
+     # Check for local model first
+     local_model_path = OUTPUT_DIR / f"cmdcaliper-{model_size}"
+     if local_model_path.exists():
+         print(f" Using local model at {local_model_path}")
+         model = SentenceTransformer(str(local_model_path))
+     else:
+         print(f" Downloading from HuggingFace...")
+         model = SentenceTransformer(model_config['hf_id'])
+
+     print(f"Model loaded. Embedding dimension: {model.get_sentence_embedding_dimension()}")
+
+     # Generate embeddings
+     commands = [d['command'] for d in data]
+     embeddings = generate_embeddings(model, commands)
+     print(f"Generated embeddings shape: {embeddings.shape}")
+
+     # Build malicious DB
+     print("\nBuilding malicious command embeddings database...")
+     malicious_db = build_malicious_db(data, embeddings)
+     for label, content in malicious_db.items():
+         print(f" {label}: {len(content['commands'])} commands")
+
+     # Get safe embeddings for threshold computation
+     safe_indices = [i for i, d in enumerate(data) if d['label'] == 'SAFE']
+     safe_embeddings = embeddings[safe_indices]
+
+     # Compute similarity thresholds
+     print("\nComputing similarity thresholds...")
+     thresholds = compute_similarity_thresholds(malicious_db, safe_embeddings)
+
+     # Prepare for classifier training
+     label_encoder = LabelEncoder()
+     y = label_encoder.fit_transform(labels)
+     print(f"\nLabel encoding: {dict(zip(label_encoder.classes_, range(len(label_encoder.classes_))))}")
+
+     # Train classifier
+     classifier, clf_name = train_classifier(embeddings, y, label_encoder)
+
+     # Export everything
+     print("\nExporting models and data...")
+     export_for_js(embeddings, data, malicious_db, classifier, label_encoder, OUTPUT_DIR,
+                   model_size=model_size, model_id=model_config['hf_id'])
+
+     # Save thresholds
+     with open(OUTPUT_DIR / 'similarity_thresholds.json', 'w') as f:
+         json.dump(thresholds, f, indent=2)
+     print(f"Saved similarity thresholds to {OUTPUT_DIR / 'similarity_thresholds.json'}")
+
+     print("\n" + "="*60)
+     print("BUILD COMPLETE!")
+     print("="*60)
+     print(f"\nOutput files in {OUTPUT_DIR}:")
+     for f in OUTPUT_DIR.iterdir():
+         size = f.stat().st_size
+         print(f" {f.name}: {size:,} bytes")
+
+ if __name__ == '__main__':
+     main()