deliberate 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,606 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Deliberate - File Change Analysis Hook
5
+
6
+ PostToolUse hook that explains what file changes occurred after Write/Edit operations.
7
+ Multi-layer architecture for robust classification:
8
+
9
+ Layer 1: Pattern matching + ML model (fast, immune to prompt injection)
10
+ Layer 2: LLM explanation (natural language, configurable provider)
11
+
12
+ https://github.com/the-radar/deliberate
13
+ """
14
+
15
+ import json
16
+ import sys
17
+ import os
18
+ import urllib.request
19
+ import urllib.error
20
+ from pathlib import Path
21
+
22
# Configuration: endpoints of the local classifier server (Layer 1).
CLASSIFIER_WRITE_URL = "http://localhost:8765/classify/write"
CLASSIFIER_EDIT_URL = "http://localhost:8765/classify/edit"

# The config file lives in one of two places:
#   - plugin mode (CLAUDE_PLUGIN_ROOT is set): <plugin root>/.deliberate/config.json
#   - npm install mode (variable unset/empty):  ~/.deliberate/config.json
PLUGIN_ROOT = os.environ.get('CLAUDE_PLUGIN_ROOT')
CONFIG_FILE = str(
    (Path(PLUGIN_ROOT) if PLUGIN_ROOT else Path.home()) / ".deliberate" / "config.json"
)

# Tunables used by the hook.
MAX_CONTENT_LINES = 100   # lines of a written file kept in the LLM preview
TIMEOUT_SECONDS = 30      # timeout for the LLM helper subprocess
CLASSIFIER_TIMEOUT = 5    # timeout for the classifier HTTP request
DEBUG = False             # when True, debug() prints to stderr
USE_CLASSIFIER = True     # when False, call_classifier() returns None immediately
40
+
41
+ # Session state for deduplication and Pre/Post caching
42
+ import hashlib
43
+ import random
44
+ from datetime import datetime
45
+
46
+
47
def get_state_file(session_id: str) -> str:
    """Return the path of the per-session dedup state file under ``~/.claude``."""
    state_name = f"deliberate_changes_state_{session_id}.json"
    return os.path.expanduser("~/.claude/" + state_name)
50
+
51
+
52
def get_cache_file(session_id: str, file_hash: str) -> str:
    """Return the path of the cache file shared between the Pre and Post hooks."""
    # Using /tmp is intentional for ephemeral cache (nosec B108)
    cache_name = "deliberate_cache_{}_{}.json".format(session_id, file_hash)
    return "/tmp/" + cache_name  # nosec B108
56
+
57
+
58
def cleanup_old_state_files():
    """Occasionally prune stale ``deliberate_*.json`` files from ``~/.claude``.

    About nine calls out of ten return immediately. Otherwise every file in
    ``~/.claude`` whose name starts with ``deliberate_`` and ends with
    ``.json`` is removed if it was last modified more than seven days ago.
    All errors are swallowed: cleanup is best effort.
    """
    # Probabilistic throttle so the directory is not scanned on every call.
    if random.random() > 0.1:
        return

    max_age_seconds = 7 * 24 * 60 * 60
    try:
        claude_dir = os.path.expanduser("~/.claude")
        if not os.path.exists(claude_dir):
            return

        cutoff = datetime.now().timestamp() - max_age_seconds
        for entry in os.listdir(claude_dir):
            if not (entry.startswith("deliberate_") and entry.endswith(".json")):
                continue
            candidate = os.path.join(claude_dir, entry)
            try:
                if os.path.getmtime(candidate) < cutoff:
                    os.remove(candidate)
            except (OSError, IOError):
                # A file that cannot be inspected or removed is simply skipped.
                pass
    except Exception:
        pass
78
+
79
+
80
def load_state(session_id: str) -> set:
    """Load the set of already-shown warning keys for this session.

    Args:
        session_id: Session identifier used to locate the state file.

    Returns:
        The stored warning keys, or an empty set when the state file is
        missing, unreadable, not valid JSON, or does not contain a JSON list.
        Non-string list entries are ignored.
    """
    state_file = get_state_file(session_id)
    try:
        with open(state_file, 'r', encoding='utf-8') as f:
            stored = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (corrupt content).
        return set()

    # save_state() always writes a list; anything else is a damaged file.
    # Filtering to strings also avoids TypeError on unhashable entries.
    if not isinstance(stored, list):
        return set()
    return {key for key in stored if isinstance(key, str)}
90
+
91
+
92
def save_state(session_id: str, shown_warnings: set):
    """Write the shown-warning keys for this session to its state file.

    The parent directory is created when needed. I/O failures are ignored,
    so a failed save only means a warning may be shown again.
    """
    target = get_state_file(session_id)
    try:
        parent_dir = os.path.dirname(target)
        os.makedirs(parent_dir, exist_ok=True)
        with open(target, 'w') as handle:
            # JSON has no set type, so the keys are stored as a list.
            json.dump(list(shown_warnings), handle)
    except IOError:
        pass
101
+
102
+
103
def get_warning_key(file_path: str, content_hash: str) -> str:
    """Build the dedup key: ``file-<8 hex of md5(path)>-<first 8 of content_hash>``."""
    # MD5 used for cache key only, not security (nosec B324)
    path_digest = hashlib.md5(file_path.encode(), usedforsecurity=False).hexdigest()
    return "file-{}-{}".format(path_digest[:8], content_hash[:8])
107
+
108
+
109
def load_blocking_config() -> dict:
    """Read the ``blocking`` section of the config file at ``CONFIG_FILE``.

    Returns:
        A dict with ``enabled`` (default ``False``) and
        ``confidenceThreshold`` (default ``0.85``). The defaults are returned
        when the file is missing or anything goes wrong while reading it.
    """
    defaults = {"enabled": False, "confidenceThreshold": 0.85}
    try:
        config_path = Path(CONFIG_FILE)
        if not config_path.exists():
            return defaults
        with open(config_path, 'r', encoding='utf-8') as f:
            blocking = json.load(f).get("blocking", {})
        return {
            option: blocking.get(option, fallback)
            for option, fallback in defaults.items()
        }
    except Exception:
        return defaults
124
+
125
+
126
def load_dedup_config() -> bool:
    """Return the ``deduplication.enabled`` setting from the config file.

    Deduplication is on by default: ``True`` is returned when the file or the
    setting is absent, and when the file cannot be read or parsed.
    """
    try:
        config_path = Path(CONFIG_FILE)
        if not config_path.exists():
            return True
        with open(config_path, 'r', encoding='utf-8') as f:
            settings = json.load(f)
        dedup_section = settings.get("deduplication", {})
        return dedup_section.get("enabled", True)
    except Exception:
        return True
137
+
138
+
139
def extract_content(tool_name: str, tool_input: dict) -> tuple:
    """Normalise Write/Edit/MultiEdit tool input into one 5-tuple.

    Returns:
        ``(file_path, content, operation, old_string, new_string)`` where
        ``operation`` is ``"write"``, ``"edit"`` or ``"multiedit"``. For a
        MultiEdit the old and new strings of all edits are joined with single
        spaces. Any other tool name yields ``("", "", "", None, None)``.
    """
    file_path = tool_input.get("file_path", "")

    if tool_name == "Write":
        body = tool_input.get("content", "")
        return file_path, body, "write", None, body

    if tool_name == "Edit":
        replaced = tool_input.get("old_string", "")
        replacement = tool_input.get("new_string", "")
        return file_path, replacement, "edit", replaced, replacement

    if tool_name == "MultiEdit":
        edits = tool_input.get("edits", [])
        # Combine every edit so the batch can be analysed as one change.
        joined_new = " ".join([item.get("new_string", "") for item in edits])
        joined_old = " ".join([item.get("old_string", "") for item in edits])
        return file_path, joined_new, "multiedit", joined_old, joined_new

    return "", "", "", None, None
160
+
161
+
162
def get_token_from_keychain():
    # type: () -> str | None
    """Read the Claude Code OAuth access token via ``/usr/bin/security``.

    Returns the ``claudeAiOauth.accessToken`` value of the
    "Claude Code-credentials" keychain item when it starts with
    ``sk-ant-oat01-``; otherwise (including on any error) ``None``.
    """
    try:
        import subprocess
        lookup = subprocess.run(
            ["/usr/bin/security", "find-generic-password", "-s", "Claude Code-credentials", "-w"],
            capture_output=True,
            text=True,
            timeout=5
        )
        # A failed lookup and an empty payload are handled the same way.
        raw_credentials = lookup.stdout.strip() if lookup.returncode == 0 else ""
        if not raw_credentials:
            return None

        oauth_section = json.loads(raw_credentials).get("claudeAiOauth", {})
        token = oauth_section.get("accessToken")
        return token if token and token.startswith("sk-ant-oat01-") else None
    except Exception:
        return None
188
+
189
+
190
def load_llm_config():
    # type: () -> dict | None
    """Read the ``llm`` section of the config file at ``CONFIG_FILE``.

    Returns a dict with ``provider``, ``base_url``, ``api_key`` and ``model``,
    or ``None`` when the file is missing, no provider is configured, or the
    file cannot be read. For the ``claude-subscription`` provider a token
    found in the keychain takes precedence over the configured ``apiKey``.
    """
    try:
        config_path = Path(CONFIG_FILE)
        if not config_path.exists():
            return None

        with open(config_path, 'r', encoding='utf-8') as f:
            llm = json.load(f).get("llm", {})

        provider = llm.get("provider")
        if not provider:
            return None

        api_key = llm.get("apiKey")
        if provider == "claude-subscription":
            # Prefer a fresh keychain token; keep the configured key otherwise.
            api_key = get_token_from_keychain() or api_key

        return {
            "provider": provider,
            "base_url": llm.get("baseUrl"),
            "api_key": api_key,
            "model": llm.get("model")
        }
    except Exception as e:
        debug(f"Error loading config: {e}")
    return None
219
+
220
def debug(msg):
    """Print *msg* to stderr with the hook's prefix when ``DEBUG`` is on."""
    if not DEBUG:
        return
    print(f"[deliberate-changes] {msg}", file=sys.stderr)
223
+
224
+
225
def call_classifier(operation: str, file_path: str, content: "str | None" = None, old_string: "str | None" = None, new_string: "str | None" = None) -> "dict | None":
    """Call the classifier server for pattern + ML based classification.

    Args:
        operation: ``"write"`` posts to the write endpoint; any other value
            is treated as an edit and posts to the edit endpoint.
        file_path: Path of the file being changed.
        content: Written content (write only); the first 2000 characters are sent.
        old_string: Replaced text (edit only); the first 1000 characters are sent.
        new_string: Replacement text (edit only); the first 1000 characters are sent.

    Returns:
        The decoded JSON object returned by the server, or ``None`` when the
        classifier is disabled, unreachable, fails, or answers with anything
        other than a JSON object.
    """
    # Annotations are quoted so that ``X | None`` is never evaluated at
    # definition time, which would raise TypeError before Python 3.10.
    if not USE_CLASSIFIER:
        return None

    try:
        if operation == "write":
            url = CLASSIFIER_WRITE_URL
            payload = {
                "filePath": file_path,
                "content": content[:2000] if content else None
            }
        else:  # edit
            url = CLASSIFIER_EDIT_URL
            payload = {
                "filePath": file_path,
                "oldString": old_string[:1000] if old_string else None,
                "newString": new_string[:1000] if new_string else None
            }

        req = urllib.request.Request(
            url,
            data=json.dumps(payload).encode('utf-8'),
            headers={"Content-Type": "application/json"},
            method="POST"
        )

        with urllib.request.urlopen(req, timeout=CLASSIFIER_TIMEOUT) as response:  # nosec B310
            result = json.loads(response.read().decode('utf-8'))

    except urllib.error.URLError as e:
        debug(f"Classifier unavailable: {e}")
        return None
    except Exception as e:
        debug(f"Classifier error: {e}")
        return None

    # Callers use dict methods on the result; reject any other JSON value.
    if not isinstance(result, dict):
        debug(f"Classifier returned a non-object response: {result!r}")
        return None

    debug(f"Classifier result: {result}")
    return result
263
+
264
+
265
def _build_explanation_prompt(file_name, operation, content, context_note):
    """Render the analysis prompt for a write or an edit/multiedit operation."""
    if operation == "write":
        return f"""Analyze this file write for both purpose and security implications. Be concise (1-2 sentences).{context_note}

File: {file_name}
Operation: Created/overwrote file

Content preview:
```
{content[:2000]}
```

Consider:
- What does this file do?
- Any security concerns? (credentials, permissions, executable code, network access, data exposure)
- Could this be malicious or have unintended side effects?

Format your response as:
RISK: [SAFE|MODERATE|DANGEROUS]
EXPLANATION: [your explanation including any security notes]"""

    # edit or multiedit
    operation_label = "Multiple edits (batch)" if operation == "multiedit" else "Find and replace"
    return f"""Analyze this edit for both purpose and security implications. Be concise (1-2 sentences).{context_note}

File: {file_name}
Operation: {operation_label}

{content}

Consider:
- What does this change do?
- Any security concerns? (weakening validation, exposing data, changing permissions, modifying auth logic)
- Could this introduce vulnerabilities?

Format your response as:
RISK: [SAFE|MODERATE|DANGEROUS]
EXPLANATION: [your explanation including any security notes]"""


def _render_sdk_script(token, model, prompt):
    """Return the source of the one-shot Claude Agent SDK helper script."""
    # Values are embedded with repr() so they become valid Python literals.
    return f"""
import os
import sys
import json
import asyncio
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient

# Set OAuth token from keychain
token = {repr(token)}
os.environ["CLAUDE_CODE_OAUTH_TOKEN"] = token

async def main():
    # Create SDK client - disallow all tools (just need text response)
    client = ClaudeSDKClient(
        options=ClaudeAgentOptions(
            model={repr(model)},
            max_turns=1,
            disallowed_tools=['Task', 'TaskOutput', 'Bash', 'Glob', 'Grep', 'ExitPlanMode', 'Read', 'Edit', 'Write', 'NotebookEdit', 'WebFetch', 'TodoWrite', 'KillShell', 'AskUserQuestion', 'Skill', 'SlashCommand', 'EnterPlanMode', 'WebSearch']
        )
    )

    # Send prompt
    prompt = {repr(prompt)}

    async with client:
        await client.query(prompt)

        # Collect response from ResultMessage
        response_text = ""
        async for msg in client.receive_response():
            msg_type = type(msg).__name__
            if msg_type == 'ResultMessage' and hasattr(msg, 'result'):
                response_text = msg.result
                break

    print(response_text)

# Run async main
asyncio.run(main())
"""


def _parse_llm_verdict(llm_content):
    """Split an LLM reply into ``{"risk": ..., "explanation": ...}``."""
    risk = "MODERATE"
    explanation = llm_content

    if "RISK:" in llm_content and "EXPLANATION:" in llm_content:
        # partition() splits on the first marker only, so an explanation that
        # itself contains "EXPLANATION:" is kept whole.
        risk_line, _, tail = llm_content.partition("EXPLANATION:")
        explanation = tail.strip()

        if "DANGEROUS" in risk_line:
            risk = "DANGEROUS"
        elif "SAFE" in risk_line:
            risk = "SAFE"

    return {"risk": risk, "explanation": explanation}


def call_llm_for_explanation(file_path: str, operation: str, content: str, pre_classification: "dict | None" = None) -> "dict | None":
    """Call the configured LLM to explain the changes using Claude Agent SDK.

    Args:
        file_path: Path of the changed file; only its basename is sent.
        operation: ``"write"``, ``"edit"`` or ``"multiedit"``.
        content: Description of the change to embed in the prompt.
        pre_classification: Optional classifier result; its ``risk``,
            ``reason`` and ``source`` are added to the prompt as context.

    Returns:
        ``{"risk": "SAFE"|"MODERATE"|"DANGEROUS", "explanation": str}``, or
        ``None`` when no LLM is configured, the provider is not
        ``claude-subscription``, no token is available, or the helper script
        fails, times out, or prints nothing.
    """
    # Annotations are quoted so that ``X | None`` is never evaluated at
    # definition time, which would raise TypeError before Python 3.10.
    llm_config = load_llm_config()
    if not llm_config:
        debug("No LLM configured")
        return None

    provider = llm_config["provider"]

    # Only use SDK for claude-subscription provider
    if provider != "claude-subscription":
        debug("Non-OAuth provider - falling back to direct API")
        return None

    # The helper script assigns the token to os.environ, which rejects None,
    # so without a token the subprocess could only fail.
    if llm_config["api_key"] is None:
        debug("No OAuth token available")
        return None

    file_name = os.path.basename(file_path)

    # Build context from pre-classification if available
    context_note = ""
    if pre_classification:
        risk = pre_classification.get("risk", "UNKNOWN")
        reason = pre_classification.get("reason", "")
        source = pre_classification.get("source", "classifier")
        context_note = f"\n\nPre-screening ({source}): {risk} - {reason}"

    prompt = _build_explanation_prompt(file_name, operation, content, context_note)
    sdk_script = _render_sdk_script(llm_config["api_key"], llm_config["model"], prompt)

    script_path = None
    try:
        # Use Claude Agent SDK
        import subprocess
        import tempfile

        # Python reads source files as UTF-8, so write the script as UTF-8
        # regardless of the locale (the prompt may contain non-ASCII text).
        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as f:
            script_path = f.name
            f.write(sdk_script)

        # Run SDK script
        result = subprocess.run(
            ["python3", script_path],
            capture_output=True,
            text=True,
            timeout=TIMEOUT_SECONDS
        )

        if result.returncode != 0:
            debug(f"SDK script failed: {result.stderr}")
            return None

        llm_content = result.stdout.strip()
        if not llm_content:
            # Nothing to show; let the caller fall back to the classifier.
            debug("SDK script returned no output")
            return None

        return _parse_llm_verdict(llm_content)

    except Exception as e:
        debug(f"SDK error: {e}")
        return None
    finally:
        # The script embeds the OAuth token: always remove it, including when
        # the subprocess times out or raises.
        if script_path is not None:
            try:
                os.unlink(script_path)
            except OSError:
                pass
411
+
412
+
413
def _describe_change(operation, tool_input, content, old_string, new_string):
    """Build the textual description of the change that is sent to the LLM."""
    if operation == "write":
        lines = content.split('\n')
        extra_lines = len(lines) - MAX_CONTENT_LINES
        if extra_lines > 0:
            preview = '\n'.join(lines[:MAX_CONTENT_LINES])
            return f"{preview}\n... ({extra_lines} more lines)"
        return content

    if operation == "multiedit":
        edits = tool_input.get("edits", [])
        edit_count = len(edits)
        parts = [f"MultiEdit: {edit_count} changes\n"]
        for number, edit in enumerate(edits[:3], start=1):  # Show first 3
            old = edit.get("old_string", "")
            new = edit.get("new_string", "")
            parts.append(f"\n[{number}] {old[:50]}... → {new[:50]}...")
        if edit_count > 3:
            parts.append(f"\n... and {edit_count - 3} more edits")
        return "".join(parts)

    # edit
    return f"OLD:\n```\n{old_string[:1000]}\n```\n\nNEW:\n```\n{new_string[:1000]}\n```"


def _display_path(file_path):
    """Return the path to display: ``~``-relative under home, else the basename."""
    rel_path = os.path.basename(file_path)
    try:
        home_prefix = os.path.expanduser("~").rstrip(os.sep)
        # Match whole path components only, so "/home/user2/x" is not
        # abbreviated when home is "/home/user". An empty prefix means home is
        # the filesystem root, where abbreviating would not help.
        if home_prefix and (
            file_path == home_prefix or file_path.startswith(home_prefix + os.sep)
        ):
            rel_path = "~" + file_path[len(home_prefix):]
    except Exception:
        pass
    return rel_path


def main():
    """Entry point of the hook: analyse one Write/Edit/MultiEdit and report it.

    Reads the hook payload as JSON from stdin, classifies the change with the
    classifier server and the LLM, and prints a JSON object with a
    ``systemMessage`` and PostToolUse ``additionalContext`` to stdout. Always
    exits with status 0; nothing is printed when the payload is not
    applicable, when both analysis layers are unavailable, or when the same
    change was already reported in this session.
    """
    debug("Hook started")

    # Periodically clean up old state files
    cleanup_old_state_files()

    try:
        input_data = json.load(sys.stdin)
    except json.JSONDecodeError as e:
        debug(f"JSON decode error: {e}")
        sys.exit(0)

    if not isinstance(input_data, dict):
        debug("Input is not a JSON object, skipping")
        sys.exit(0)
    debug(f"Got input: tool={input_data.get('tool_name')}")

    # Extract session ID for deduplication
    session_id = input_data.get("session_id", "default")
    tool_name = input_data.get("tool_name", "")
    tool_input = input_data.get("tool_input", {})

    # Only process Write, Edit, and MultiEdit
    if tool_name not in ("Write", "Edit", "MultiEdit"):
        debug(f"Not Write/Edit/MultiEdit, skipping: {tool_name}")
        sys.exit(0)

    if not isinstance(tool_input, dict):
        debug("tool_input is not a JSON object, skipping")
        sys.exit(0)

    # Extract content using unified function
    file_path, content, operation, old_string, new_string = extract_content(tool_name, tool_input)

    if not file_path:
        debug("No file path, skipping")
        sys.exit(0)

    if not content and not new_string:
        debug("No content, skipping")
        sys.exit(0)

    # Generate content hash for caching and deduplication
    content_hash = hashlib.md5((file_path + (new_string or content or "")).encode(), usedforsecurity=False).hexdigest()

    # Session deduplication - check before the slow classifier/LLM calls.
    # A change that was already reported produces no output either way, so
    # there is no need to analyse it again.
    dedup_enabled = load_dedup_config()
    warning_key = get_warning_key(file_path, content_hash)
    if dedup_enabled and warning_key in load_state(session_id):
        # Already shown this warning in this session - allow without re-prompting
        debug(f"Deduplicated: {warning_key} already shown this session")
        sys.exit(0)

    # Build content description based on operation
    content_desc = _describe_change(operation, tool_input, content, old_string, new_string)

    # Get relative path for display
    rel_path = _display_path(file_path)

    # Layer 1: Try classifier server first (pattern + ML) for risk level
    # MultiEdit uses the edit endpoint (has old/new strings like Edit)
    classifier_op = "edit" if operation == "multiedit" else operation
    classifier_result = call_classifier(
        operation=classifier_op,
        file_path=file_path,
        content=content if operation == "write" else None,
        old_string=old_string if operation in ("edit", "multiedit") else None,
        new_string=new_string if operation in ("edit", "multiedit") else None
    )
    if not isinstance(classifier_result, dict):
        # Only a JSON object can be queried below; treat anything else as absent.
        classifier_result = None

    # Layer 2: Get LLM explanation for detailed analysis
    debug(f"Analyzing {operation}: {file_path[:80]}")
    llm_result = call_llm_for_explanation(file_path, operation, content_desc, classifier_result)

    # Progressive degradation: Use classifier if LLM unavailable
    llm_unavailable_warning = ""
    if not llm_result:
        if classifier_result and classifier_result.get("source") != "fallback":
            # Classifier worked, use its result even without LLM explanation
            risk = classifier_result.get("risk", "MODERATE")
            explanation = classifier_result.get('reason', 'Review file change manually')
            llm_unavailable_warning = "\n\n⚠️ LLM unavailable - using basic pattern matching only.\nTo get detailed explanations, configure: ~/.deliberate/config.json\nOr run: deliberate install"
            debug("LLM unavailable, using classifier-only result")
        else:
            # Both layers failed - exit silently (fail-open)
            # This prevents blocking user if Deliberate is misconfigured
            debug("Both classifier and LLM unavailable, allowing file change")
            sys.exit(0)
    else:
        # Use classifier risk if available, otherwise use LLM risk
        if classifier_result:
            risk = classifier_result.get("risk", llm_result["risk"])
        else:
            risk = llm_result["risk"]
        explanation = llm_result["explanation"]

    if dedup_enabled:
        # Mark as shown and save state. The state is re-read right before
        # saving so keys added while the analysis was running are kept.
        shown_warnings = load_state(session_id)
        shown_warnings.add(warning_key)
        save_state(session_id, shown_warnings)

    # NOTE: This is PostToolUse - the write already happened
    # Show informational output for ALL risk levels (including SAFE)
    # User can review what happened even for safe changes
    # We can only inform the user, not block. No exit(2) here.

    # ANSI color codes for terminal output
    BOLD = "\033[1m"
    CYAN = "\033[96m"
    RED = "\033[91m"
    YELLOW = "\033[93m"
    GREEN = "\033[92m"
    RESET = "\033[0m"

    # Choose emoji and color based on risk for visual branding
    if risk == "DANGEROUS":
        emoji = "🚨"
        color = RED
    elif risk == "SAFE":
        emoji = "✅"
        color = GREEN
    else:
        emoji = "⚡"
        color = YELLOW

    # Operation label
    if operation == "write":
        op_label = "Write"
    elif operation == "multiedit":
        op_label = "MultiEdit"
    else:
        op_label = "Edit"

    # User-facing message with branded formatting and colors
    # Make the explanation text visible with the risk color so it's not skipped
    user_message = f"{emoji} {BOLD}{CYAN}DELIBERATE{RESET} {BOLD}{color}[{risk}]{RESET} {op_label}\n File: {rel_path}\n {color}{explanation}{RESET}{llm_unavailable_warning}"

    # Context for Claude
    context = f"**Deliberate {op_label}** [{risk}] {rel_path}: {explanation}{llm_unavailable_warning}"

    # Cache result for Post hook to read (if this is PreToolUse)
    cache_file = get_cache_file(session_id, content_hash)
    try:
        with open(cache_file, 'w') as f:
            json.dump({
                "risk": risk,
                "explanation": explanation,
                "user_message": user_message,
                "context": context,
                "op_label": op_label,
                "rel_path": rel_path
            }, f)
    except IOError:
        pass

    # Output for PostToolUse - informational only
    # systemMessage is what makes it visible to the user
    # permissionDecision/permissionDecisionReason are for PreToolUse only
    output = {
        "systemMessage": user_message,
        "hookSpecificOutput": {
            "hookEventName": "PostToolUse",
            "additionalContext": context
        }
    }

    print(json.dumps(output))

    sys.exit(0)


if __name__ == "__main__":
    main()