ripperdoc-0.2.7-py3-none-any.whl → ripperdoc-0.2.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. ripperdoc/__init__.py +1 -1
  2. ripperdoc/cli/cli.py +33 -115
  3. ripperdoc/cli/commands/__init__.py +70 -6
  4. ripperdoc/cli/commands/agents_cmd.py +6 -3
  5. ripperdoc/cli/commands/clear_cmd.py +1 -4
  6. ripperdoc/cli/commands/config_cmd.py +1 -1
  7. ripperdoc/cli/commands/context_cmd.py +3 -2
  8. ripperdoc/cli/commands/doctor_cmd.py +18 -4
  9. ripperdoc/cli/commands/help_cmd.py +11 -1
  10. ripperdoc/cli/commands/hooks_cmd.py +610 -0
  11. ripperdoc/cli/commands/models_cmd.py +26 -9
  12. ripperdoc/cli/commands/permissions_cmd.py +57 -37
  13. ripperdoc/cli/commands/resume_cmd.py +6 -4
  14. ripperdoc/cli/commands/status_cmd.py +4 -4
  15. ripperdoc/cli/commands/tasks_cmd.py +8 -4
  16. ripperdoc/cli/ui/file_mention_completer.py +64 -8
  17. ripperdoc/cli/ui/interrupt_handler.py +3 -4
  18. ripperdoc/cli/ui/message_display.py +5 -3
  19. ripperdoc/cli/ui/panels.py +13 -10
  20. ripperdoc/cli/ui/provider_options.py +247 -0
  21. ripperdoc/cli/ui/rich_ui.py +196 -77
  22. ripperdoc/cli/ui/spinner.py +25 -1
  23. ripperdoc/cli/ui/tool_renderers.py +8 -2
  24. ripperdoc/cli/ui/wizard.py +215 -0
  25. ripperdoc/core/agents.py +9 -3
  26. ripperdoc/core/config.py +49 -12
  27. ripperdoc/core/custom_commands.py +412 -0
  28. ripperdoc/core/default_tools.py +11 -2
  29. ripperdoc/core/hooks/__init__.py +99 -0
  30. ripperdoc/core/hooks/config.py +301 -0
  31. ripperdoc/core/hooks/events.py +535 -0
  32. ripperdoc/core/hooks/executor.py +496 -0
  33. ripperdoc/core/hooks/integration.py +344 -0
  34. ripperdoc/core/hooks/manager.py +745 -0
  35. ripperdoc/core/permissions.py +40 -8
  36. ripperdoc/core/providers/anthropic.py +548 -68
  37. ripperdoc/core/providers/gemini.py +70 -5
  38. ripperdoc/core/providers/openai.py +60 -5
  39. ripperdoc/core/query.py +140 -39
  40. ripperdoc/core/query_utils.py +2 -0
  41. ripperdoc/core/skills.py +9 -3
  42. ripperdoc/core/system_prompt.py +4 -2
  43. ripperdoc/core/tool.py +9 -5
  44. ripperdoc/sdk/client.py +2 -2
  45. ripperdoc/tools/ask_user_question_tool.py +5 -3
  46. ripperdoc/tools/background_shell.py +2 -1
  47. ripperdoc/tools/bash_output_tool.py +1 -1
  48. ripperdoc/tools/bash_tool.py +30 -20
  49. ripperdoc/tools/dynamic_mcp_tool.py +29 -8
  50. ripperdoc/tools/enter_plan_mode_tool.py +1 -1
  51. ripperdoc/tools/exit_plan_mode_tool.py +1 -1
  52. ripperdoc/tools/file_edit_tool.py +8 -4
  53. ripperdoc/tools/file_read_tool.py +9 -5
  54. ripperdoc/tools/file_write_tool.py +9 -5
  55. ripperdoc/tools/glob_tool.py +3 -2
  56. ripperdoc/tools/grep_tool.py +3 -2
  57. ripperdoc/tools/kill_bash_tool.py +1 -1
  58. ripperdoc/tools/ls_tool.py +1 -1
  59. ripperdoc/tools/mcp_tools.py +13 -10
  60. ripperdoc/tools/multi_edit_tool.py +8 -7
  61. ripperdoc/tools/notebook_edit_tool.py +7 -4
  62. ripperdoc/tools/skill_tool.py +1 -1
  63. ripperdoc/tools/task_tool.py +5 -4
  64. ripperdoc/tools/todo_tool.py +2 -2
  65. ripperdoc/tools/tool_search_tool.py +3 -2
  66. ripperdoc/utils/conversation_compaction.py +11 -7
  67. ripperdoc/utils/file_watch.py +8 -2
  68. ripperdoc/utils/json_utils.py +2 -1
  69. ripperdoc/utils/mcp.py +11 -3
  70. ripperdoc/utils/memory.py +4 -2
  71. ripperdoc/utils/message_compaction.py +21 -7
  72. ripperdoc/utils/message_formatting.py +11 -7
  73. ripperdoc/utils/messages.py +105 -66
  74. ripperdoc/utils/path_ignore.py +38 -12
  75. ripperdoc/utils/permissions/path_validation_utils.py +2 -1
  76. ripperdoc/utils/permissions/shell_command_validation.py +427 -91
  77. ripperdoc/utils/safe_get_cwd.py +2 -1
  78. ripperdoc/utils/session_history.py +13 -6
  79. ripperdoc/utils/todo.py +2 -1
  80. ripperdoc/utils/token_estimation.py +6 -1
  81. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/METADATA +24 -3
  82. ripperdoc-0.2.9.dist-info/RECORD +123 -0
  83. ripperdoc-0.2.7.dist-info/RECORD +0 -113
  84. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/WHEEL +0 -0
  85. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/entry_points.txt +0 -0
  86. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/licenses/LICENSE +0 -0
  87. {ripperdoc-0.2.7.dist-info → ripperdoc-0.2.9.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ potentially dangerous constructs before execution.
7
7
  from __future__ import annotations
8
8
 
9
9
  import re
10
+ import shlex
10
11
  from dataclasses import dataclass
11
12
  from typing import List, Optional, Tuple
12
13
 
@@ -25,62 +26,128 @@ def _strip_single_quotes(shell_command: str, first_token: str) -> str:
25
26
 
26
27
  Single-quoted content in shell is literal and cannot contain command
27
28
  substitution, so we can safely ignore it for security analysis.
29
+
30
+ Double quotes are kept for analysis since they can contain variable
31
+ expansions and command substitutions.
28
32
  """
29
- in_single_quote_mode = False
30
- next_char_is_backslash_escaped = False
31
- command_without_single_quotes = ""
32
-
33
- for i, current_char in enumerate(shell_command):
34
- if next_char_is_backslash_escaped:
35
- next_char_is_backslash_escaped = False
36
- if not in_single_quote_mode:
37
- command_without_single_quotes += current_char
33
+ in_single_quote = False
34
+ escaped = False
35
+ result = []
36
+
37
+ i = 0
38
+ while i < len(shell_command):
39
+ char = shell_command[i]
40
+
41
+ if escaped:
42
+ escaped = False
43
+ result.append(char)
44
+ i += 1
38
45
  continue
39
46
 
40
- if current_char == "\\":
41
- next_char_is_backslash_escaped = True
42
- if not in_single_quote_mode:
43
- command_without_single_quotes += current_char
47
+ if char == "\\":
48
+ escaped = True
49
+ result.append(char)
50
+ i += 1
44
51
  continue
45
52
 
46
- if current_char == "'" and not next_char_is_backslash_escaped:
47
- in_single_quote_mode = not in_single_quote_mode
53
+ if char == "'":
54
+ in_single_quote = not in_single_quote
55
+ i += 1
48
56
  continue
49
57
 
50
- # Special handling for jq double-quoted strings
51
- if (
52
- first_token == "jq"
53
- and current_char == '"'
54
- and not next_char_is_backslash_escaped
55
- and not in_single_quote_mode
56
- ):
57
- # Scan to find the end of the double-quoted string
58
- quoted_string = ""
59
- scan_position = i + 1
60
-
61
- while scan_position < len(shell_command) and shell_command[scan_position] != '"':
62
- if (
63
- shell_command[scan_position] == "\\"
64
- and scan_position + 1 < len(shell_command)
65
- ):
66
- scan_position += 2
67
- continue
68
- quoted_string += shell_command[scan_position]
69
- scan_position += 1
58
+ if not in_single_quote:
59
+ result.append(char)
60
+
61
+ i += 1
62
+
63
+ return "".join(result)
64
+
65
+
66
+ def _strip_quotes_for_analysis(command: str) -> str:
67
+ """Strip content inside both single and double quotes for security analysis.
68
+
69
+ This is used for checking shell metacharacters in arguments.
70
+ Double quotes are stripped because they can contain variable expansions
71
+ and command substitutions that need to be analyzed.
72
+ """
73
+ result = []
74
+ in_single_quote = False
75
+ in_double_quote = False
76
+ escaped = False
77
+
78
+ i = 0
79
+ while i < len(command):
80
+ char = command[i]
81
+
82
+ if escaped:
83
+ escaped = False
84
+ i += 1
85
+ continue
86
+
87
+ if char == "\\":
88
+ escaped = True
89
+ i += 1
90
+ continue
70
91
 
71
- # If the quoted string contains command substitution, keep it for analysis
72
- if "$(" in quoted_string or "`" in quoted_string:
73
- command_without_single_quotes += current_char
74
- continue
92
+ if char == "'" and not in_double_quote:
93
+ in_single_quote = not in_single_quote
94
+ i += 1
95
+ continue
96
+
97
+ if char == '"' and not in_single_quote:
98
+ in_double_quote = not in_double_quote
99
+ i += 1
100
+ continue
101
+
102
+ if not in_single_quote and not in_double_quote:
103
+ result.append(char)
75
104
 
76
- # Skip the entire quoted string
77
- # Note: We can't modify i in Python, so we'll need a different approach
78
- # For now, just add the character if not in single quote mode
105
+ i += 1
79
106
 
80
- if not in_single_quote_mode:
81
- command_without_single_quotes += current_char
107
+ return "".join(result)
82
108
 
83
- return command_without_single_quotes
109
+
110
+ def _is_safe_command_pattern(command: str) -> bool:
111
+ """Check if command matches known safe patterns.
112
+
113
+ These are commands that are commonly used and known to be safe
114
+ even if they contain characters that might otherwise trigger warnings.
115
+ """
116
+ import re
117
+
118
+ safe_patterns = [
119
+ # Common version checks
120
+ r"^\s*(python|python3|node|npm|git|bash|sh)\s+--version\s*$",
121
+ r"^\s*(python|python3|node|npm|git|bash|sh)\s+-v\s*$",
122
+ r"^\s*(python|python3|node|npm|git|bash|sh)\s+-V\s*$",
123
+ # Common help commands
124
+ r"^\s*\w+\s+--help\s*$",
125
+ r"^\s*\w+\s+-h\s*$",
126
+ r"^\s*\w+\s+help\s*$",
127
+ # Simple echo/print commands
128
+ r'^\s*echo\s+["\'].*["\']\s*$',
129
+ r"^\s*print(env|f)?\s+.*$",
130
+ # Directory listing with common options
131
+ r'^\s*ls\s+(-[a-zA-Z]*[lhtr]*\s*)*["\']?[^;&|<>]*["\']?\s*$',
132
+ r"^\s*dir\s+.*$",
133
+ # Current directory
134
+ r"^\s*pwd\s*$",
135
+ # Environment variable checks
136
+ r"^\s*env\s*$",
137
+ r"^\s*printenv\s*$",
138
+ # Which/whereis commands
139
+ r"^\s*which\s+\w+\s*$",
140
+ r"^\s*whereis\s+\w+\s*$",
141
+ # Type/command commands
142
+ r"^\s*type\s+\w+\s*$",
143
+ r"^\s*command\s+-v\s+\w+\s*$",
144
+ ]
145
+
146
+ for pattern in safe_patterns:
147
+ if re.match(pattern, command, re.IGNORECASE):
148
+ return True
149
+
150
+ return False
84
151
 
85
152
 
86
153
  def _sanitize_safe_redirections(command: str) -> str:
@@ -107,8 +174,14 @@ _DANGEROUS_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
107
174
  # Parameter substitution
108
175
  (re.compile(r"\$\{"), "Command contains ${} parameter substitution"),
109
176
  # Input/output redirection
110
- (re.compile(r"<(?!\()"), "Command contains input redirection (<) which could read sensitive files"),
111
- (re.compile(r">(?!\()"), "Command contains output redirection (>) which could write to arbitrary files"),
177
+ (
178
+ re.compile(r"<(?!\()"),
179
+ "Command contains input redirection (<) which could read sensitive files",
180
+ ),
181
+ (
182
+ re.compile(r">(?!\()"),
183
+ "Command contains output redirection (>) which could write to arbitrary files",
184
+ ),
112
185
  # Zsh-specific patterns
113
186
  (re.compile(r"~\["), "Command contains Zsh-style parameter expansion"),
114
187
  (re.compile(r"\(e:"), "Command contains Zsh-style glob qualifiers"),
@@ -132,38 +205,38 @@ _DANGEROUS_METACHARACTER_PATTERNS: List[re.Pattern[str]] = [
132
205
  _WINDOWS_DESTRUCTIVE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
133
206
  # rmdir /s - Recursive directory deletion (Windows)
134
207
  (
135
- re.compile(r'\brmdir\s+.*(/s|/S)', re.IGNORECASE),
136
- "Command contains 'rmdir /s' which recursively deletes directories"
208
+ re.compile(r"\brmdir\s+.*(/s|/S)", re.IGNORECASE),
209
+ "Command contains 'rmdir /s' which recursively deletes directories",
137
210
  ),
138
211
  # del /s or del /q - Recursive or quiet file deletion (Windows)
139
212
  (
140
- re.compile(r'\bdel\s+.*(/s|/S|/q|/Q)', re.IGNORECASE),
141
- "Command contains 'del' with dangerous flags (/s or /q)"
213
+ re.compile(r"\bdel\s+.*(/s|/S|/q|/Q)", re.IGNORECASE),
214
+ "Command contains 'del' with dangerous flags (/s or /q)",
142
215
  ),
143
216
  # rd /s - Alias for rmdir /s (Windows)
144
217
  (
145
- re.compile(r'\brd\s+.*(/s|/S)', re.IGNORECASE),
146
- "Command contains 'rd /s' which recursively deletes directories"
218
+ re.compile(r"\brd\s+.*(/s|/S)", re.IGNORECASE),
219
+ "Command contains 'rd /s' which recursively deletes directories",
147
220
  ),
148
221
  # format command (Windows)
149
222
  (
150
- re.compile(r'\bformat\s+[a-zA-Z]:', re.IGNORECASE),
151
- "Command contains 'format' which erases entire drives"
223
+ re.compile(r"\bformat\s+[a-zA-Z]:", re.IGNORECASE),
224
+ "Command contains 'format' which erases entire drives",
152
225
  ),
153
226
  # cmd /c with destructive subcommand
154
227
  (
155
- re.compile(r'\bcmd\s+/[cC]\s+.*\b(rmdir|rd|del|format)\b', re.IGNORECASE),
156
- "Command uses 'cmd /c' to execute a destructive subcommand"
228
+ re.compile(r"\bcmd\s+/[cC]\s+.*\b(rmdir|rd|del|format)\b", re.IGNORECASE),
229
+ "Command uses 'cmd /c' to execute a destructive subcommand",
157
230
  ),
158
231
  # PowerShell Remove-Item -Recurse
159
232
  (
160
- re.compile(r'\b(Remove-Item|rm|ri|del)\s+.*-Recurse', re.IGNORECASE),
161
- "Command contains 'Remove-Item -Recurse' which recursively deletes items"
233
+ re.compile(r"\b(Remove-Item|rm|ri|del)\s+.*-Recurse", re.IGNORECASE),
234
+ "Command contains 'Remove-Item -Recurse' which recursively deletes items",
162
235
  ),
163
236
  # PowerShell with -Force flag on destructive commands
164
237
  (
165
- re.compile(r'\b(Remove-Item|rm|ri|del)\s+.*-Force', re.IGNORECASE),
166
- "Command contains destructive command with -Force flag"
238
+ re.compile(r"\b(Remove-Item|rm|ri|del)\s+.*-Force", re.IGNORECASE),
239
+ "Command contains destructive command with -Force flag",
167
240
  ),
168
241
  ]
169
242
 
@@ -172,37 +245,30 @@ _UNIX_DESTRUCTIVE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
172
245
  # rm -rf or rm -r (recursive deletion) - must be at word boundary and followed by space/path
173
246
  (
174
247
  re.compile(r'(?<!["\'])\brm\s+(-[a-zA-Z]*r[a-zA-Z]*\s+|\s*-[a-zA-Z]*r[a-zA-Z]*$)'),
175
- "Command contains 'rm -r' which recursively deletes files and directories"
248
+ "Command contains 'rm -r' which recursively deletes files and directories",
176
249
  ),
177
250
  # rm with force flag on system paths
178
251
  (
179
- re.compile(r'(?<!["\'])\brm\s+-[a-zA-Z]*f[a-zA-Z]*\s+(/|~|/home|/usr|/var|/etc|/root|\$HOME)'),
180
- "Command contains 'rm -f' targeting a critical system path"
252
+ re.compile(
253
+ r'(?<!["\'])\brm\s+-[a-zA-Z]*f[a-zA-Z]*\s+(/|~|/home|/usr|/var|/etc|/root|\$HOME)'
254
+ ),
255
+ "Command contains 'rm -f' targeting a critical system path",
181
256
  ),
182
257
  # dd command (can overwrite disks)
183
- (
184
- re.compile(r'\bdd\s+.*of=/dev/'),
185
- "Command contains 'dd' writing to a device file"
186
- ),
258
+ (re.compile(r"\bdd\s+.*of=/dev/"), "Command contains 'dd' writing to a device file"),
187
259
  # mkfs (creates filesystem, destroys data)
188
- (
189
- re.compile(r'\bmkfs\b'),
190
- "Command contains 'mkfs' which formats storage devices"
191
- ),
260
+ (re.compile(r"\bmkfs\b"), "Command contains 'mkfs' which formats storage devices"),
192
261
  # shred (secure deletion)
193
- (
194
- re.compile(r'\bshred\s+'),
195
- "Command contains 'shred' which irreversibly destroys file data"
196
- ),
262
+ (re.compile(r"\bshred\s+"), "Command contains 'shred' which irreversibly destroys file data"),
197
263
  # chmod 777 on sensitive paths
198
264
  (
199
- re.compile(r'\bchmod\s+777\s+(/|/etc|/usr|/var|/home)'),
200
- "Command contains 'chmod 777' on a sensitive system path"
265
+ re.compile(r"\bchmod\s+777\s+(/|/etc|/usr|/var|/home)"),
266
+ "Command contains 'chmod 777' on a sensitive system path",
201
267
  ),
202
268
  # chown on system paths
203
269
  (
204
- re.compile(r'\bchown\s+.*\s+(/etc|/usr|/var|/bin|/sbin)'),
205
- "Command contains 'chown' on a critical system path"
270
+ re.compile(r"\bchown\s+.*\s+(/etc|/usr|/var|/bin|/sbin)"),
271
+ "Command contains 'chown' on a critical system path",
206
272
  ),
207
273
  ]
208
274
 
@@ -222,12 +288,12 @@ _NESTED_QUOTE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
222
288
  # Windows cmd with escaped quotes inside
223
289
  (
224
290
  re.compile(r'\bcmd\s+/[cC]\s+"[^"]*\\"[^"]*"'),
225
- "Command contains 'cmd /c' with nested escaped quotes which may cause unexpected parsing"
291
+ "Command contains 'cmd /c' with nested escaped quotes which may cause unexpected parsing",
226
292
  ),
227
293
  # PowerShell with complex quoting
228
294
  (
229
295
  re.compile(r'\bpowershell\s+.*-[Cc]ommand\s+["\'][^"\']*["\'][^"\']*["\']'),
230
- "Command contains PowerShell with complex nested quotes"
296
+ "Command contains PowerShell with complex nested quotes",
231
297
  ),
232
298
  ]
233
299
 
@@ -262,6 +328,28 @@ def _check_destructive_commands(command: str) -> Optional[ValidationResult]:
262
328
  # Check if command targets critical paths with any destructive operation
263
329
  has_critical_path = any(p.search(command) for p in _CRITICAL_PATH_PATTERNS)
264
330
 
331
+ # For interpreter commands, we need to check the code string for destructive commands
332
+ # First, extract the first token to check if it's an interpreter
333
+ trimmed = command.strip()
334
+ first_token = trimmed.split()[0] if trimmed.split() else ""
335
+
336
+ # Check if it's an interpreter command
337
+ if _is_interpreter_command(command, first_token):
338
+ # Extract and check the code string
339
+ code_string = _extract_code_string(command, first_token)
340
+ if code_string:
341
+ # Check the code string for destructive patterns
342
+ # We need to check both the code string itself and any commands it might execute
343
+ code_check_result = _check_destructive_commands_in_code_string(code_string, first_token)
344
+ if code_check_result:
345
+ if has_critical_path:
346
+ return ValidationResult(
347
+ behavior="deny",
348
+ message=f"BLOCKED: {code_check_result.message} targeting a critical system path",
349
+ rule_suggestions=None,
350
+ )
351
+ return code_check_result
352
+
265
353
  # Strip quoted content to avoid false positives like 'echo "rmdir /s /q folder"'
266
354
  command_without_quotes = _strip_quoted_content_for_destructive_check(command)
267
355
 
@@ -303,19 +391,68 @@ def _check_destructive_commands(command: str) -> Optional[ValidationResult]:
303
391
  return ValidationResult(
304
392
  behavior="deny",
305
393
  message="BLOCKED: Command contains 'cmd /c' with escaped quotes - "
306
- "this pattern has caused data loss incidents",
394
+ "this pattern has caused data loss incidents",
307
395
  rule_suggestions=None,
308
396
  )
309
397
  return ValidationResult(
310
398
  behavior="ask",
311
399
  message="Command contains 'cmd /c' with escaped quotes inside double quotes - "
312
- "this pattern has caused data loss incidents due to quote parsing issues",
400
+ "this pattern has caused data loss incidents due to quote parsing issues",
313
401
  rule_suggestions=None,
314
402
  )
315
403
 
316
404
  return None
317
405
 
318
406
 
407
+ def _check_destructive_commands_in_code_string(
408
+ code_string: str, interpreter: str
409
+ ) -> Optional[ValidationResult]:
410
+ """Check for destructive commands in interpreter code strings.
411
+
412
+ This handles cases like `bash -c "rm -rf /"` where the destructive
413
+ command is inside the code string.
414
+ """
415
+ import re
416
+
417
+ # For shell interpreters (bash, sh, zsh), the code string is shell code
418
+ if interpreter in ("bash", "sh", "zsh"):
419
+ # Check for destructive patterns in the shell code
420
+ stripped_code = _strip_quoted_content_for_destructive_check(code_string)
421
+
422
+ for pattern, message in _UNIX_DESTRUCTIVE_PATTERNS:
423
+ if pattern.search(stripped_code):
424
+ return ValidationResult(
425
+ behavior="ask",
426
+ message=f"Code string contains {message}",
427
+ rule_suggestions=None,
428
+ )
429
+
430
+ # For Python, check for os.system, subprocess, etc.
431
+ elif interpreter in ("python", "python3"):
432
+ # Check for system calls that execute shell commands
433
+ system_patterns = [
434
+ (
435
+ r'\bos\.system\s*\(\s*["\'][^"\']*rm\s+-[a-zA-Z]*r',
436
+ "Python code executes destructive shell command",
437
+ ),
438
+ (
439
+ r"\bsubprocess\.(run|call|Popen)\s*\(\s*[^)]*rm\s+-[a-zA-Z]*r",
440
+ "Python code executes destructive shell command",
441
+ ),
442
+ ]
443
+
444
+ for pattern_str, message in system_patterns:
445
+ if re.search(pattern_str, code_string):
446
+ return ValidationResult(
447
+ behavior="ask",
448
+ message=message,
449
+ rule_suggestions=None,
450
+ )
451
+
452
+ # For other interpreters, we could add more checks as needed
453
+ return None
454
+
455
+
319
456
  def _strip_quoted_content_for_destructive_check(command: str) -> str:
320
457
  """Strip content inside quotes for destructive command checking.
321
458
 
@@ -334,7 +471,7 @@ def _strip_quoted_content_for_destructive_check(command: str) -> str:
334
471
  result.append(char)
335
472
  continue
336
473
 
337
- if char == '\\':
474
+ if char == "\\":
338
475
  escaped = True
339
476
  if not in_single_quote and not in_double_quote:
340
477
  result.append(char)
@@ -351,7 +488,77 @@ def _strip_quoted_content_for_destructive_check(command: str) -> str:
351
488
  if not in_single_quote and not in_double_quote:
352
489
  result.append(char)
353
490
 
354
- return ''.join(result)
491
+ return "".join(result)
492
+
493
+
494
+ def _is_interpreter_command(command: str, first_token: str) -> bool:
495
+ """Check if the command is an interpreter command that executes code strings.
496
+
497
+ Interpreter commands like `python -c "code"`, `node -e "code"`, `bash -c "code"`
498
+ should have different validation rules for their code strings.
499
+ """
500
+ interpreter_tokens = {"python", "python3", "node", "bash", "sh", "zsh", "perl", "ruby"}
501
+
502
+ if first_token not in interpreter_tokens:
503
+ return False
504
+
505
+ # Check for -c or -e flag (execute code string)
506
+ # Pattern: command -c "code" or command -e "code"
507
+ import re
508
+
509
+ pattern = rf'\b{re.escape(first_token)}\s+-(c|e)\s+["\']'
510
+ return bool(re.search(pattern, command))
511
+
512
+
513
+ def _extract_code_string(command: str, first_token: str) -> str:
514
+ """Extract the code string from an interpreter command.
515
+
516
+ Returns the code string without the surrounding quotes, or empty string
517
+ if not an interpreter command or no code string found.
518
+ """
519
+ if not _is_interpreter_command(command, first_token):
520
+ return ""
521
+
522
+ import re
523
+
524
+ # Find the code string after -c or -e flag
525
+ # Match: command -c "code" or command -e 'code'
526
+ pattern = rf'{re.escape(first_token)}\s+-(c|e)\s+(["\'])(.*?)(?<!\\)\2'
527
+ match = re.search(pattern, command, re.DOTALL)
528
+
529
+ if match:
530
+ code_string = match.group(3)
531
+ # Remove escape characters
532
+ code_string = code_string.replace('\\"', '"').replace("\\'", "'")
533
+ return code_string
534
+
535
+ return ""
536
+
537
+
538
+ def _strip_interpreter_code_strings(command: str, first_token: str) -> str:
539
+ """Strip code strings from interpreter commands for validation.
540
+
541
+ This allows us to validate the shell command structure while
542
+ ignoring the content of code strings which may contain shell-like
543
+ characters that are actually part of the code language.
544
+ """
545
+ if not _is_interpreter_command(command, first_token):
546
+ return command
547
+
548
+ import re
549
+
550
+ # Replace code strings with placeholder
551
+ # Match: command -c "code" or command -e 'code'
552
+ # The (?<!\\) negative lookbehind ensures we don't match escaped quotes
553
+ pattern = rf'({re.escape(first_token)}\s+-(c|e)\s+)(["\'])(.*?)(?<!\\)\3'
554
+
555
+ def replace_code_string(match: re.Match[str]) -> str:
556
+ prefix = match.group(1)
557
+ quote = match.group(3)
558
+ return f"{prefix}{quote}__CODE_STRING__{quote}"
559
+
560
+ result = re.sub(pattern, replace_code_string, command, flags=re.DOTALL)
561
+ return result
355
562
 
356
563
 
357
564
  def validate_shell_command(shell_command: str) -> ValidationResult:
@@ -376,6 +583,14 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
376
583
  trimmed = shell_command.strip()
377
584
  first_token = trimmed.split()[0] if trimmed.split() else ""
378
585
 
586
+ # Check for safe command patterns first
587
+ if _is_safe_command_pattern(trimmed):
588
+ return ValidationResult(
589
+ behavior="passthrough",
590
+ message="Command matches safe pattern",
591
+ rule_suggestions=None,
592
+ )
593
+
379
594
  # FIRST: Check for destructive commands (highest priority)
380
595
  # This catches dangerous patterns like the Gemini incident
381
596
  destructive_result = _check_destructive_commands(trimmed)
@@ -429,23 +644,96 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
429
644
  # Strip single-quoted content for further analysis
430
645
  sanitized = _strip_single_quotes(trimmed, first_token)
431
646
 
647
+ # For interpreter commands, strip code strings before checking shell metacharacters
648
+ # This allows code strings to contain language-specific characters like ;
649
+ sanitized_for_metachar_check = sanitized
650
+ if _is_interpreter_command(trimmed, first_token):
651
+ sanitized_for_metachar_check = _strip_interpreter_code_strings(sanitized, first_token)
652
+
432
653
  # Remove safe redirections
433
654
  sanitized = _sanitize_safe_redirections(sanitized)
655
+ sanitized_for_metachar_check = _sanitize_safe_redirections(sanitized_for_metachar_check)
656
+
657
+ # Check for shell metacharacters outside of quotes
658
+ # We'll parse the command and check for ; & characters that are not inside quotes
659
+ # Special handling for find -exec escaped semicolon (\;)
660
+ def has_metachars_outside_quotes(cmd: str) -> bool:
661
+ # Use shlex for proper shell tokenization and quote handling
662
+ lex = shlex.shlex(cmd, posix=True)
663
+ lex.whitespace_split = True # Split on whitespace, better for argument parsing
664
+ lex.commenters = "" # Don't treat # as comment for security analysis
665
+
666
+ tokens = []
667
+ try:
668
+ # Get all tokens
669
+ while True:
670
+ token = lex.get_token()
671
+ if token == lex.eof:
672
+ break
673
+ tokens.append(token)
674
+ except ValueError:
675
+ # If shlex fails (e.g., unmatched quotes), be cautious and return True
676
+ # This treats malformed commands as potentially dangerous
677
+ return True
678
+
679
+ # Check for dangerous operators in tokens
680
+ # shlex will separate operators like ; & | as individual tokens
681
+ # even when they're not surrounded by spaces
682
+ i = 0
683
+ while i < len(tokens):
684
+ token = tokens[i]
685
+ if token in (";", "&", "|"):
686
+ # Check if it's part of a safe operator (&& or ||)
687
+ if token in ("&", "|") and i + 1 < len(tokens) and tokens[i + 1] == token:
688
+ # This is && or ||, skip both tokens
689
+ i += 2
690
+ continue
691
+ # Single ; & | are dangerous
692
+ return True
693
+ i += 1
694
+
695
+ # Also check for find -exec escaped semicolon pattern
696
+ # shlex will have already parsed \; as separate token ';' (since escaped)
697
+ # We need to check if this ; is part of find -exec pattern
698
+ # by looking at the token context
699
+ for i, token in enumerate(tokens):
700
+ if token == ";":
701
+ # Check if previous tokens contain "-exec"
702
+ # Look backward through tokens to find "-exec"
703
+ j = i - 1
704
+ found_exec = False
705
+ while j >= 0:
706
+ if tokens[j] == "-exec":
707
+ found_exec = True
708
+ break
709
+ j -= 1
710
+ if found_exec:
711
+ # This is likely find -exec ... ;, check if it's escaped in original
712
+ # We need to check the original string to confirm it's \;
713
+ # Build a regex to find this specific semicolon
714
+ # For now, we'll assume it's the find -exec semicolon
715
+ # and continue checking other tokens
716
+ continue
717
+ # Not part of find -exec, so it's dangerous
718
+ return True
719
+
720
+ return False
434
721
 
435
- # Check for shell metacharacters in quoted arguments
436
- if re.search(r'(?:^|\s)["\'][^"\']*[;&][^"\']*["\'](?:\s|$)', sanitized):
722
+ if has_metachars_outside_quotes(sanitized_for_metachar_check):
437
723
  return ValidationResult(
438
724
  behavior="ask",
439
- message="Command contains shell metacharacters (;, |, or &) in arguments",
725
+ message="Command contains shell metacharacters (;, |, or &) outside of quoted arguments",
440
726
  rule_suggestions=None,
441
727
  )
442
728
 
443
729
  # Check for dangerous metacharacters in find/grep arguments
730
+ # Use the version with quotes stripped for this check
731
+ stripped_for_pattern_check = _strip_quotes_for_analysis(sanitized)
444
732
  for pattern in _DANGEROUS_METACHARACTER_PATTERNS:
445
- if pattern.search(sanitized):
733
+ if pattern.search(stripped_for_pattern_check):
446
734
  return ValidationResult(
447
735
  behavior="ask",
448
- message="Command contains shell metacharacters (;, |, or &) in arguments",
736
+ message="Command contains shell metacharacters (;, |, or &) in find/grep arguments",
449
737
  rule_suggestions=None,
450
738
  )
451
739
 
@@ -489,6 +777,54 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
489
777
  # Check all dangerous patterns
490
778
  for pattern, message in _DANGEROUS_PATTERNS:
491
779
  if pattern.search(sanitized):
780
+ # Special handling for newlines
781
+ if "newlines" in message:
782
+ # Check if newlines are in quotes or code strings
783
+ in_quote = False
784
+ quote_char = None
785
+ escaped = False
786
+ newline_outside_quotes = False
787
+
788
+ i = 0
789
+ while i < len(trimmed):
790
+ char = trimmed[i]
791
+
792
+ if escaped:
793
+ escaped = False
794
+ i += 1
795
+ continue
796
+
797
+ if char == "\\":
798
+ escaped = True
799
+ i += 1
800
+ continue
801
+
802
+ if char in ("'", '"') and not escaped:
803
+ if not in_quote:
804
+ in_quote = True
805
+ quote_char = char
806
+ elif char == quote_char:
807
+ in_quote = False
808
+ quote_char = None
809
+
810
+ if char in ("\n", "\r") and not in_quote:
811
+ newline_outside_quotes = True
812
+ break
813
+
814
+ i += 1
815
+
816
+ if not newline_outside_quotes:
817
+ # Newlines are inside quotes, which is safer
818
+ # For interpreter commands, check if newlines are in code strings
819
+ if _is_interpreter_command(trimmed, first_token):
820
+ code_string = _extract_code_string(trimmed, first_token)
821
+ if code_string and any(c in code_string for c in ("\n", "\r")):
822
+ # Newlines are in code string, which is allowed for interpreter commands
823
+ continue
824
+ else:
825
+ # For non-interpreter commands, newlines in quotes are questionable but not blocked
826
+ continue
827
+
492
828
  return ValidationResult(
493
829
  behavior="ask",
494
830
  message=message,