ripperdoc 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ripperdoc/__init__.py +1 -1
- ripperdoc/cli/cli.py +28 -115
- ripperdoc/cli/commands/__init__.py +0 -1
- ripperdoc/cli/commands/agents_cmd.py +6 -3
- ripperdoc/cli/commands/clear_cmd.py +1 -4
- ripperdoc/cli/commands/config_cmd.py +1 -1
- ripperdoc/cli/commands/context_cmd.py +3 -2
- ripperdoc/cli/commands/doctor_cmd.py +18 -4
- ripperdoc/cli/commands/hooks_cmd.py +27 -53
- ripperdoc/cli/commands/models_cmd.py +26 -9
- ripperdoc/cli/commands/permissions_cmd.py +27 -9
- ripperdoc/cli/commands/resume_cmd.py +5 -3
- ripperdoc/cli/commands/status_cmd.py +4 -4
- ripperdoc/cli/commands/tasks_cmd.py +8 -4
- ripperdoc/cli/ui/file_mention_completer.py +2 -1
- ripperdoc/cli/ui/interrupt_handler.py +2 -3
- ripperdoc/cli/ui/message_display.py +4 -2
- ripperdoc/cli/ui/provider_options.py +247 -0
- ripperdoc/cli/ui/rich_ui.py +110 -59
- ripperdoc/cli/ui/spinner.py +25 -1
- ripperdoc/cli/ui/tool_renderers.py +8 -2
- ripperdoc/cli/ui/wizard.py +215 -0
- ripperdoc/core/agents.py +9 -3
- ripperdoc/core/config.py +49 -12
- ripperdoc/core/custom_commands.py +7 -6
- ripperdoc/core/default_tools.py +11 -2
- ripperdoc/core/hooks/config.py +1 -3
- ripperdoc/core/hooks/events.py +23 -28
- ripperdoc/core/hooks/executor.py +4 -6
- ripperdoc/core/hooks/integration.py +12 -21
- ripperdoc/core/hooks/manager.py +40 -15
- ripperdoc/core/permissions.py +40 -8
- ripperdoc/core/providers/anthropic.py +109 -36
- ripperdoc/core/providers/gemini.py +70 -5
- ripperdoc/core/providers/openai.py +60 -5
- ripperdoc/core/query.py +82 -38
- ripperdoc/core/query_utils.py +2 -0
- ripperdoc/core/skills.py +9 -3
- ripperdoc/core/system_prompt.py +4 -2
- ripperdoc/core/tool.py +9 -5
- ripperdoc/sdk/client.py +2 -2
- ripperdoc/tools/ask_user_question_tool.py +5 -3
- ripperdoc/tools/background_shell.py +2 -1
- ripperdoc/tools/bash_output_tool.py +1 -1
- ripperdoc/tools/bash_tool.py +26 -16
- ripperdoc/tools/dynamic_mcp_tool.py +29 -8
- ripperdoc/tools/enter_plan_mode_tool.py +1 -1
- ripperdoc/tools/exit_plan_mode_tool.py +1 -1
- ripperdoc/tools/file_edit_tool.py +8 -4
- ripperdoc/tools/file_read_tool.py +8 -4
- ripperdoc/tools/file_write_tool.py +9 -5
- ripperdoc/tools/glob_tool.py +3 -2
- ripperdoc/tools/grep_tool.py +3 -2
- ripperdoc/tools/kill_bash_tool.py +1 -1
- ripperdoc/tools/ls_tool.py +1 -1
- ripperdoc/tools/mcp_tools.py +13 -10
- ripperdoc/tools/multi_edit_tool.py +8 -7
- ripperdoc/tools/notebook_edit_tool.py +7 -4
- ripperdoc/tools/skill_tool.py +1 -1
- ripperdoc/tools/task_tool.py +5 -4
- ripperdoc/tools/todo_tool.py +2 -2
- ripperdoc/tools/tool_search_tool.py +3 -2
- ripperdoc/utils/conversation_compaction.py +8 -4
- ripperdoc/utils/file_watch.py +8 -2
- ripperdoc/utils/json_utils.py +2 -1
- ripperdoc/utils/mcp.py +11 -3
- ripperdoc/utils/memory.py +4 -2
- ripperdoc/utils/message_compaction.py +21 -7
- ripperdoc/utils/message_formatting.py +11 -7
- ripperdoc/utils/messages.py +105 -66
- ripperdoc/utils/path_ignore.py +35 -8
- ripperdoc/utils/permissions/path_validation_utils.py +2 -1
- ripperdoc/utils/permissions/shell_command_validation.py +427 -91
- ripperdoc/utils/safe_get_cwd.py +2 -1
- ripperdoc/utils/session_history.py +13 -6
- ripperdoc/utils/todo.py +2 -1
- ripperdoc/utils/token_estimation.py +6 -1
- {ripperdoc-0.2.8.dist-info → ripperdoc-0.2.9.dist-info}/METADATA +1 -1
- ripperdoc-0.2.9.dist-info/RECORD +123 -0
- ripperdoc-0.2.8.dist-info/RECORD +0 -121
- {ripperdoc-0.2.8.dist-info → ripperdoc-0.2.9.dist-info}/WHEEL +0 -0
- {ripperdoc-0.2.8.dist-info → ripperdoc-0.2.9.dist-info}/entry_points.txt +0 -0
- {ripperdoc-0.2.8.dist-info → ripperdoc-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {ripperdoc-0.2.8.dist-info → ripperdoc-0.2.9.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,7 @@ potentially dangerous constructs before execution.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import re
|
|
10
|
+
import shlex
|
|
10
11
|
from dataclasses import dataclass
|
|
11
12
|
from typing import List, Optional, Tuple
|
|
12
13
|
|
|
@@ -25,62 +26,128 @@ def _strip_single_quotes(shell_command: str, first_token: str) -> str:
|
|
|
25
26
|
|
|
26
27
|
Single-quoted content in shell is literal and cannot contain command
|
|
27
28
|
substitution, so we can safely ignore it for security analysis.
|
|
29
|
+
|
|
30
|
+
Double quotes are kept for analysis since they can contain variable
|
|
31
|
+
expansions and command substitutions.
|
|
28
32
|
"""
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
33
|
+
in_single_quote = False
|
|
34
|
+
escaped = False
|
|
35
|
+
result = []
|
|
36
|
+
|
|
37
|
+
i = 0
|
|
38
|
+
while i < len(shell_command):
|
|
39
|
+
char = shell_command[i]
|
|
40
|
+
|
|
41
|
+
if escaped:
|
|
42
|
+
escaped = False
|
|
43
|
+
result.append(char)
|
|
44
|
+
i += 1
|
|
38
45
|
continue
|
|
39
46
|
|
|
40
|
-
if
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
47
|
+
if char == "\\":
|
|
48
|
+
escaped = True
|
|
49
|
+
result.append(char)
|
|
50
|
+
i += 1
|
|
44
51
|
continue
|
|
45
52
|
|
|
46
|
-
if
|
|
47
|
-
|
|
53
|
+
if char == "'":
|
|
54
|
+
in_single_quote = not in_single_quote
|
|
55
|
+
i += 1
|
|
48
56
|
continue
|
|
49
57
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
58
|
+
if not in_single_quote:
|
|
59
|
+
result.append(char)
|
|
60
|
+
|
|
61
|
+
i += 1
|
|
62
|
+
|
|
63
|
+
return "".join(result)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _strip_quotes_for_analysis(command: str) -> str:
|
|
67
|
+
"""Strip content inside both single and double quotes for security analysis.
|
|
68
|
+
|
|
69
|
+
This is used for checking shell metacharacters in arguments.
|
|
70
|
+
Double quotes are stripped because they can contain variable expansions
|
|
71
|
+
and command substitutions that need to be analyzed.
|
|
72
|
+
"""
|
|
73
|
+
result = []
|
|
74
|
+
in_single_quote = False
|
|
75
|
+
in_double_quote = False
|
|
76
|
+
escaped = False
|
|
77
|
+
|
|
78
|
+
i = 0
|
|
79
|
+
while i < len(command):
|
|
80
|
+
char = command[i]
|
|
81
|
+
|
|
82
|
+
if escaped:
|
|
83
|
+
escaped = False
|
|
84
|
+
i += 1
|
|
85
|
+
continue
|
|
86
|
+
|
|
87
|
+
if char == "\\":
|
|
88
|
+
escaped = True
|
|
89
|
+
i += 1
|
|
90
|
+
continue
|
|
70
91
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
92
|
+
if char == "'" and not in_double_quote:
|
|
93
|
+
in_single_quote = not in_single_quote
|
|
94
|
+
i += 1
|
|
95
|
+
continue
|
|
96
|
+
|
|
97
|
+
if char == '"' and not in_single_quote:
|
|
98
|
+
in_double_quote = not in_double_quote
|
|
99
|
+
i += 1
|
|
100
|
+
continue
|
|
101
|
+
|
|
102
|
+
if not in_single_quote and not in_double_quote:
|
|
103
|
+
result.append(char)
|
|
75
104
|
|
|
76
|
-
|
|
77
|
-
# Note: We can't modify i in Python, so we'll need a different approach
|
|
78
|
-
# For now, just add the character if not in single quote mode
|
|
105
|
+
i += 1
|
|
79
106
|
|
|
80
|
-
|
|
81
|
-
command_without_single_quotes += current_char
|
|
107
|
+
return "".join(result)
|
|
82
108
|
|
|
83
|
-
|
|
109
|
+
|
|
110
|
+
def _is_safe_command_pattern(command: str) -> bool:
|
|
111
|
+
"""Check if command matches known safe patterns.
|
|
112
|
+
|
|
113
|
+
These are commands that are commonly used and known to be safe
|
|
114
|
+
even if they contain characters that might otherwise trigger warnings.
|
|
115
|
+
"""
|
|
116
|
+
import re
|
|
117
|
+
|
|
118
|
+
safe_patterns = [
|
|
119
|
+
# Common version checks
|
|
120
|
+
r"^\s*(python|python3|node|npm|git|bash|sh)\s+--version\s*$",
|
|
121
|
+
r"^\s*(python|python3|node|npm|git|bash|sh)\s+-v\s*$",
|
|
122
|
+
r"^\s*(python|python3|node|npm|git|bash|sh)\s+-V\s*$",
|
|
123
|
+
# Common help commands
|
|
124
|
+
r"^\s*\w+\s+--help\s*$",
|
|
125
|
+
r"^\s*\w+\s+-h\s*$",
|
|
126
|
+
r"^\s*\w+\s+help\s*$",
|
|
127
|
+
# Simple echo/print commands
|
|
128
|
+
r'^\s*echo\s+["\'].*["\']\s*$',
|
|
129
|
+
r"^\s*print(env|f)?\s+.*$",
|
|
130
|
+
# Directory listing with common options
|
|
131
|
+
r'^\s*ls\s+(-[a-zA-Z]*[lhtr]*\s*)*["\']?[^;&|<>]*["\']?\s*$',
|
|
132
|
+
r"^\s*dir\s+.*$",
|
|
133
|
+
# Current directory
|
|
134
|
+
r"^\s*pwd\s*$",
|
|
135
|
+
# Environment variable checks
|
|
136
|
+
r"^\s*env\s*$",
|
|
137
|
+
r"^\s*printenv\s*$",
|
|
138
|
+
# Which/whereis commands
|
|
139
|
+
r"^\s*which\s+\w+\s*$",
|
|
140
|
+
r"^\s*whereis\s+\w+\s*$",
|
|
141
|
+
# Type/command commands
|
|
142
|
+
r"^\s*type\s+\w+\s*$",
|
|
143
|
+
r"^\s*command\s+-v\s+\w+\s*$",
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
for pattern in safe_patterns:
|
|
147
|
+
if re.match(pattern, command, re.IGNORECASE):
|
|
148
|
+
return True
|
|
149
|
+
|
|
150
|
+
return False
|
|
84
151
|
|
|
85
152
|
|
|
86
153
|
def _sanitize_safe_redirections(command: str) -> str:
|
|
@@ -107,8 +174,14 @@ _DANGEROUS_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
|
|
|
107
174
|
# Parameter substitution
|
|
108
175
|
(re.compile(r"\$\{"), "Command contains ${} parameter substitution"),
|
|
109
176
|
# Input/output redirection
|
|
110
|
-
(
|
|
111
|
-
|
|
177
|
+
(
|
|
178
|
+
re.compile(r"<(?!\()"),
|
|
179
|
+
"Command contains input redirection (<) which could read sensitive files",
|
|
180
|
+
),
|
|
181
|
+
(
|
|
182
|
+
re.compile(r">(?!\()"),
|
|
183
|
+
"Command contains output redirection (>) which could write to arbitrary files",
|
|
184
|
+
),
|
|
112
185
|
# Zsh-specific patterns
|
|
113
186
|
(re.compile(r"~\["), "Command contains Zsh-style parameter expansion"),
|
|
114
187
|
(re.compile(r"\(e:"), "Command contains Zsh-style glob qualifiers"),
|
|
@@ -132,38 +205,38 @@ _DANGEROUS_METACHARACTER_PATTERNS: List[re.Pattern[str]] = [
|
|
|
132
205
|
_WINDOWS_DESTRUCTIVE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
|
|
133
206
|
# rmdir /s - Recursive directory deletion (Windows)
|
|
134
207
|
(
|
|
135
|
-
re.compile(r
|
|
136
|
-
"Command contains 'rmdir /s' which recursively deletes directories"
|
|
208
|
+
re.compile(r"\brmdir\s+.*(/s|/S)", re.IGNORECASE),
|
|
209
|
+
"Command contains 'rmdir /s' which recursively deletes directories",
|
|
137
210
|
),
|
|
138
211
|
# del /s or del /q - Recursive or quiet file deletion (Windows)
|
|
139
212
|
(
|
|
140
|
-
re.compile(r
|
|
141
|
-
"Command contains 'del' with dangerous flags (/s or /q)"
|
|
213
|
+
re.compile(r"\bdel\s+.*(/s|/S|/q|/Q)", re.IGNORECASE),
|
|
214
|
+
"Command contains 'del' with dangerous flags (/s or /q)",
|
|
142
215
|
),
|
|
143
216
|
# rd /s - Alias for rmdir /s (Windows)
|
|
144
217
|
(
|
|
145
|
-
re.compile(r
|
|
146
|
-
"Command contains 'rd /s' which recursively deletes directories"
|
|
218
|
+
re.compile(r"\brd\s+.*(/s|/S)", re.IGNORECASE),
|
|
219
|
+
"Command contains 'rd /s' which recursively deletes directories",
|
|
147
220
|
),
|
|
148
221
|
# format command (Windows)
|
|
149
222
|
(
|
|
150
|
-
re.compile(r
|
|
151
|
-
"Command contains 'format' which erases entire drives"
|
|
223
|
+
re.compile(r"\bformat\s+[a-zA-Z]:", re.IGNORECASE),
|
|
224
|
+
"Command contains 'format' which erases entire drives",
|
|
152
225
|
),
|
|
153
226
|
# cmd /c with destructive subcommand
|
|
154
227
|
(
|
|
155
|
-
re.compile(r
|
|
156
|
-
"Command uses 'cmd /c' to execute a destructive subcommand"
|
|
228
|
+
re.compile(r"\bcmd\s+/[cC]\s+.*\b(rmdir|rd|del|format)\b", re.IGNORECASE),
|
|
229
|
+
"Command uses 'cmd /c' to execute a destructive subcommand",
|
|
157
230
|
),
|
|
158
231
|
# PowerShell Remove-Item -Recurse
|
|
159
232
|
(
|
|
160
|
-
re.compile(r
|
|
161
|
-
"Command contains 'Remove-Item -Recurse' which recursively deletes items"
|
|
233
|
+
re.compile(r"\b(Remove-Item|rm|ri|del)\s+.*-Recurse", re.IGNORECASE),
|
|
234
|
+
"Command contains 'Remove-Item -Recurse' which recursively deletes items",
|
|
162
235
|
),
|
|
163
236
|
# PowerShell with -Force flag on destructive commands
|
|
164
237
|
(
|
|
165
|
-
re.compile(r
|
|
166
|
-
"Command contains destructive command with -Force flag"
|
|
238
|
+
re.compile(r"\b(Remove-Item|rm|ri|del)\s+.*-Force", re.IGNORECASE),
|
|
239
|
+
"Command contains destructive command with -Force flag",
|
|
167
240
|
),
|
|
168
241
|
]
|
|
169
242
|
|
|
@@ -172,37 +245,30 @@ _UNIX_DESTRUCTIVE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
|
|
|
172
245
|
# rm -rf or rm -r (recursive deletion) - must be at word boundary and followed by space/path
|
|
173
246
|
(
|
|
174
247
|
re.compile(r'(?<!["\'])\brm\s+(-[a-zA-Z]*r[a-zA-Z]*\s+|\s*-[a-zA-Z]*r[a-zA-Z]*$)'),
|
|
175
|
-
"Command contains 'rm -r' which recursively deletes files and directories"
|
|
248
|
+
"Command contains 'rm -r' which recursively deletes files and directories",
|
|
176
249
|
),
|
|
177
250
|
# rm with force flag on system paths
|
|
178
251
|
(
|
|
179
|
-
re.compile(
|
|
180
|
-
|
|
252
|
+
re.compile(
|
|
253
|
+
r'(?<!["\'])\brm\s+-[a-zA-Z]*f[a-zA-Z]*\s+(/|~|/home|/usr|/var|/etc|/root|\$HOME)'
|
|
254
|
+
),
|
|
255
|
+
"Command contains 'rm -f' targeting a critical system path",
|
|
181
256
|
),
|
|
182
257
|
# dd command (can overwrite disks)
|
|
183
|
-
(
|
|
184
|
-
re.compile(r'\bdd\s+.*of=/dev/'),
|
|
185
|
-
"Command contains 'dd' writing to a device file"
|
|
186
|
-
),
|
|
258
|
+
(re.compile(r"\bdd\s+.*of=/dev/"), "Command contains 'dd' writing to a device file"),
|
|
187
259
|
# mkfs (creates filesystem, destroys data)
|
|
188
|
-
(
|
|
189
|
-
re.compile(r'\bmkfs\b'),
|
|
190
|
-
"Command contains 'mkfs' which formats storage devices"
|
|
191
|
-
),
|
|
260
|
+
(re.compile(r"\bmkfs\b"), "Command contains 'mkfs' which formats storage devices"),
|
|
192
261
|
# shred (secure deletion)
|
|
193
|
-
(
|
|
194
|
-
re.compile(r'\bshred\s+'),
|
|
195
|
-
"Command contains 'shred' which irreversibly destroys file data"
|
|
196
|
-
),
|
|
262
|
+
(re.compile(r"\bshred\s+"), "Command contains 'shred' which irreversibly destroys file data"),
|
|
197
263
|
# chmod 777 on sensitive paths
|
|
198
264
|
(
|
|
199
|
-
re.compile(r
|
|
200
|
-
"Command contains 'chmod 777' on a sensitive system path"
|
|
265
|
+
re.compile(r"\bchmod\s+777\s+(/|/etc|/usr|/var|/home)"),
|
|
266
|
+
"Command contains 'chmod 777' on a sensitive system path",
|
|
201
267
|
),
|
|
202
268
|
# chown on system paths
|
|
203
269
|
(
|
|
204
|
-
re.compile(r
|
|
205
|
-
"Command contains 'chown' on a critical system path"
|
|
270
|
+
re.compile(r"\bchown\s+.*\s+(/etc|/usr|/var|/bin|/sbin)"),
|
|
271
|
+
"Command contains 'chown' on a critical system path",
|
|
206
272
|
),
|
|
207
273
|
]
|
|
208
274
|
|
|
@@ -222,12 +288,12 @@ _NESTED_QUOTE_PATTERNS: List[Tuple[re.Pattern[str], str]] = [
|
|
|
222
288
|
# Windows cmd with escaped quotes inside
|
|
223
289
|
(
|
|
224
290
|
re.compile(r'\bcmd\s+/[cC]\s+"[^"]*\\"[^"]*"'),
|
|
225
|
-
"Command contains 'cmd /c' with nested escaped quotes which may cause unexpected parsing"
|
|
291
|
+
"Command contains 'cmd /c' with nested escaped quotes which may cause unexpected parsing",
|
|
226
292
|
),
|
|
227
293
|
# PowerShell with complex quoting
|
|
228
294
|
(
|
|
229
295
|
re.compile(r'\bpowershell\s+.*-[Cc]ommand\s+["\'][^"\']*["\'][^"\']*["\']'),
|
|
230
|
-
"Command contains PowerShell with complex nested quotes"
|
|
296
|
+
"Command contains PowerShell with complex nested quotes",
|
|
231
297
|
),
|
|
232
298
|
]
|
|
233
299
|
|
|
@@ -262,6 +328,28 @@ def _check_destructive_commands(command: str) -> Optional[ValidationResult]:
|
|
|
262
328
|
# Check if command targets critical paths with any destructive operation
|
|
263
329
|
has_critical_path = any(p.search(command) for p in _CRITICAL_PATH_PATTERNS)
|
|
264
330
|
|
|
331
|
+
# For interpreter commands, we need to check the code string for destructive commands
|
|
332
|
+
# First, extract the first token to check if it's an interpreter
|
|
333
|
+
trimmed = command.strip()
|
|
334
|
+
first_token = trimmed.split()[0] if trimmed.split() else ""
|
|
335
|
+
|
|
336
|
+
# Check if it's an interpreter command
|
|
337
|
+
if _is_interpreter_command(command, first_token):
|
|
338
|
+
# Extract and check the code string
|
|
339
|
+
code_string = _extract_code_string(command, first_token)
|
|
340
|
+
if code_string:
|
|
341
|
+
# Check the code string for destructive patterns
|
|
342
|
+
# We need to check both the code string itself and any commands it might execute
|
|
343
|
+
code_check_result = _check_destructive_commands_in_code_string(code_string, first_token)
|
|
344
|
+
if code_check_result:
|
|
345
|
+
if has_critical_path:
|
|
346
|
+
return ValidationResult(
|
|
347
|
+
behavior="deny",
|
|
348
|
+
message=f"BLOCKED: {code_check_result.message} targeting a critical system path",
|
|
349
|
+
rule_suggestions=None,
|
|
350
|
+
)
|
|
351
|
+
return code_check_result
|
|
352
|
+
|
|
265
353
|
# Strip quoted content to avoid false positives like 'echo "rmdir /s /q folder"'
|
|
266
354
|
command_without_quotes = _strip_quoted_content_for_destructive_check(command)
|
|
267
355
|
|
|
@@ -303,19 +391,68 @@ def _check_destructive_commands(command: str) -> Optional[ValidationResult]:
|
|
|
303
391
|
return ValidationResult(
|
|
304
392
|
behavior="deny",
|
|
305
393
|
message="BLOCKED: Command contains 'cmd /c' with escaped quotes - "
|
|
306
|
-
|
|
394
|
+
"this pattern has caused data loss incidents",
|
|
307
395
|
rule_suggestions=None,
|
|
308
396
|
)
|
|
309
397
|
return ValidationResult(
|
|
310
398
|
behavior="ask",
|
|
311
399
|
message="Command contains 'cmd /c' with escaped quotes inside double quotes - "
|
|
312
|
-
|
|
400
|
+
"this pattern has caused data loss incidents due to quote parsing issues",
|
|
313
401
|
rule_suggestions=None,
|
|
314
402
|
)
|
|
315
403
|
|
|
316
404
|
return None
|
|
317
405
|
|
|
318
406
|
|
|
407
|
+
def _check_destructive_commands_in_code_string(
    code_string: str, interpreter: str
) -> Optional[ValidationResult]:
    """Look for destructive operations inside an interpreter's inline code.

    Covers invocations such as `bash -c "rm -rf /"`, where the dangerous
    command lives in the code argument rather than in the outer command.

    Args:
        code_string: The inline code passed to the interpreter.
        interpreter: The interpreter name; only "bash", "sh", "zsh",
            "python" and "python3" are inspected.

    Returns:
        A ValidationResult with behavior "ask" for the first finding, or
        None when nothing is found or the interpreter is not inspected.
    """
    # Shell interpreters: the code is shell text, so reuse the Unix patterns
    # after removing quoted content.
    if interpreter in ("bash", "sh", "zsh"):
        unquoted_code = _strip_quoted_content_for_destructive_check(code_string)
        reason = next(
            (text for regex, text in _UNIX_DESTRUCTIVE_PATTERNS if regex.search(unquoted_code)),
            None,
        )
        if reason is None:
            return None
        return ValidationResult(
            behavior="ask",
            message=f"Code string contains {reason}",
            rule_suggestions=None,
        )

    # Other interpreters are not inspected.
    if interpreter not in ("python", "python3"):
        return None

    # Python: look for calls that hand a recursive `rm` to the shell.
    shell_call_regexes = (
        r'\bos\.system\s*\(\s*["\'][^"\']*rm\s+-[a-zA-Z]*r',
        r"\bsubprocess\.(run|call|Popen)\s*\(\s*[^)]*rm\s+-[a-zA-Z]*r",
    )
    if any(re.search(regex, code_string) for regex in shell_call_regexes):
        return ValidationResult(
            behavior="ask",
            message="Python code executes destructive shell command",
            rule_suggestions=None,
        )

    return None
|
|
454
|
+
|
|
455
|
+
|
|
319
456
|
def _strip_quoted_content_for_destructive_check(command: str) -> str:
|
|
320
457
|
"""Strip content inside quotes for destructive command checking.
|
|
321
458
|
|
|
@@ -334,7 +471,7 @@ def _strip_quoted_content_for_destructive_check(command: str) -> str:
|
|
|
334
471
|
result.append(char)
|
|
335
472
|
continue
|
|
336
473
|
|
|
337
|
-
if char ==
|
|
474
|
+
if char == "\\":
|
|
338
475
|
escaped = True
|
|
339
476
|
if not in_single_quote and not in_double_quote:
|
|
340
477
|
result.append(char)
|
|
@@ -351,7 +488,77 @@ def _strip_quoted_content_for_destructive_check(command: str) -> str:
|
|
|
351
488
|
if not in_single_quote and not in_double_quote:
|
|
352
489
|
result.append(char)
|
|
353
490
|
|
|
354
|
-
return
|
|
491
|
+
return "".join(result)
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def _is_interpreter_command(command: str, first_token: str) -> bool:
|
|
495
|
+
"""Check if the command is an interpreter command that executes code strings.
|
|
496
|
+
|
|
497
|
+
Interpreter commands like `python -c "code"`, `node -e "code"`, `bash -c "code"`
|
|
498
|
+
should have different validation rules for their code strings.
|
|
499
|
+
"""
|
|
500
|
+
interpreter_tokens = {"python", "python3", "node", "bash", "sh", "zsh", "perl", "ruby"}
|
|
501
|
+
|
|
502
|
+
if first_token not in interpreter_tokens:
|
|
503
|
+
return False
|
|
504
|
+
|
|
505
|
+
# Check for -c or -e flag (execute code string)
|
|
506
|
+
# Pattern: command -c "code" or command -e "code"
|
|
507
|
+
import re
|
|
508
|
+
|
|
509
|
+
pattern = rf'\b{re.escape(first_token)}\s+-(c|e)\s+["\']'
|
|
510
|
+
return bool(re.search(pattern, command))
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def _extract_code_string(command: str, first_token: str) -> str:
    """Extract the inline code from an interpreter command.

    Quote handling follows shell rules:

    * A single-quoted string ends at the next single quote; a backslash
      inside it is literal and cannot escape the closing quote.
    * A double-quoted string ends at the first double quote that is not
      escaped, where ``\\\\`` counts as an escaped backslash (so a string
      ending in ``\\\\"`` terminates correctly).

    Args:
        command: The full command text.
        first_token: The first whitespace-separated token of the command.

    Returns:
        The code without its surrounding quotes (with ``\\"`` and ``\\'``
        unescaped for double-quoted code), or an empty string if this is not
        an interpreter command or no complete code string is found.
    """
    if not _is_interpreter_command(command, first_token):
        return ""

    # <interpreter> <-c|-e> followed by a complete single- or double-quoted string.
    pattern = (
        rf"{re.escape(first_token)}\s+-[ce]\s+"
        r"(?:'(?P<single>[^']*)'|\"(?P<double>(?:[^\"\\]|\\.)*)\")"
    )
    match = re.search(pattern, command, re.DOTALL)
    if match is None:
        return ""

    single_quoted = match.group("single")
    if single_quoted is not None:
        # Single-quoted text reaches the interpreter verbatim.
        return single_quoted

    # Remove the escape characters that protected quotes inside double quotes.
    return match.group("double").replace('\\"', '"').replace("\\'", "'")
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
def _strip_interpreter_code_strings(command: str, first_token: str) -> str:
    """Replace inline interpreter code with a placeholder for validation.

    This lets the shell structure of the command be validated while
    ignoring the code string itself, which may legitimately contain
    shell-like characters (such as ``;``) that belong to the code language.

    The quoted string is delimited by shell rules: a single-quoted string
    ends at the next single quote (backslash is literal there), and a
    double-quoted string ends at the first unescaped double quote.  Matching
    more than the real string would hide trailing shell text from the
    metacharacter checks.

    Args:
        command: The command text to rewrite.
        first_token: The first whitespace-separated token of the command.

    Returns:
        The command with each ``-c``/``-e`` code string replaced by
        ``__CODE_STRING__`` inside its original quote characters, or the
        command unchanged if it is not an interpreter command.
    """
    if not _is_interpreter_command(command, first_token):
        return command

    # Group 1 is the "<interpreter> -c " prefix; the rest is one quoted string.
    pattern = (
        rf"({re.escape(first_token)}\s+-[ce]\s+)"
        r"(?:'[^']*'|\"(?:[^\"\\]|\\.)*\")"
    )

    def replace_code_string(match: re.Match[str]) -> str:
        prefix = match.group(1)
        # The last matched character is the closing quote, which equals the opener.
        quote = match.group(0)[-1]
        return f"{prefix}{quote}__CODE_STRING__{quote}"

    return re.sub(pattern, replace_code_string, command, flags=re.DOTALL)
|
|
355
562
|
|
|
356
563
|
|
|
357
564
|
def validate_shell_command(shell_command: str) -> ValidationResult:
|
|
@@ -376,6 +583,14 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
|
|
|
376
583
|
trimmed = shell_command.strip()
|
|
377
584
|
first_token = trimmed.split()[0] if trimmed.split() else ""
|
|
378
585
|
|
|
586
|
+
# Check for safe command patterns first
|
|
587
|
+
if _is_safe_command_pattern(trimmed):
|
|
588
|
+
return ValidationResult(
|
|
589
|
+
behavior="passthrough",
|
|
590
|
+
message="Command matches safe pattern",
|
|
591
|
+
rule_suggestions=None,
|
|
592
|
+
)
|
|
593
|
+
|
|
379
594
|
# FIRST: Check for destructive commands (highest priority)
|
|
380
595
|
# This catches dangerous patterns like the Gemini incident
|
|
381
596
|
destructive_result = _check_destructive_commands(trimmed)
|
|
@@ -429,23 +644,96 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
|
|
|
429
644
|
# Strip single-quoted content for further analysis
|
|
430
645
|
sanitized = _strip_single_quotes(trimmed, first_token)
|
|
431
646
|
|
|
647
|
+
# For interpreter commands, strip code strings before checking shell metacharacters
|
|
648
|
+
# This allows code strings to contain language-specific characters like ;
|
|
649
|
+
sanitized_for_metachar_check = sanitized
|
|
650
|
+
if _is_interpreter_command(trimmed, first_token):
|
|
651
|
+
sanitized_for_metachar_check = _strip_interpreter_code_strings(sanitized, first_token)
|
|
652
|
+
|
|
432
653
|
# Remove safe redirections
|
|
433
654
|
sanitized = _sanitize_safe_redirections(sanitized)
|
|
655
|
+
sanitized_for_metachar_check = _sanitize_safe_redirections(sanitized_for_metachar_check)
|
|
656
|
+
|
|
657
|
+
# Check for shell metacharacters outside of quotes
|
|
658
|
+
# We'll parse the command and check for ; & characters that are not inside quotes
|
|
659
|
+
# Special handling for find -exec escaped semicolon (\;)
|
|
660
|
+
def has_metachars_outside_quotes(cmd: str) -> bool:
    """Report whether *cmd* has an active ``;``, ``&`` or ``|`` operator.

    The command is scanned character by character with shell quoting rules
    so that only operators the shell would actually act on are reported:

    * Text inside single or double quotes is ignored.
    * A backslash-escaped character is ignored, which covers the ``\\;``
      terminator of ``find -exec``.
    * ``&&`` and ``||`` are treated as safe operators.
    * ``&`` or ``|`` that is part of a redirection (``2>&1``, ``<&3``,
      ``&>file``, ``>|file``) is not reported here.
    * Operators are detected whether or not whitespace surrounds them
      (``ls;rm x`` is reported just like ``ls ; rm x``).

    Args:
        cmd: The command text to scan.

    Returns:
        True if a lone ``;``, ``&`` or ``|`` appears outside quotes, or if
        the command is malformed (unterminated quote or trailing backslash)
        and is therefore treated as potentially dangerous; otherwise False.
    """
    in_single = False
    in_double = False
    length = len(cmd)
    i = 0

    while i < length:
        char = cmd[i]

        if in_single:
            # Nothing is special inside single quotes except the closing quote.
            if char == "'":
                in_single = False
            i += 1
            continue

        if char == "\\":
            if i + 1 >= length:
                # Dangling escape: malformed input, be cautious.
                return True
            # Skip the escaped character; it is literal for the shell.
            i += 2
            continue

        if in_double:
            if char == '"':
                in_double = False
            i += 1
            continue

        if char == "'":
            in_single = True
        elif char == '"':
            in_double = True
        elif char == ";":
            return True
        elif char in ("&", "|"):
            previous = cmd[i - 1] if i > 0 else ""
            following = cmd[i + 1] if i + 1 < length else ""
            if following == char:
                # && or ||: consume both characters.
                i += 2
                continue
            part_of_redirection = previous in ("<", ">") or (char == "&" and following == ">")
            if not part_of_redirection:
                # A lone & or | (background job or pipe).
                return True

        i += 1

    # An unterminated quote means the command is malformed; be cautious.
    return in_single or in_double
|
|
434
721
|
|
|
435
|
-
|
|
436
|
-
if re.search(r'(?:^|\s)["\'][^"\']*[;&][^"\']*["\'](?:\s|$)', sanitized):
|
|
722
|
+
if has_metachars_outside_quotes(sanitized_for_metachar_check):
|
|
437
723
|
return ValidationResult(
|
|
438
724
|
behavior="ask",
|
|
439
|
-
message="Command contains shell metacharacters (;, |, or &)
|
|
725
|
+
message="Command contains shell metacharacters (;, |, or &) outside of quoted arguments",
|
|
440
726
|
rule_suggestions=None,
|
|
441
727
|
)
|
|
442
728
|
|
|
443
729
|
# Check for dangerous metacharacters in find/grep arguments
|
|
730
|
+
# Use the version with quotes stripped for this check
|
|
731
|
+
stripped_for_pattern_check = _strip_quotes_for_analysis(sanitized)
|
|
444
732
|
for pattern in _DANGEROUS_METACHARACTER_PATTERNS:
|
|
445
|
-
if pattern.search(
|
|
733
|
+
if pattern.search(stripped_for_pattern_check):
|
|
446
734
|
return ValidationResult(
|
|
447
735
|
behavior="ask",
|
|
448
|
-
message="Command contains shell metacharacters (;, |, or &) in arguments",
|
|
736
|
+
message="Command contains shell metacharacters (;, |, or &) in find/grep arguments",
|
|
449
737
|
rule_suggestions=None,
|
|
450
738
|
)
|
|
451
739
|
|
|
@@ -489,6 +777,54 @@ def validate_shell_command(shell_command: str) -> ValidationResult:
|
|
|
489
777
|
# Check all dangerous patterns
|
|
490
778
|
for pattern, message in _DANGEROUS_PATTERNS:
|
|
491
779
|
if pattern.search(sanitized):
|
|
780
|
+
# Special handling for newlines
|
|
781
|
+
if "newlines" in message:
|
|
782
|
+
# Check if newlines are in quotes or code strings
|
|
783
|
+
in_quote = False
|
|
784
|
+
quote_char = None
|
|
785
|
+
escaped = False
|
|
786
|
+
newline_outside_quotes = False
|
|
787
|
+
|
|
788
|
+
i = 0
|
|
789
|
+
while i < len(trimmed):
|
|
790
|
+
char = trimmed[i]
|
|
791
|
+
|
|
792
|
+
if escaped:
|
|
793
|
+
escaped = False
|
|
794
|
+
i += 1
|
|
795
|
+
continue
|
|
796
|
+
|
|
797
|
+
if char == "\\":
|
|
798
|
+
escaped = True
|
|
799
|
+
i += 1
|
|
800
|
+
continue
|
|
801
|
+
|
|
802
|
+
if char in ("'", '"') and not escaped:
|
|
803
|
+
if not in_quote:
|
|
804
|
+
in_quote = True
|
|
805
|
+
quote_char = char
|
|
806
|
+
elif char == quote_char:
|
|
807
|
+
in_quote = False
|
|
808
|
+
quote_char = None
|
|
809
|
+
|
|
810
|
+
if char in ("\n", "\r") and not in_quote:
|
|
811
|
+
newline_outside_quotes = True
|
|
812
|
+
break
|
|
813
|
+
|
|
814
|
+
i += 1
|
|
815
|
+
|
|
816
|
+
if not newline_outside_quotes:
|
|
817
|
+
# Newlines are inside quotes, which is safer
|
|
818
|
+
# For interpreter commands, check if newlines are in code strings
|
|
819
|
+
if _is_interpreter_command(trimmed, first_token):
|
|
820
|
+
code_string = _extract_code_string(trimmed, first_token)
|
|
821
|
+
if code_string and any(c in code_string for c in ("\n", "\r")):
|
|
822
|
+
# Newlines are in code string, which is allowed for interpreter commands
|
|
823
|
+
continue
|
|
824
|
+
else:
|
|
825
|
+
# For non-interpreter commands, newlines in quotes are questionable but not blocked
|
|
826
|
+
continue
|
|
827
|
+
|
|
492
828
|
return ValidationResult(
|
|
493
829
|
behavior="ask",
|
|
494
830
|
message=message,
|