mseep-cmd-line-mcp 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
1
+ """Security utilities for the command-line MCP server."""
2
+
3
+ import logging
4
+ import os
5
+ import re
6
+ import shlex
7
+ from typing import Dict, List, Optional, Tuple, Union
8
+
9
+ # Configure logger
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def parse_command(command: str) -> Tuple[str, List[str]]:
    """Split a command string into its program name and its arguments.

    Args:
        command: The raw command string (surrounding whitespace is ignored)

    Returns:
        A tuple of (command, arguments). The command is an empty string when
        the input is blank or when it begins with a dash; in the latter case
        the whole stripped input is returned as the single argument.
    """
    stripped = command.strip()

    # A segment that begins with a dash (e.g. the `-v` in `cmd | -v`) is
    # treated as an option continuation rather than a program name.
    if stripped[:1] == "-":
        return "", [stripped]

    try:
        tokens = shlex.split(stripped)
    except ValueError:
        # shlex rejects input such as unbalanced quotes; degrade to a plain
        # whitespace split so a best-effort command name is still produced.
        tokens = stripped.split()

    if tokens:
        head, *tail = tokens
        return head, tail
    return "", []
42
+
43
+
44
def validate_command(
    command: str,
    read_commands: List[str],
    write_commands: List[str],
    system_commands: List[str],
    blocked_commands: List[str],
    dangerous_patterns: List[str],
    allow_command_separators: bool = True,
) -> Dict[str, Union[bool, str, Optional[str]]]:
    """Validate a command line against the configured allow and block lists.

    The checks run in this order: blank input, separator policy, dangerous
    regex patterns, then per-command validation. When the command contains
    any of `|`, `;` or `&`, it is split on ALL of those characters and every
    resulting segment must start with a known, non-blocked command.

    The split is deliberately not quote-aware: a separator character inside
    quotes still splits the command, so such commands are usually rejected.
    This errs on the side of rejecting. `&&` and `||` produce an empty
    segment and are therefore rejected as "Empty command".

    Args:
        command: The command to validate
        read_commands: List of read-only commands
        write_commands: List of write commands
        system_commands: List of system commands
        blocked_commands: List of blocked commands
        dangerous_patterns: List of regex patterns that reject the command
        allow_command_separators: Whether to allow command separators (|, ;, &)

    Returns:
        A dictionary with the keys "is_valid" (bool), "command_type"
        ("read", "write", "system" or None) and "error" (message or None).
        For chained commands "command_type" is the most privileged type
        among the segments (system > write > read).
    """
    result: Dict[str, Union[bool, str, Optional[str]]] = {
        "is_valid": False,
        "command_type": None,
        "error": None,
    }

    # Empty command
    if not command.strip():
        result["error"] = "Empty command"
        return result

    # If command separators are not allowed, reject any occurrence of them.
    # NOTE(review): a newline also separates commands when the string is
    # handed to a POSIX shell and is not covered here - confirm how the
    # caller executes the command.
    if not allow_command_separators and re.search(r"[|;&]", command):
        result["error"] = (
            "Command separators (|, ;, &) are not allowed in the current configuration"
        )
        return result

    # Friendlier messages for the well-known substitution patterns; any other
    # configured pattern is reported verbatim.
    substitution_errors = {
        r"\$\(": "Command contains command substitution $(). This is blocked for security reasons.",
        r"\$\{\w+\}": "Command contains variable substitution ${var}. This is blocked for security reasons.",
        r"`": "Command contains backtick command substitution. This is blocked for security reasons.",
    }
    for pattern in dangerous_patterns:
        if re.search(pattern, command):
            result["error"] = substitution_errors.get(
                pattern, f"Command contains dangerous pattern: {pattern}"
            )
            return result

    def _command_type(name: str) -> Optional[str]:
        # Map a command name to its configured category, or None if unknown.
        if name in read_commands:
            return "read"
        if name in write_commands:
            return "write"
        if name in system_commands:
            return "system"
        return None

    supported_cmds = ", ".join(read_commands + write_commands + system_commands)

    # The label used in error messages follows the first separator kind
    # present, checked in the order pipe, semicolon, ampersand.
    separator_labels = (
        ("|", "pipeline"),
        (";", "command sequence"),
        ("&", "background command"),
    )
    separator_name = next(
        (label for sep, label in separator_labels if sep in command), None
    )

    if separator_name is not None:
        is_pipeline = "|" in command
        all_parts_types: List[str] = []

        # Split on every separator at once. Splitting on only one kind would
        # leave commands behind the other kinds unvalidated, e.g. the `rm`
        # in `ls | cat ; rm -rf /`.
        for part in re.split(r"[|;&]", command):
            part = part.strip()
            if not part:
                result["error"] = f"Empty command in {separator_name}"
                return result

            try:
                cmd_part, _ = parse_command(part)
            except ValueError as e:
                result["error"] = (
                    f"Invalid command syntax in {separator_name}: {str(e)}"
                )
                return result

            # Pipeline segments that are not simple commands (for example a
            # bare `-v`) are treated as continuations and skipped.
            if is_pipeline and (part.startswith("-") or not cmd_part):
                continue

            # Check if any command is blocked
            if cmd_part in blocked_commands:
                result["error"] = (
                    f"Command '{cmd_part}' in {separator_name} is blocked for security reasons"
                )
                return result

            # Segments without a command name have nothing to classify.
            if not cmd_part:
                continue

            part_type = _command_type(cmd_part)
            if part_type is None:
                result["error"] = (
                    f"Command '{cmd_part}' in {separator_name} is not recognized or supported. Supported commands: {supported_cmds}"
                )
                return result
            all_parts_types.append(part_type)

        # Determine the most privileged command type
        if "system" in all_parts_types:
            result["command_type"] = "system"
        elif "write" in all_parts_types:
            result["command_type"] = "write"
        else:
            result["command_type"] = "read"

        result["is_valid"] = True
        return result

    # Single command without separators
    try:
        main_cmd, _ = parse_command(command)
    except ValueError as e:
        result["error"] = f"Invalid command syntax: {str(e)}"
        return result

    # Check if command is blocked
    if main_cmd in blocked_commands:
        result["error"] = f"Command '{main_cmd}' is blocked for security reasons"
        return result

    main_type = _command_type(main_cmd)
    if main_type is None:
        result["error"] = (
            f"Command '{main_cmd}' is not recognized or supported. Supported commands: {supported_cmds}"
        )
    else:
        result["command_type"] = main_type
        result["is_valid"] = True

    return result
216
+
217
+
218
def normalize_path(path: str) -> str:
    """Return an absolute, symlink-resolved form of a path.

    Args:
        path: The path to normalize; a leading `~` is expanded to the
            user's home directory

    Returns:
        The resolved absolute path, or the absolute path with `.` and `..`
        collapsed if resolving symlinks fails
    """
    expanded = os.path.expanduser(path) if path.startswith("~") else path

    # abspath anchors relative paths at the current working directory;
    # normpath collapses '.' and '..' components.
    collapsed = os.path.normpath(os.path.abspath(expanded))

    try:
        return os.path.realpath(collapsed)
    except OSError:
        # Symlink resolution failed; the collapsed path is the best we have.
        return collapsed
242
+
243
+
244
def extract_directory_from_command(command: str) -> Optional[str]:
    """Guess the directory a command operates on.

    This is a heuristic. The steps, in order:

    1. If the command contains a `~/...` path, use it (or its parent when it
       is not an existing directory).
    2. For `|` pipelines and `;` sequences, return the first segment whose
       directory differs from the current working directory; otherwise fall
       back to the first segment.
    3. For a single command, look at its arguments: any argument containing
       `~`, then the first non-flag argument for directory-style commands
       (ls, cd, find, ...), or the last non-flag argument for file-style
       commands (cat, grep, ...).

    Args:
        command: The command string

    Returns:
        The directory the command appears to target, the current working
        directory when nothing more specific is found or parsing fails, or
        None when the command contains no tokens.
    """
    try:
        # Special case for tilde paths anywhere in the command
        if "~/" in command:
            # Stop the path at whitespace, shell separators and quotes so
            # that `ls ~/proj;pwd` yields `~/proj` rather than `~/proj;pwd`.
            match = re.search(r"""~/[^\s|;&"']+""", command)
            if match:
                expanded_path = os.path.expanduser(match.group(0))

                if os.path.isdir(expanded_path):
                    return normalize_path(expanded_path)

                # Not a directory: treat it as a file and use its parent
                parent = os.path.dirname(expanded_path)
                if parent:
                    return normalize_path(parent)

        # Handle pipeline commands
        if "|" in command:
            pipe_parts = command.split("|")
            for part in pipe_parts:
                # The first segment that names a directory other than the
                # current one wins
                dir_from_part = extract_directory_from_command(part.strip())
                if dir_from_part and dir_from_part != os.getcwd():
                    return dir_from_part

            # No specific directory found: analyze the first command
            return extract_directory_from_command(pipe_parts[0].strip())

        # Handle semicolon-separated commands the same way
        if ";" in command:
            commands = command.split(";")
            for cmd in commands:
                dir_from_cmd = extract_directory_from_command(cmd.strip())
                if dir_from_cmd and dir_from_cmd != os.getcwd():
                    return dir_from_cmd

            return extract_directory_from_command(commands[0].strip())

        # Process a single command
        parts = shlex.split(command)
        if not parts:
            return None

        main_cmd = parts[0]
        args = parts[1:]

        # First check for non-flag arguments containing a tilde
        for arg in args:
            if not arg.startswith("-") and ("~" in arg):
                expanded_path = os.path.expanduser(arg)

                if os.path.isdir(expanded_path):
                    return normalize_path(expanded_path)
                parent = os.path.dirname(expanded_path)
                if parent and parent != "." and os.path.isdir(parent):
                    return normalize_path(parent)

        # Commands whose first non-flag argument is usually the directory
        if main_cmd in [
            "ls",
            "cd",
            "find",
            "du",
            "rm",
            "mkdir",
            "rmdir",
            "touch",
            "chmod",
            "chown",
        ]:
            for arg in args:
                if not arg.startswith("-"):
                    if os.path.isdir(arg):
                        return normalize_path(arg)
                    parent = os.path.dirname(arg)
                    if parent:
                        return normalize_path(parent)
                    # Bare name with no directory part: current directory
                    return os.getcwd()

            # No directory argument found, assume current directory
            return os.getcwd()

        # Commands that usually take the file as their last non-flag argument
        elif main_cmd in ["cat", "less", "head", "tail", "grep", "wc", "awk", "sed"]:
            file_arg = None
            for arg in args:
                if not arg.startswith("-"):
                    file_arg = arg

            if file_arg:
                parent = os.path.dirname(file_arg)
                if parent:
                    return normalize_path(parent)
                elif "~" in file_arg:
                    # Handle tilde in path
                    expanded = os.path.expanduser(file_arg)
                    parent = os.path.dirname(expanded)
                    if parent:
                        return normalize_path(parent)

            # Default to current directory
            return os.getcwd()

        # Commands that don't specify a directory
        else:
            return os.getcwd()

    except (ValueError, IndexError):
        # If parsing fails (e.g. unbalanced quotes), default to current directory
        return os.getcwd()
380
+
381
+
382
def is_directory_whitelisted(directory: str, whitelisted_dirs: List[str]) -> bool:
    """Check if a directory is whitelisted or lies inside a whitelisted directory.

    Each whitelist entry is tried in turn as an exact match, as a parent
    directory, and - when it contains `*` - as a wildcard pattern. In a
    wildcard entry `*` matches any run of characters (including path
    separators), every other character is matched literally, and the match
    also covers subdirectories of whatever the pattern matches.

    Args:
        directory: The directory to check
        whitelisted_dirs: List of whitelisted directories; entries may start
            with `~` and may contain `*` wildcards

    Returns:
        True if the directory is whitelisted, False otherwise (including
        when any error occurs while checking)
    """
    try:
        normalized_dir = normalize_path(directory)

        for whitelist_dir in whitelisted_dirs:
            # normalize_path expands a leading ~ itself, so no special
            # casing is needed for home-relative entries.
            normalized_whitelist = normalize_path(whitelist_dir)

            # Exact match
            if normalized_dir == normalized_whitelist:
                return True

            # Subdirectory of a whitelisted directory
            if normalized_dir.startswith(normalized_whitelist + os.sep):
                return True

            # Wildcard entries
            if "*" in whitelist_dir:
                glob_source = whitelist_dir
                if glob_source.startswith("~"):
                    # The directory being checked is absolute, so the
                    # pattern has to be as well.
                    glob_source = os.path.expanduser(glob_source)

                # Escape the literal chunks so characters such as '.' are
                # not interpreted as regex syntax; only '*' is a wildcard.
                pattern = ".*".join(
                    re.escape(chunk) for chunk in glob_source.split("*")
                )
                # Allow subdirectories of a match, but require the match to
                # end on a path boundary so `/a/*/proj` does not also
                # whitelist `/a/x/project-other`.
                pattern += "(?:" + re.escape(os.sep) + ".*)?"
                if re.fullmatch(pattern, normalized_dir):
                    return True

        return False
    except Exception as error:
        # Fail closed: any error in normalization or matching is logged and
        # treated as "not whitelisted".
        logger.error("Error checking if directory is whitelisted: %s", error)
        return False