mcpower-proxy 0.0.73__py3-none-any.whl → 0.0.77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,28 +4,94 @@ Common shell execution handler - IDE-agnostic
4
4
  Handles both request (before) and response (after) inspection for shell commands.
5
5
  """
6
6
 
7
- import sys
7
+ import os
8
8
  from typing import Optional, Dict, List
9
9
 
10
10
  from modules.logs.audit_trail import AuditTrailLogger
11
11
  from modules.logs.logger import MCPLogger
12
12
  from modules.redaction import redact
13
13
  from modules.utils.ids import get_session_id, read_app_uid, get_project_mcpower_dir
14
- from .types import HookConfig
15
14
  from .output import output_result, output_error
15
+ from .shell_parser_bashlex import parse_shell_command
16
+ from .types import HookConfig
16
17
  from .utils import create_validator, inspect_and_enforce
17
18
 
18
19
 
20
def extract_and_redact_command_files(
        command: str,
        cwd: Optional[str],
        logger: MCPLogger
) -> Dict[str, str]:
    """
    Read and redact the input files referenced by a shell command.

    The command is handed to parse_shell_command(); each input file it
    reports is opened as UTF-8 text and its content passed through redact().
    Every failure (parse error, missing file, non-text file, read error) is
    logged as a warning and skipped, so this function does not raise.

    Args:
        command: The shell command to parse
        cwd: Current working directory (for resolving relative paths)
        logger: Logger instance for info/warning messages

    Returns:
        Dictionary mapping each filename, exactly as reported by the parser,
        to its redacted content. Files that could not be read are absent.
    """
    redacted_by_name = {}

    try:
        _, input_files = parse_shell_command(command, initial_cwd=cwd)
        logger.info(f"Extracted {len(input_files)} input files from command: {input_files}")

        for filename in input_files:
            try:
                # Anchor relative names at cwd when one is known.
                if os.path.isabs(filename) or not cwd:
                    filepath = filename
                else:
                    filepath = os.path.join(cwd, filename)

                # Guard clause: only regular, existing files are read.
                if not (os.path.exists(filepath) and os.path.isfile(filepath)):
                    logger.warning(f"File {filename} does not exist or is not a file, skipping")
                    continue

                try:
                    with open(filepath, 'r', encoding='utf-8') as handle:
                        raw_text = handle.read()

                    # Key by the parser-reported name, not the joined path.
                    redacted_by_name[filename] = redact(raw_text)
                    logger.info(f"Successfully read and redacted file: {filename}")
                except UnicodeDecodeError:
                    logger.warning(f"File {filename} is not a text file, skipping")
                except Exception as read_error:
                    logger.warning(f"Failed to read file {filename}: {read_error}")

            except Exception as file_error:
                logger.warning(f"Error processing file {filename}: {file_error}")

    except Exception as parse_error:
        logger.warning(f"Failed to parse command for file extraction: {parse_error}")

    return redacted_by_name
83
+
84
+
19
85
  async def handle_shell_execution(
20
- logger: MCPLogger,
21
- audit_logger: AuditTrailLogger,
22
- stdin_input: str,
23
- prompt_id: str,
24
- event_id: str,
25
- cwd: Optional[str],
26
- config: HookConfig,
27
- tool_name: str,
28
- is_request: bool = True
86
+ logger: MCPLogger,
87
+ audit_logger: AuditTrailLogger,
88
+ stdin_input: str,
89
+ prompt_id: str,
90
+ event_id: str,
91
+ cwd: Optional[str],
92
+ config: HookConfig,
93
+ tool_name: str,
94
+ is_request: bool = True
29
95
  ):
30
96
  """
31
97
  Generic shell execution handler - handles both request and response
@@ -60,20 +126,20 @@ async def handle_shell_execution(
60
126
 
61
127
 
62
128
  async def _handle_shell_operation(
63
- logger: MCPLogger,
64
- audit_logger: AuditTrailLogger,
65
- stdin_input: str,
66
- prompt_id: str,
67
- event_id: str,
68
- cwd: Optional[str],
69
- config: HookConfig,
70
- is_request: bool,
71
- required_fields: Dict[str, type],
72
- redact_fields: List[str],
73
- tool_name: str,
74
- operation_name: str,
75
- audit_event_type: str,
76
- audit_forwarded_event_type: str
129
+ logger: MCPLogger,
130
+ audit_logger: AuditTrailLogger,
131
+ stdin_input: str,
132
+ prompt_id: str,
133
+ event_id: str,
134
+ cwd: Optional[str],
135
+ config: HookConfig,
136
+ is_request: bool,
137
+ required_fields: Dict[str, type],
138
+ redact_fields: List[str],
139
+ tool_name: str,
140
+ operation_name: str,
141
+ audit_event_type: str,
142
+ audit_forwarded_event_type: str
77
143
  ):
78
144
  """
79
145
  Internal shell operation handler - shared logic for request and response
@@ -88,11 +154,11 @@ async def _handle_shell_operation(
88
154
  audit_forwarded_event_type: Audit event name for forwarded operation
89
155
  """
90
156
  session_id = get_session_id()
91
-
92
- logger.info(f"{tool_name} handler started (client={config.client_name}, prompt_id={prompt_id}, event_id={event_id}, cwd={cwd})")
93
-
157
+
158
+ logger.info(
159
+ f"{tool_name} handler started (client={config.client_name}, prompt_id={prompt_id}, event_id={event_id}, cwd={cwd})")
160
+
94
161
  try:
95
- # Validate input
96
162
  try:
97
163
  validator = create_validator(required_fields=required_fields)
98
164
  input_data = validator(stdin_input)
@@ -100,43 +166,52 @@ async def _handle_shell_operation(
100
166
  logger.error(f"Input validation error: {e}")
101
167
  output_error(logger, config.output_format, "permission", str(e))
102
168
  return
103
-
169
+
104
170
  app_uid = read_app_uid(logger, get_project_mcpower_dir(cwd))
105
171
  audit_logger.set_app_uid(app_uid)
106
-
107
- # Redact sensitive data for logging
172
+
108
173
  redacted_data = {}
109
174
  for k, v in input_data.items():
110
175
  if k in required_fields:
111
176
  redacted_data[k] = redact(v) if k in redact_fields else v
112
-
113
- logger.info(f"Analyzing {tool_name}: {redacted_data}")
114
-
115
- # Use different structure for request vs response events
116
- # Requests: params nested, Responses: unpacked at root
117
- if is_request:
118
- audit_data = {
119
- "server": config.server_name,
120
- "tool": tool_name,
121
- "params": redacted_data
122
- }
123
- else:
124
- audit_data = {
125
- "server": config.server_name,
126
- "tool": tool_name,
127
- **redacted_data
128
- }
129
-
177
+
178
+ # Extract and redact input files for request inspection
179
+ files_dict = {}
180
+ if is_request and "command" in input_data:
181
+ command = input_data["command"]
182
+ files_dict = extract_and_redact_command_files(command, cwd, logger)
183
+ if files_dict:
184
+ logger.info(f"Extracted and redacted {len(files_dict)} files from command")
185
+
186
+ def get_audit_data():
187
+ # Use different structure for request vs response events
188
+ # Requests: params nested, Responses: unpacked at root
189
+ if is_request:
190
+ return {
191
+ "server": config.server_name,
192
+ "tool": tool_name,
193
+ "params": redacted_data,
194
+ "files": list(files_dict.keys()) if files_dict else None
195
+ }
196
+ else:
197
+ return {
198
+ "server": config.server_name,
199
+ "tool": tool_name,
200
+ **redacted_data
201
+ }
202
+
130
203
  audit_logger.log_event(
131
204
  audit_event_type,
132
- audit_data,
133
- event_id=event_id
205
+ get_audit_data(),
206
+ event_id=event_id,
207
+ prompt_id=prompt_id
134
208
  )
135
-
136
- # Build content_data with redacted fields
137
- content_data = redacted_data
138
-
139
- # Call security API and enforce decision
209
+
210
+ # Build content_data with redacted fields and files
211
+ content_data = redacted_data.copy()
212
+ if files_dict:
213
+ content_data["files"] = files_dict
214
+
140
215
  try:
141
216
  decision = await inspect_and_enforce(
142
217
  is_request=is_request,
@@ -152,28 +227,14 @@ async def _handle_shell_operation(
152
227
  cwd=cwd,
153
228
  client_name=config.client_name
154
229
  )
155
-
156
- # Log audit event for forwarding
157
- # Use different structure for request vs response
158
- if is_request:
159
- forwarded_data = {
160
- "server": config.server_name,
161
- "tool": tool_name,
162
- "params": redacted_data
163
- }
164
- else:
165
- forwarded_data = {
166
- "server": config.server_name,
167
- "tool": tool_name,
168
- **redacted_data
169
- }
170
-
230
+
171
231
  audit_logger.log_event(
172
232
  audit_forwarded_event_type,
173
- forwarded_data,
174
- event_id=event_id
233
+ get_audit_data(),
234
+ event_id=event_id,
235
+ prompt_id=prompt_id
175
236
  )
176
-
237
+
177
238
  reasons = decision.get("reasons", [])
178
239
  user_message = f"{operation_name} approved"
179
240
  if not reasons:
@@ -181,16 +242,16 @@ async def _handle_shell_operation(
181
242
  else:
182
243
  agent_message = f"{operation_name} approved: {'; '.join(reasons)}"
183
244
  output_result(logger, config.output_format, "permission", True, user_message, agent_message)
184
-
245
+
185
246
  except Exception as e:
186
247
  # Decision enforcement failed - block
187
248
  error_msg = str(e)
188
249
  user_message = f"{operation_name} blocked by security policy"
189
250
  if "User blocked" in error_msg or "User denied" in error_msg:
190
251
  user_message = f"{operation_name} blocked by user"
191
-
252
+
192
253
  output_result(logger, config.output_format, "permission", False, user_message, error_msg)
193
-
254
+
194
255
  except Exception as e:
195
256
  logger.error(f"Unexpected error in {tool_name} handler: {e}", exc_info=True)
196
257
  output_error(logger, config.output_format, "permission", f"Unexpected error: {str(e)}")
@@ -0,0 +1,394 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Shell command parser using bashlex library.
4
+ Parses shell commands to extract sub-commands and file references using proper bash parsing.
5
+ """
6
+
7
+ import bashlex
8
+ import os
9
+ from typing import List, Tuple, Set, Optional, Dict
10
+
11
+
12
def parse_shell_command(command: str, initial_cwd: Optional[str] = None) -> Tuple[List[str], List[str]]:
    """
    Parse a shell command using bashlex and extract sub-commands and input files.

    Args:
        command: A shell command string (supports pipes, redirections, etc.)
        initial_cwd: Directory used to resolve relative paths
            (defaults to the process's current directory)

    Returns:
        A tuple of (sub_commands, input_files) where:
        - sub_commands: Source text of each simple command, in visit order
        - input_files: Sorted list of referenced paths that were not seen as
          the target of an output redirection. Relative paths are resolved
          against the working directory tracked while walking the command
          (initial_cwd, updated by any `cd`), so results are normally absolute.

        If bashlex cannot parse the command, the fallback ([command], [])
        is returned and a warning is logged.

    Examples (with initial_cwd="/work"):
        "cat a.txt > /tmp/b.txt"
            -> (['cat a.txt > /tmp/b.txt'], ['/work/a.txt'])
        "grep foo file.txt | sort | uniq > output.txt"
            -> (['grep foo file.txt', 'sort', 'uniq > output.txt'], ['/work/file.txt'])
        "python a.py | tee b.log"
            -> (['python a.py', 'tee b.log'], ['/work/a.py', '/work/b.log'])
            (b.log is a plain argument, not a redirection target, so it is
            reported as an input)
    """
    try:
        # Parse the command into a list of AST roots
        parts = bashlex.parse(command)
    except Exception as e:
        # Report through logging rather than print(): this is library code and
        # writing to stdout could corrupt a caller that emits its own
        # structured output there. Imported locally to keep this block
        # self-contained.
        import logging
        logging.getLogger(__name__).warning("bashlex parsing failed: %s", e)
        return ([command], [])

    sub_commands: List[str] = []
    all_files: Set[str] = set()
    output_files: Set[str] = set()

    # Mutable state shared with the AST walkers; 'cwd' is updated on `cd`.
    context = {'cwd': initial_cwd or os.getcwd()}

    for ast in parts:
        _extract_from_ast(ast, command, sub_commands, all_files, output_files, False, context)

    # Drop files that only ever appeared as output-redirection targets
    input_files = sorted(all_files - output_files)

    return sub_commands, input_files
61
+
62
+
63
def _extract_from_ast(
        node,
        command: str,
        sub_commands: List[str],
        all_files: Set[str],
        output_files: Set[str],
        parent_is_pipe: bool = False,
        context: Optional[Dict] = None
) -> None:
    """
    Recursively extract sub-commands and files from a bashlex AST node.

    Results are accumulated into the mutable arguments; nothing is returned.

    Args:
        node: bashlex AST node
        command: Original command string (sliced with node.pos to get text)
        sub_commands: List to append each simple command's text to
        all_files: Set to add all file references to
        output_files: Set to add output-only files to
        parent_is_pipe: True if the parent node is a pipeline
            (accepted for callers; not consulted here)
        context: Dictionary whose 'cwd' entry is the working directory used
            for resolving relative paths; it is updated in place when a
            `cd <dir>` command is encountered
    """
    if context is None:
        context = {'cwd': os.getcwd()}

    node_kind = getattr(node, 'kind', None)

    if node_kind == 'list':
        # Parts joined by &&, ||, ; - walk in order so `cd` affects later parts
        for part in getattr(node, 'parts', None) or ():
            _extract_from_ast(part, command, sub_commands, all_files, output_files, False, context)

    elif node_kind == 'pipeline':
        _extract_pipeline(node, command, sub_commands, all_files, output_files, context)

    elif node_kind == 'command':
        if hasattr(node, 'pos'):
            start, end = node.pos
            sub_commands.append(command[start:end])

        parts = getattr(node, 'parts', None) or []

        # The first word is the command name; helpers use it for context
        cmd_name = getattr(parts[0], 'word', None) if parts else None

        # Track directory changes so later relative paths resolve correctly
        if cmd_name == 'cd' and len(parts) > 1:
            target_dir = getattr(parts[1], 'word', None)
            if target_dir is not None:
                # `cd ~/x` must land in the home directory, not <cwd>/~/x
                target_dir = os.path.expanduser(target_dir)
                if os.path.isabs(target_dir):
                    context['cwd'] = target_dir
                else:
                    context['cwd'] = os.path.normpath(os.path.join(context['cwd'], target_dir))

        # Arguments and redirections may both reference files
        for i, part in enumerate(parts):
            if getattr(part, 'kind', None) == 'redirect':
                _extract_redirect(part, command, all_files, output_files, context)
            elif i > 0:  # Skip the command name itself (index 0)
                _extract_files_from_node(part, command, all_files, output_files, cmd_name, context)

    elif node_kind == 'compound':
        # Compound command: walk its body, then any redirections attached to
        # the construct as a whole (e.g. `{ ...; } < in.txt`)
        for item in getattr(node, 'list', None) or ():
            _extract_from_ast(item, command, sub_commands, all_files, output_files, False, context)
        for redirect in getattr(node, 'redirects', None) or ():
            _extract_redirect(redirect, command, all_files, output_files, context)

    elif node_kind in ('if', 'for', 'while', 'until', 'function'):
        # Control-flow / function nodes keep their conditions and bodies in
        # `parts`. Without descending here, every command nested inside a
        # loop or conditional would be skipped. Reserved words and bare words
        # among the parts are ignored by the recursion.
        for part in getattr(node, 'parts', None) or ():
            _extract_from_ast(part, command, sub_commands, all_files, output_files, False, context)

    # Remaining kinds (operator, pipe, reservedword, word, ...) carry nothing
    # to extract at this level.
148
+
149
+
150
def _extract_pipeline(node, command: str, sub_commands: List[str], all_files: Set[str], output_files: Set[str], context: Dict) -> None:
    """Walk a pipeline node, descending into every part except the pipe separators."""
    stages = node.parts if hasattr(node, 'parts') else ()
    for stage in stages:
        if getattr(stage, 'kind', None) == 'pipe':
            # The `|` tokens themselves carry no commands or files.
            continue
        _extract_from_ast(stage, command, sub_commands, all_files, output_files, True, context)
158
+
159
+
160
def _extract_files_from_node(node, command: str, all_files: Set[str], output_files: Set[str], cmd_name: Optional[str] = None, context: Optional[Dict] = None) -> None:
    """Collect file references from an argument node of a command.

    Args:
        node: bashlex AST node (a word, or a substitution nested in one)
        command: Original command string
        all_files: Set to add all file references to
        output_files: Set to add output-only files to
        cmd_name: Name of the command this node belongs to (for context)
        context: Dictionary with 'cwd' for current working directory
    """
    if context is None:
        context = {'cwd': os.getcwd()}

    kind = getattr(node, 'kind', None)

    if kind in ('commandsubstitution', 'processsubstitution'):
        # $(...), <(...) and >(...) wrap a full command: walk it, discarding
        # its sub-command text but keeping any files it references.
        if hasattr(node, 'command'):
            _extract_from_ast(node.command, command, [], all_files, output_files, False, context)
        return

    if kind != 'word':
        return

    text = node.word if hasattr(node, 'word') else None
    if text and _looks_like_file(text, cmd_name):
        # Store the path resolved against the tracked working directory.
        all_files.add(_resolve_path(text, context['cwd']))

    # A word may contain nested nodes (substitutions etc.) in its parts.
    if hasattr(node, 'parts'):
        for child in node.parts:
            _extract_files_from_node(child, command, all_files, output_files, cmd_name, context)
199
+
200
+
201
def _extract_redirect(redirect, command: str, all_files: Set[str], output_files: Set[str], context: Optional[Dict] = None) -> None:
    """Extract file references from a redirection node.

    Args:
        redirect: bashlex redirect node (reads its `type` and `output`)
        command: Original command string
        all_files: Set to add every referenced file to
        output_files: Set to additionally add write-only targets to
        context: Dictionary with 'cwd' for current working directory
    """
    if context is None:
        context = {'cwd': os.getcwd()}

    redirect_type = getattr(redirect, 'type', None)

    # Here-documents and here-strings feed literal text to the command; their
    # "target" is a delimiter or a string, never a file to open.
    if redirect_type in ('<<', '<<-', '<<<'):
        return

    if not hasattr(redirect, 'output'):
        return

    # File-descriptor duplications such as 2>&1 have no word target.
    target_word = getattr(redirect.output, 'word', None)
    if not target_word or not _looks_like_file(target_word, None):
        return

    resolved_path = _resolve_path(target_word, context['cwd'])
    all_files.add(resolved_path)

    # Write-only redirections (including the append-both form &>>) are also
    # recorded as outputs so callers can exclude them from the inputs.
    # Anything else ('<', '<>', unknown types) stays input-only, which is the
    # conservative choice.
    if redirect_type in ('>', '>>', '>&', '>|', '&>', '&>>'):
        output_files.add(resolved_path)
229
+
230
+
231
def _resolve_path(path: str, cwd: str) -> str:
    """
    Resolve a file path relative to a working directory.

    A leading `~` or `~user` is expanded first, so `~/notes.txt` refers to
    the home directory instead of a literal `~` folder under cwd.

    Args:
        path: File path (relative, absolute, or starting with `~`)
        cwd: Current working directory used for relative paths

    Returns:
        The expanded path unchanged if it is absolute; otherwise the
        normalized join of cwd and the path (absolute when cwd is absolute).
    """
    expanded = os.path.expanduser(path)
    if os.path.isabs(expanded):
        return expanded
    return os.path.normpath(os.path.join(cwd, expanded))
246
+
247
+
248
def _looks_like_file(word: str, cmd_name: Optional[str] = None) -> bool:
    """
    Heuristic to determine if a word is an actual readable file path.
    Not patterns, not variables, not directories - actual files we can open.

    The check is purely textual; nothing is looked up on disk.

    Args:
        word: A word from the command
        cmd_name: The command this word belongs to (for context)

    Returns:
        True if it looks like a file path
    """
    if not word:
        return False

    # --- NEGATIVE CHECKS ---

    # Commands that take directory arguments, not files
    directory_commands = {'cd', 'pushd', 'popd', 'mkdir', 'rmdir', 'chdir'}
    if cmd_name and cmd_name in directory_commands:
        return False

    # URLs (http://, https://, ftp://, file://, etc.)
    if '://' in word:
        return False

    # Glob patterns
    if any(char in word for char in '*?[]'):
        return False

    # Variables or command substitution
    if '$' in word or '`' in word:
        return False

    # sed/awk substitution patterns
    if word.startswith('s/') and word.count('/') >= 2:
        return False

    # Regex anchors (a trailing '$' is already rejected by the '$' check above)
    if word.startswith('^'):
        return False

    # Options
    if word.startswith(('-', '+')):
        return False

    # Bare dots
    if word in {'.', '..'}:
        return False

    # Bare directories (but /tmp/file is OK)
    if word in {'/', '/tmp', '/dev', '/usr', '/etc', '/var', '/opt', '/home'}:
        return False

    # --- POSITIVE CHECKS ---

    last_part = word.split('/')[-1]

    # Dotfiles (.env, ./.env, ~/.netrc): accepted regardless of prefix, so a
    # bare `.env` is treated the same as `config/.env`. Components made only
    # of dots ('.', '..') are not files.
    if last_part.startswith('.') and last_part.strip('.'):
        return True

    # Has extension = very likely a file
    if '.' in word and not word.startswith('.'):
        name, ext = word.rsplit('.', 1)
        # Be permissive with extensions; most are at most 10 chars
        if name and ext and len(ext) <= 10 and ext.replace('_', '').replace('-', '').isalnum():
            return True

    if '/' in word:
        # Trailing slash marks a directory
        if word.endswith('/'):
            return False

        # Last component has an extension
        if '.' in last_part and not last_part.startswith('.'):
            return True

        # Anything below directories that typically hold files/executables
        if word.startswith(('/dev/', '/tmp/', '/etc/', '/usr/bin/', '/usr/local/bin/')):
            return True

        # Last component looks like a filename (executable or script)
        return last_part.replace('-', '').replace('_', '').isalnum()

    # --- Stand-alone word without a path: be conservative ---

    # Well-known files without extensions (case-insensitive)
    if word.lower() in {'makefile', 'readme', 'license', 'dockerfile',
                        'gemfile', 'rakefile', 'procfile', 'vagrantfile',
                        'jenkinsfile', 'cakefile', 'gulpfile', 'gruntfile',
                        'brewfile', 'berksfile', 'guardfile', 'fastfile',
                        'cartfile', 'appfile', 'podfile', 'snapfile'}:
        return True

    # Contains a dot: probably a file in the current directory
    if '.' in word and not word.startswith('.'):
        return True

    # Well-known executable names without extensions; anything else could
    # just as well be a command or a plain argument
    return word in {'script', 'run', 'build', 'test', 'deploy', 'install',
                    'configure', 'setup', 'bootstrap', 'init'}
368
+
369
+
370
# Testing
if __name__ == "__main__":
    # Manual smoke run: parse a fixed set of sample commands and print what
    # the parser reports for each one.
    sample_commands = (
        "cd /Users/user/src/project/server && python test.py",
        "python a.py | tee b.log",
        "cat a.txt > /tmp/b.txt",
        "grep foo file.txt | sort | uniq > output.txt",
        "cat file1.txt file2.txt | grep pattern > result.txt",
        "python script.py < input.txt > output.txt",
        "ls -la /tmp | grep '\\.txt$' | wc -l",
        "tar -xzf archive.tar.gz",
        "find . -name '*.py' | xargs grep pattern",
    )

    print("Shell Command Parser (bashlex) - Test Cases\n" + "="*60)
    for sample in sample_commands:
        try:
            pieces, inputs = parse_shell_command(sample)
            print(f"\nCommand: {sample}")
            print(f"Sub-commands: {pieces}")
            print(f"Input files: {inputs}")
        except Exception as failure:
            print(f"\nCommand: {sample}")
            print(f"Error: {failure}")