bone-agent 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. package/LICENSE +21 -0
  2. package/README.md +184 -0
  3. package/bin/npm-wrapper.js +235 -0
  4. package/bin/rg +0 -0
  5. package/bin/rg.exe +0 -0
  6. package/config.yaml.example +133 -0
  7. package/package.json +53 -0
  8. package/requirements.txt +9 -0
  9. package/src/__init__.py +11 -0
  10. package/src/core/__init__.py +1 -0
  11. package/src/core/agentic.py +1054 -0
  12. package/src/core/chat_manager.py +1552 -0
  13. package/src/core/config_manager.py +247 -0
  14. package/src/core/cron.py +527 -0
  15. package/src/core/cron_allowlist.py +118 -0
  16. package/src/core/memory.py +232 -0
  17. package/src/core/retry.py +71 -0
  18. package/src/core/sub_agent.py +326 -0
  19. package/src/core/tool_approval.py +220 -0
  20. package/src/core/tool_feedback.py +778 -0
  21. package/src/exceptions.py +79 -0
  22. package/src/llm/__init__.py +1 -0
  23. package/src/llm/client.py +171 -0
  24. package/src/llm/config.py +466 -0
  25. package/src/llm/prompts.py +735 -0
  26. package/src/llm/providers.py +417 -0
  27. package/src/llm/streaming.py +163 -0
  28. package/src/llm/token_tracker.py +368 -0
  29. package/src/tools/__init__.py +212 -0
  30. package/src/tools/constants.py +59 -0
  31. package/src/tools/create_file.py +136 -0
  32. package/src/tools/directory.py +389 -0
  33. package/src/tools/edit.py +543 -0
  34. package/src/tools/file_reader.py +322 -0
  35. package/src/tools/helpers/__init__.py +105 -0
  36. package/src/tools/helpers/base.py +550 -0
  37. package/src/tools/helpers/converters.py +44 -0
  38. package/src/tools/helpers/file_helpers.py +189 -0
  39. package/src/tools/helpers/formatters.py +411 -0
  40. package/src/tools/helpers/loader.py +231 -0
  41. package/src/tools/helpers/parallel_executor.py +231 -0
  42. package/src/tools/helpers/path_resolver.py +226 -0
  43. package/src/tools/helpers/plugin_manifest.py +156 -0
  44. package/src/tools/obsidian.py +96 -0
  45. package/src/tools/review_sub_agent.py +189 -0
  46. package/src/tools/rg_search.py +393 -0
  47. package/src/tools/search_plugins.py +109 -0
  48. package/src/tools/select_option.py +593 -0
  49. package/src/tools/shell.py +302 -0
  50. package/src/tools/sub_agent.py +139 -0
  51. package/src/tools/task_list.py +269 -0
  52. package/src/tools/web_search.py +61 -0
  53. package/src/ui/__init__.py +1 -0
  54. package/src/ui/banner.py +87 -0
  55. package/src/ui/commands.py +2694 -0
  56. package/src/ui/displays.py +213 -0
  57. package/src/ui/loader.py +284 -0
  58. package/src/ui/main.py +646 -0
  59. package/src/ui/prompt_utils.py +113 -0
  60. package/src/ui/setting_selector.py +590 -0
  61. package/src/ui/setup_wizard.py +294 -0
  62. package/src/ui/sub_agent_panel.py +234 -0
  63. package/src/ui/tool_confirmation.py +215 -0
  64. package/src/utils/__init__.py +1 -0
  65. package/src/utils/citation_parser.py +199 -0
  66. package/src/utils/editor.py +158 -0
  67. package/src/utils/gitignore_filter.py +149 -0
  68. package/src/utils/logger.py +254 -0
  69. package/src/utils/paths.py +30 -0
  70. package/src/utils/result_parsers.py +108 -0
  71. package/src/utils/safe_commands.py +243 -0
  72. package/src/utils/settings.py +174 -0
  73. package/src/utils/validation.py +191 -0
  74. package/src/utils/web_search.py +173 -0
@@ -0,0 +1,243 @@
1
+ """Structured command safety system for auto-approval.
2
+
3
+ Replaces the flat ALLOWED_COMMANDS whitelist with a command+subcommand
4
+ granularity system that distinguishes read-only operations from mutations.
5
+
6
+ Design principles:
7
+ - No args = not safe (for commands with subcommand variants)
8
+ - Gate anything that has potential to be unsafe
9
+ - Deny-by-default: commands not in the dict require approval
10
+ - Compound flags use longest-prefix matching
11
+ """
12
+
13
+ import os
14
+ import shlex
15
+ from utils.validation import CHAINING_OPERATORS
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # SAFE_COMMAND_RULES
20
+ # ---------------------------------------------------------------------------
21
+ # Maps command names to their safety profile:
22
+ # None → always safe (inherently read-only, e.g., ps, pwd)
23
+ # set() → only safe for listed subcommands/flags
24
+ #
25
+ # Platform normalization strips .exe suffix and lowercases before lookup.
26
+
27
+ SAFE_COMMAND_RULES: dict[str, frozenset | None] = {
28
+ # --- Always safe (truly read-only, no mutating subcommands) ---
29
+ "pwd": None,
30
+ "which": None,
31
+ "whereis": None,
32
+ "uname": None,
33
+ "hostname": None,
34
+ "uptime": None,
35
+ "date": None,
36
+ "cal": None,
37
+ "whoami": None,
38
+ "id": None,
39
+ "env": frozenset({"--version", "--help"}),
40
+ "printenv": frozenset({"--version", "--help"}),
41
+ "lscpu": None,
42
+ "lsblk": None,
43
+ "file": None,
44
+ "stat": None,
45
+ "md5sum": None,
46
+ "sha256sum": None,
47
+ "free": None,
48
+ "df": None,
49
+ "du": None,
50
+ "dmesg": None,
51
+ "ltrace": None,
52
+ "ps": None,
53
+ "pgrep": None,
54
+ "pidof": None,
55
+ "lsof": None,
56
+ "ping": None,
57
+ "nslookup": None,
58
+ "dig": None,
59
+ "ss": None,
60
+ "ifconfig": None,
61
+ "netstat": None,
62
+ "journalctl": None,
63
+ "apt-cache": None,
64
+ "apt-show": None,
65
+ "dpkg-query": None,
66
+
67
+ # --- Subcommand-gated (safe only for specific read-only operations) ---
68
+ "git": frozenset({
69
+ "status", "log", "diff", "show", "branch",
70
+ "remote", "tag",
71
+ "rev-parse", "shortlog", "describe", "symbolic-ref",
72
+ "reflog", "name-rev", "blame", "annotate",
73
+ "for-each-ref", "ls-files", "ls-tree", "ls-remote",
74
+ }),
75
+ "pip": frozenset({"show", "list", "--version", "check", "debug", "index", "inspect"}),
76
+ "pip3": frozenset({"show", "list", "--version", "check", "debug", "index", "inspect"}),
77
+ "npm": frozenset({"list", "ls", "view", "version", "outdated", "pack", "info", "doctor", "audit"}),
78
+ "node": frozenset({"--version"}),
79
+ "python": frozenset({"--version"}),
80
+ "python3": frozenset({"--version"}),
81
+ "pacman": frozenset({
82
+ "-Q", "-Qi", "-Ql", "-Qo", "-Qs", "-Qt",
83
+ "-F", "-Si", "-Ss", "-Fl", "-G",
84
+ }),
85
+ "dpkg": frozenset({"-l", "-s", "-S", "-L", "-p", "--verify", "--audit"}),
86
+ "rpm": frozenset({"-q", "-qa", "-qi", "-ql", "-qf", "--queryformat"}),
87
+ "dnf": frozenset({"list", "info", "search", "check-update", "repoquery"}),
88
+ "yum": frozenset({"list", "info", "search", "check-update"}),
89
+ "systemctl": frozenset({
90
+ "status", "list-units", "list-unit-files", "show",
91
+ "is-active", "is-enabled", "cat", "list-timers",
92
+ "list-sockets", "list-jobs",
93
+ }),
94
+ "service": frozenset({"--status-all"}), # "service <name> status" handled by _is_safe_service_command
95
+ "ip": frozenset({"addr", "address", "link", "route", "neigh", "maddr", "rule", "netns"}),
96
+
97
+ # --- Windows equivalents ---
98
+ "where": None,
99
+ "systeminfo": None,
100
+ "Get-Process": None,
101
+ "Get-Service": None,
102
+ "Get-ChildItem": None,
103
+ "Get-Content": None,
104
+ "Get-Location": None,
105
+ "Test-Connection": None,
106
+ "Get-NetIPAddress": None,
107
+ }
108
+
109
+
110
# Sub-subcommand deny lists for commands where the first arg passes safety
# but later args can be mutating. Checked AFTER first-arg matching.
# If any token appears in the deny list, the command is rejected.
#
# "del" is the spelling most people type for "delete", and "exec" covers
# "ip netns exec <ns> <cmd>", which runs an arbitrary command.
# NOTE(review): iproute2 also accepts shorter abbreviations of its verbs
# (e.g. "ip addr a"); an allow-list of read verbs ("show", "list", "get")
# would be more robust than this deny list — confirm before relying on it.
_IP_MUTATING_VERBS = frozenset({
    "set", "add", "delete", "del", "replace", "flush", "change",
    "append", "prepend", "restore", "exec", "attach",
})

# Commands that need deep token scanning mapped to their deny sets.
_DEEP_SCAN_RULES: dict[str, frozenset] = {
    "ip": _IP_MUTATING_VERBS,
}
121
+
122
+
123
def _tokenize(command: str) -> list[str]:
    """Split a command line into a list of tokens.

    shlex is used with POSIX quoting rules on every platform except
    Windows (``os.name == "nt"``). When shlex rejects the string with a
    ValueError, the command is split on plain whitespace instead.
    """
    posix_mode = os.name != "nt"
    try:
        parts = shlex.split(command, posix=posix_mode)
    except ValueError:
        # Best-effort fallback: keep going with a naive split.
        parts = command.split()
    return parts
130
+
131
+
132
def _normalize_command_name(name: str) -> str:
    """Return the canonical form of a command name used for rule lookup.

    The name is lowercased and a trailing ``.exe`` (in any casing) is
    removed. Nothing else is changed: directory components, if present,
    are kept as-is.
    """
    lowered = name.lower()
    if lowered.endswith(".exe"):
        return lowered[:-4]
    return lowered
141
+
142
+
143
def _matches_safe_subcommand(arg: str, safe_set: frozenset) -> bool:
    """Check if an argument matches any entry in the safe subcommand set.

    Flag-style arguments (starting with ``-``) are compared
    CASE-SENSITIVELY, because flag case carries meaning: ``dpkg -p``
    prints package details while ``dpkg -P`` purges a package. They match
    on an exact hit or when a leading prefix of at least two characters
    is in the set (e.g. if ``-Qi`` is safe, ``-Qil`` also matches).

    Word-style subcommands (e.g. git ``status``) must match exactly and
    are compared case-insensitively.

    Args:
        arg: The argument token to test.
        safe_set: The allowed subcommands/flags for the command.

    Returns:
        True if ``arg`` is covered by ``safe_set``.
    """
    if arg.startswith("-"):
        if arg in safe_set:
            return True
        # Longest prefix first; never shorter than "-" plus one character.
        return any(
            arg[:length] in safe_set
            for length in range(len(arg) - 1, 1, -1)
        )

    return arg.lower() in {entry.lower() for entry in safe_set}
170
+
171
+
172
def is_safe_command(command: str) -> bool:
    """Check if a command should be auto-approved (safe, read-only).

    A command is auto-approved when:
    1. It contains no chaining/redirection operators, no single ``&``
       (background/command separator) and no ``(`` (subshell or
       PowerShell grouping expression, which executes its contents)
    2. The command name is in SAFE_COMMAND_RULES (compared
       case-insensitively, so the mixed-case cmdlet keys are reachable)
    3. If gated (has a set of safe subcommands), the first argument
       matches an entry in the set
    4. If always-safe (None), it's approved with or without args

    Two special cases apply to gated commands:
    - ``service <name> status`` is approved only in exactly that
      three-token form.
    - Commands listed in _DEEP_SCAN_RULES are rejected when any token
      after the first argument is in their deny set.

    Args:
        command: Command string to validate

    Returns:
        bool: True if the command is safe to auto-approve
    """
    command = command.strip()
    if not command:
        return False

    # Strip "powershell " prefix if present (legacy support for Windows users)
    if command.lower().startswith("powershell "):
        command = command[len("powershell "):].strip()

    # Reject any command containing chaining/redirection operators
    if CHAINING_OPERATORS.search(command):
        return False

    # Checked here as well so this function is safe regardless of which
    # patterns CHAINING_OPERATORS contains: "ps & rm x" runs two commands,
    # and "Get-Process (Remove-Item x)" evaluates the parenthesised part.
    if "&" in command or "(" in command:
        return False

    # Tokenize and get command name
    tokens = _tokenize(command)
    if not tokens:
        return False

    cmd_name = _normalize_command_name(tokens[0])

    # cmd_name is lowercased, so compare against lowercased rule keys;
    # otherwise keys such as "Get-Process" could never match.
    rules = {name.lower(): rule for name, rule in SAFE_COMMAND_RULES.items()}

    # Look up in rules (deny-by-default)
    if cmd_name not in rules:
        # Unknown command — require approval
        return False

    rule = rules[cmd_name]
    if rule is None:
        # Always-safe command (e.g., ps, pwd)
        return True

    if not rule:
        # Empty frozenset — no safe subcommands defined, deny
        return False

    # Gated command — need at least one subcommand arg
    if len(tokens) < 2:
        return False

    first_arg = tokens[1]

    # Special case: "service <name> status". Require exactly three tokens so
    # that "service nginx stop status" is not approved on its last word.
    if cmd_name == "service" and len(tokens) == 3 and tokens[2].lower() == "status":
        return True

    if not _matches_safe_subcommand(first_arg, rule):
        return False

    # Deep scan: for commands with known mutating sub-subcommands,
    # reject if any remaining token matches the deny list.
    deny_set = _DEEP_SCAN_RULES.get(cmd_name)
    if deny_set and any(tok.lower() in deny_set for tok in tokens[2:]):
        return False

    return True
@@ -0,0 +1,174 @@
1
+ """Centralized configuration for bone-agent."""
2
+ import re
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Set
6
+
7
+ # Load config from llm.config
8
+ # Note: src/ is added to sys.path in main.py, so we can import directly
9
+ from llm.config import _CONFIG
10
+
11
+ # Styles and themes
12
+ from pygments.styles.monokai import MonokaiStyle
13
+
14
+
15
class MonokaiDarkBGStyle(MonokaiStyle):
    """Monokai style with dark background for code highlighting."""
    # Class attribute set on the subclass; presumably replaces the
    # background colour defined by MonokaiStyle — confirm against pygments.
    background_color = "#141414"
18
+
19
+
20
# One to six '#' at the start of a line, then horizontal whitespace, then the
# heading text. "[^\S\r\n]" is "whitespace except line breaks": a plain "\s"
# would let a bare "#" line swallow the newline and merge with the next line.
_HEADING_RE = re.compile(r'^(#{1,6})[^\S\r\n]+(.+)$', re.MULTILINE)


def left_align_headings(text: str) -> str:
    """Strip markdown heading markers to avoid Rich's centering.

    Every line of the form ``# Title`` .. ``###### Title`` is replaced by
    just its title text; all other lines are returned unchanged.

    Args:
        text: Markdown text, possibly spanning several lines.

    Returns:
        The text with leading heading markers removed.
    """
    return _HEADING_RE.sub(lambda m: m.group(2), text)
26
+
27
+
28
def _server_default(key, fallback):
    """Return a default_factory reading ``key`` from the SERVER_SETTINGS section.

    The config is consulted when the factory is called (i.e. when a
    ServerSettings instance is created), not when the class is defined.
    """
    def factory():
        return _CONFIG.get("SERVER_SETTINGS", {}).get(key, fallback)
    return factory


@dataclass
class ServerSettings:
    """Local llama-server configuration.

    Each field defaults to the matching key under ``SERVER_SETTINGS`` in
    the loaded config, or to the built-in fallback when the key is absent.
    """
    ngl_layers: int = field(default_factory=_server_default("ngl_layers", 30))
    ctx_size: int = field(default_factory=_server_default("ctx_size", 8192))
    n_predict: int = field(default_factory=_server_default("n_predict", 8192))
    rope_scale: float = field(default_factory=_server_default("rope_scale", 1.0))
    health_check_timeout_sec: int = field(
        default_factory=_server_default("health_check_timeout_sec", 120)
    )
    health_check_interval_sec: float = field(
        default_factory=_server_default("health_check_interval_sec", 1.0)
    )
37
+
38
+
39
def _tool_default(key, fallback):
    """Return a default_factory reading ``key`` from the TOOL_SETTINGS section.

    The lookup happens each time the factory is called, i.e. when a
    ToolSettings instance is created.
    """
    def factory():
        return _CONFIG.get("TOOL_SETTINGS", {}).get(key, fallback)
    return factory


@dataclass
class ToolSettings:
    """Tool execution limits and defaults.

    Each field defaults to the matching key under ``TOOL_SETTINGS`` in the
    loaded config, or to the built-in fallback when the key is absent.
    """
    max_tool_calls: int = field(default_factory=_tool_default("max_tool_calls", 100))
    command_timeout_sec: int = field(default_factory=_tool_default("command_timeout_sec", 30))
    enable_parallel_execution: bool = field(
        default_factory=_tool_default("enable_parallel_execution", True)
    )
    max_parallel_workers: int = field(default_factory=_tool_default("max_parallel_workers", 10))
    max_command_output_lines: int = field(
        default_factory=_tool_default("max_command_output_lines", 100)
    )
    max_shell_output_lines: int = field(
        default_factory=_tool_default("max_shell_output_lines", 200)
    )
    max_file_preview_lines: int = field(
        default_factory=_tool_default("max_file_preview_lines", 200)
    )
    # Kept as an inline lambda so every instance gets its own fresh list
    # when the key is missing from the config.
    disabled_tools: list = field(
        default_factory=lambda: _CONFIG.get("TOOL_SETTINGS", {}).get("disabled_tools", [])
    )
50
+
51
@dataclass
class FileSettings:
    """File scanning and reading limits.

    The byte limits default to the ``FILE_SETTINGS`` config section. When
    ``exclude_dirs`` is left as None it is filled in after construction:
    from the config if a non-empty value is configured there, otherwise
    from a built-in set of directory names.
    """
    max_file_bytes: int = field(
        default_factory=lambda: _CONFIG.get("FILE_SETTINGS", {}).get("max_file_bytes", 200_000)
    )
    max_total_bytes: int = field(
        default_factory=lambda: _CONFIG.get("FILE_SETTINGS", {}).get("max_total_bytes", 1_500_000)
    )
    exclude_dirs: Set[str] = None

    def __post_init__(self):
        # An explicitly supplied value (even an empty set) is left untouched.
        if self.exclude_dirs is not None:
            return
        configured = _CONFIG.get("FILE_SETTINGS", {}).get("exclude_dirs")
        if configured:
            self.exclude_dirs = set(configured)
            return
        self.exclude_dirs = {".git", ".venv", "llama.cpp", "bin", "__pycache__"}
65
+
66
+
67
def _tool_compaction_default(key, fallback):
    """Return a default_factory reading ``key`` from CONTEXT_SETTINGS.tool_compaction.

    The nested config lookup runs when the factory is called, i.e. when a
    ToolCompactionSettings instance is created.
    """
    def factory():
        section = _CONFIG.get("CONTEXT_SETTINGS", {}).get("tool_compaction", {})
        return section.get(key, fallback)
    return factory


@dataclass
class ToolCompactionSettings:
    """Per-message tool result compaction settings.

    Each field defaults to the matching key under
    ``CONTEXT_SETTINGS -> tool_compaction`` in the loaded config, or to the
    built-in fallback when the key is absent.
    """
    enable_per_message_compaction: bool = field(
        default_factory=_tool_compaction_default("enable_per_message_compaction", True)
    )
    uncompacted_tail_tokens: int = field(
        default_factory=_tool_compaction_default("uncompacted_tail_tokens", 40_000)
    )
    min_tool_blocks: int = field(default_factory=_tool_compaction_default("min_tool_blocks", 5))
    compact_failed_tools: bool = field(
        default_factory=_tool_compaction_default("compact_failed_tools", True)
    )
74
+
75
+
76
def _sub_agent_default(key, fallback):
    """Return a default_factory reading ``key`` from the SUB_AGENT_SETTINGS section.

    The lookup happens each time the factory is called, i.e. when a
    SubAgentSettings instance is created.
    """
    def factory():
        return _CONFIG.get("SUB_AGENT_SETTINGS", {}).get(key, fallback)
    return factory


@dataclass
class SubAgentSettings:
    """Sub-agent token limits and behavior configuration.

    Each field defaults to the matching key under ``SUB_AGENT_SETTINGS`` in
    the loaded config, or to the built-in fallback when the key is absent.
    """
    soft_limit_tokens: int = field(default_factory=_sub_agent_default("soft_limit_tokens", 300_000))
    hard_limit_tokens: int = field(default_factory=_sub_agent_default("hard_limit_tokens", 500_000))
    enable_compaction: bool = field(default_factory=_sub_agent_default("enable_compaction", True))
    compact_trigger_tokens: int = field(
        default_factory=_sub_agent_default("compact_trigger_tokens", 50_000)
    )
    # Kept as an inline lambda so every instance gets its own fresh list
    # when the key is missing from the config.
    allowed_tools: list = field(
        default_factory=lambda: _CONFIG.get("SUB_AGENT_SETTINGS", {}).get(
            "allowed_tools", ["rg", "read_file", "list_directory", "web_search"]
        )
    )
    dump_context_on_hard_limit: bool = field(
        default_factory=_sub_agent_default("dump_context_on_hard_limit", True)
    )
85
+
86
+
87
+ # Context compaction settings
88
def _context_default(key, fallback):
    """Return a default_factory reading ``key`` from the CONTEXT_SETTINGS section.

    The lookup happens each time the factory is called, i.e. when a
    ContextSettings instance is created.
    """
    def factory():
        return _CONFIG.get("CONTEXT_SETTINGS", {}).get(key, fallback)
    return factory


@dataclass
class ContextSettings:
    """Context compaction thresholds and defaults.

    Fields default to the matching keys under ``CONTEXT_SETTINGS`` in the
    loaded config. ``hard_limit_tokens`` is not a constructor argument: it
    is taken from the config when present there, otherwise computed as 90%
    of ``max_context_window``.
    """
    compact_trigger_tokens: int = field(
        default_factory=_context_default("compact_trigger_tokens", 100_000)
    )
    max_context_window: int = field(default_factory=_context_default("max_context_window", 200_000))
    log_conversations: bool = field(default_factory=_context_default("log_conversations", False))
    conversations_dir: str = field(
        default_factory=_context_default("conversations_dir", "conversations")
    )
    notify_auto_compaction: bool = field(
        default_factory=_context_default("notify_auto_compaction", True)
    )
    tool_compaction: ToolCompactionSettings = field(default_factory=ToolCompactionSettings)
    hard_limit_tokens: int = field(init=False, repr=False)

    def __post_init__(self):
        section = _CONFIG.get("CONTEXT_SETTINGS", {})
        # An explicit config value wins over the derived 90% default.
        self.hard_limit_tokens = (
            section["hard_limit_tokens"]
            if "hard_limit_tokens" in section
            else int(self.max_context_window * 0.9)
        )
105
+
106
+
107
@dataclass
class ObsidianSettings:
    """Obsidian vault integration settings.

    Fields default to the matching keys under ``OBSIDIAN_SETTINGS`` in the
    loaded config. Values can be changed after construction through
    update().
    """
    vault_path: str = field(
        default_factory=lambda: _CONFIG.get("OBSIDIAN_SETTINGS", {}).get("vault_path", "")
    )
    enabled: bool = field(
        default_factory=lambda: _CONFIG.get("OBSIDIAN_SETTINGS", {}).get("enabled", False)
    )
    exclude_folders: str = field(
        default_factory=lambda: _CONFIG.get("OBSIDIAN_SETTINGS", {}).get(
            "exclude_folders", ".obsidian,.trash,node_modules,.git,__pycache__"
        )
    )
    project_base: str = field(
        default_factory=lambda: _CONFIG.get("OBSIDIAN_SETTINGS", {}).get("project_base", "Dev")
    )

    def update(self, **kwargs):
        """Assign new values to existing fields.

        Keyword names that are not dataclass fields of this object are
        silently ignored.

        Args:
            **kwargs: Field names and values to update
        """
        from dataclasses import fields
        known = {spec.name for spec in fields(self)}
        for name, new_value in kwargs.items():
            if name not in known:
                continue
            setattr(self, name, new_value)

    def is_configured(self) -> bool:
        """Report whether the integration is switched on and has a vault path.

        Only the settings themselves are inspected; the file system is not
        touched.

        Returns:
            True if enabled and vault_path is set
        """
        return self.enabled and bool(self.vault_path)

    def is_active(self) -> bool:
        """Report whether the integration is usable right now.

        Beyond the settings check, the resolved vault path must be an
        existing directory that contains a ``.obsidian`` directory.

        Returns:
            True if enabled, vault_path is set, and the vault looks valid
            on disk
        """
        if not (self.enabled and self.vault_path):
            return False
        vault_root = Path(self.vault_path).resolve()
        return vault_root.is_dir() and (vault_root / ".obsidian").is_dir()

    @property
    def exclude_folders_list(self) -> list:
        """Return exclude_folders parsed into a list of folder names.

        The comma-separated string is split on each access; surrounding
        whitespace is removed and empty entries are dropped.
        """
        trimmed = (part.strip() for part in self.exclude_folders.split(","))
        return [part for part in trimmed if part]
160
+
161
+
162
# Global instances
# Each settings object is constructed once, when this module is imported,
# with no arguments — so every field takes its config-derived default.
server_settings = ServerSettings()
tool_settings = ToolSettings()
file_settings = FileSettings()
context_settings = ContextSettings()
sub_agent_settings = SubAgentSettings()
obsidian_settings = ObsidianSettings()

# Tool execution constants
# Bound once here from tool_settings; reassigning a tool_settings attribute
# later does not rebind these module-level names.
MAX_TOOL_CALLS = tool_settings.max_tool_calls
MAX_COMMAND_OUTPUT_LINES = tool_settings.max_command_output_lines
MAX_SHELL_OUTPUT_LINES = tool_settings.max_shell_output_lines
MAX_FILE_PREVIEW_LINES = tool_settings.max_file_preview_lines
@@ -0,0 +1,191 @@
1
+ """Command validation."""
2
+
3
+ import os
4
+ import re
5
+ import shlex
6
+ from urllib.parse import urlparse
7
+
8
# Shell operators that indicate command chaining or redirection.
# Shared between validation.py and shell.py — keep in one place to avoid drift.
# Matches: &&, ||, ;, |, &, >, <, backticks, $(), ${}, newlines
# A single '&' is included because it backgrounds the command before it and
# starts another ("ps & rm -rf x"); on cmd.exe it is the command separator.
# NOTE: Alternations are sorted longest-first so that '&&' and '||' match
# before '&' and '|' — reordering the raw list is safe because we sort at
# runtime.
_RAW_CHAINING_PATTERNS = ["&&", "||", ";", "|", "&", ">", "<", "`", "$(", "${", "\n", "\r"]
CHAINING_OPERATORS = re.compile(
    "|".join(re.escape(p) for p in sorted(_RAW_CHAINING_PATTERNS, key=len, reverse=True))
)
17
+
18
# Localhost patterns allowed over plain HTTP (no TLS needed for loopback)
_LOCALHOST_HOSTS = frozenset({"localhost", "127.0.0.1", "::1", "0.0.0.0"})


def validate_api_url(url: str) -> tuple[bool, str]:
    """Validate an API base URL for security.

    Enforces HTTPS for all non-localhost endpoints and rejects URLs that
    cannot be parsed, use a scheme other than http/https, or have no host
    component (e.g. ``"https://"``).

    Args:
        url: The base URL to check.

    Returns:
        (is_valid, error_message) — error_message is "" when valid.
    """
    try:
        parsed = urlparse(url)
        host = parsed.hostname
    except (ValueError, TypeError, AttributeError):
        # urlparse raises ValueError for e.g. an unbalanced IPv6 bracket;
        # the other two cover non-string input.
        return False, f"Malformed URL: {url}"

    if parsed.scheme not in ("http", "https"):
        return False, f"Invalid URL scheme '{parsed.scheme}', expected http or https"

    # A URL without a host can never reach an endpoint; reject it up front
    # instead of reporting it as valid.
    if not host:
        return False, f"Malformed URL (missing host): {url}"

    if parsed.scheme == "http" and host not in _LOCALHOST_HOSTS:
        return False, (
            f"Plain HTTP is not allowed for remote endpoints. "
            f"Use HTTPS for {host}"
        )

    return True, ""
46
+
47
+
48
# Commands that execute_command rejects without prompting, because a native
# tool covers the same job. Grouped by the tool that replaces them.
SILENT_COMMAND_BLOCKED = set().union(
    # Code search → rg tool
    ("rg", "rg.exe", "ripgrep"),
    # File reading → read_file tool
    ("cat", "get-content", "type"),
    # Directory listing → list_directory tool
    ("ls", "get-childitem", "dir"),
    # File creation → create_file tool
    ("touch", "new-item"),
    # File editing → edit_file tool
    ("set-content", "add-content", "echo", "tee"),
    # Other text utilities that should go through native tools
    ("grep", "find", "head", "tail", "sed", "awk", "sort", "uniq", "wc"),
)
69
+
70
+
71
+
72
def check_for_silent_blocked_command(command):
    """Decide whether a command should be redirected to a native tool.

    A leading "powershell " prefix is ignored. For a command containing a
    chaining/redirection operator, only the first segment is considered:
    if its command name is not in SILENT_COMMAND_BLOCKED the whole command
    is let through.

    Args:
        command: Command string to validate

    Returns:
        tuple: (is_blocked, reprompt_message)
            is_blocked is True if the command should be silently blocked;
            reprompt_message then tells the AI which tool to use, and is
            None when the command is not blocked.
    """
    command = command.strip()
    if not command:
        return False, None

    # Strip "powershell " prefix if present
    ps_prefix = "powershell "
    if command.lower().startswith(ps_prefix):
        command = command[len(ps_prefix):].strip()

    # e.g. "cd /var/log && tail -f" is allowed, but "cat file && echo done"
    # is still redirected to read_file.
    if CHAINING_OPERATORS.search(command):
        head = CHAINING_OPERATORS.split(command, maxsplit=1)[0].strip()
        head_tokens = _tokenize_segment(head)
        if head_tokens and head_tokens[0].lower() not in SILENT_COMMAND_BLOCKED:
            return False, None
        # otherwise continue with the regular check below

    tokens = _tokenize_segment(command)
    if not tokens:
        return False, None

    cmd_name = tokens[0].lower()
    if cmd_name not in SILENT_COMMAND_BLOCKED:
        return False, None

    # Command name → description of the native replacement.
    suggestions = {
        **dict.fromkeys(("rg", "rg.exe", "ripgrep"), "rg tool"),
        **dict.fromkeys(("cat", "get-content", "type"), "read_file tool"),
        **dict.fromkeys(("ls", "get-childitem", "dir"), "list_directory tool"),
        **dict.fromkeys(("touch", "new-item"), "create_file tool"),
        **dict.fromkeys(("set-content", "add-content", "echo", "tee"), "edit_file tool"),
        "grep": "rg tool for code search, or read_file tool for searching within a file",
        "find": "list_directory tool with recursive=True for listing files, or rg tool for searching content",
        "head": "read_file tool with start_line=1 and max_lines=N",
        "tail": "read_file tool with start_line and max_lines parameters",
        "sed": "edit_file tool for text replacements",
        "awk": "read_file tool followed by post-processing, or use rg tool for pattern matching",
        "sort": "read_file tool then process results",
        "uniq": "read_file tool then process results",
        "wc": "read_file tool shows line counts",
    }
    tool_suggestion = suggestions.get(cmd_name, "appropriate native tool")
    reprompt_msg = (
        f"Use the {tool_suggestion} instead of '{cmd_name}'. "
        f"Native tools provide better integration with the system."
    )
    return True, reprompt_msg
134
+
135
+
136
+
137
def _tokenize_segment(segment):
    """Split one command segment into tokens.

    shlex is used with POSIX quoting rules everywhere except Windows
    (``os.name == "nt"``); if shlex raises ValueError the segment is split
    on plain whitespace instead.
    """
    posix_mode = os.name != "nt"
    try:
        parts = shlex.split(segment, posix=posix_mode)
    except ValueError:
        parts = segment.split()
    return parts
143
+
144
+
145
def check_command(command):
    """Run basic structural checks on a command string.

    Only two things are rejected: a command that is empty (before or after
    removing a leading "powershell " prefix) and a command that still
    starts with "powershell" once that prefix has been removed. Approval
    and safety decisions are made elsewhere by the caller.

    Args:
        command: Command string to validate

    Returns:
        tuple: (is_valid, reason) - reason is None when the command is
            accepted and a short explanation when it is rejected.
    """
    stripped = command.strip()
    if not stripped:
        return False, "empty command"

    # Legacy support for Windows users: drop one leading "powershell ".
    ps_prefix = "powershell "
    if stripped.lower().startswith(ps_prefix):
        stripped = stripped[len(ps_prefix):].strip()

    # Anything still beginning with "powershell" is treated as nested.
    if stripped.lower().startswith("powershell"):
        return False, "nested powershell invocation"

    if not _tokenize_segment(stripped):
        return False, "empty command"

    # Everything else is structurally acceptable.
    return True, None
177
+
178
+
179
def is_auto_approved_command(command):
    """Tell whether a command may run without asking the user.

    This is a thin wrapper: the decision is made entirely by
    ``utils.safe_commands.is_safe_command``. The import is done inside the
    function body.

    Args:
        command: Command string to validate

    Returns:
        bool: True if command is safe to auto-approve
    """
    from utils import safe_commands
    return safe_commands.is_safe_command(command)