bone-agent 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +184 -0
- package/bin/npm-wrapper.js +235 -0
- package/bin/rg +0 -0
- package/bin/rg.exe +0 -0
- package/config.yaml.example +133 -0
- package/package.json +53 -0
- package/requirements.txt +9 -0
- package/src/__init__.py +11 -0
- package/src/core/__init__.py +1 -0
- package/src/core/agentic.py +1054 -0
- package/src/core/chat_manager.py +1552 -0
- package/src/core/config_manager.py +247 -0
- package/src/core/cron.py +527 -0
- package/src/core/cron_allowlist.py +118 -0
- package/src/core/memory.py +232 -0
- package/src/core/retry.py +71 -0
- package/src/core/sub_agent.py +326 -0
- package/src/core/tool_approval.py +220 -0
- package/src/core/tool_feedback.py +778 -0
- package/src/exceptions.py +79 -0
- package/src/llm/__init__.py +1 -0
- package/src/llm/client.py +171 -0
- package/src/llm/config.py +466 -0
- package/src/llm/prompts.py +735 -0
- package/src/llm/providers.py +417 -0
- package/src/llm/streaming.py +163 -0
- package/src/llm/token_tracker.py +368 -0
- package/src/tools/__init__.py +212 -0
- package/src/tools/constants.py +59 -0
- package/src/tools/create_file.py +136 -0
- package/src/tools/directory.py +389 -0
- package/src/tools/edit.py +543 -0
- package/src/tools/file_reader.py +322 -0
- package/src/tools/helpers/__init__.py +105 -0
- package/src/tools/helpers/base.py +550 -0
- package/src/tools/helpers/converters.py +44 -0
- package/src/tools/helpers/file_helpers.py +189 -0
- package/src/tools/helpers/formatters.py +411 -0
- package/src/tools/helpers/loader.py +231 -0
- package/src/tools/helpers/parallel_executor.py +231 -0
- package/src/tools/helpers/path_resolver.py +226 -0
- package/src/tools/helpers/plugin_manifest.py +156 -0
- package/src/tools/obsidian.py +96 -0
- package/src/tools/review_sub_agent.py +189 -0
- package/src/tools/rg_search.py +393 -0
- package/src/tools/search_plugins.py +109 -0
- package/src/tools/select_option.py +593 -0
- package/src/tools/shell.py +302 -0
- package/src/tools/sub_agent.py +139 -0
- package/src/tools/task_list.py +269 -0
- package/src/tools/web_search.py +61 -0
- package/src/ui/__init__.py +1 -0
- package/src/ui/banner.py +87 -0
- package/src/ui/commands.py +2694 -0
- package/src/ui/displays.py +213 -0
- package/src/ui/loader.py +284 -0
- package/src/ui/main.py +646 -0
- package/src/ui/prompt_utils.py +113 -0
- package/src/ui/setting_selector.py +590 -0
- package/src/ui/setup_wizard.py +294 -0
- package/src/ui/sub_agent_panel.py +234 -0
- package/src/ui/tool_confirmation.py +215 -0
- package/src/utils/__init__.py +1 -0
- package/src/utils/citation_parser.py +199 -0
- package/src/utils/editor.py +158 -0
- package/src/utils/gitignore_filter.py +149 -0
- package/src/utils/logger.py +254 -0
- package/src/utils/paths.py +30 -0
- package/src/utils/result_parsers.py +108 -0
- package/src/utils/safe_commands.py +243 -0
- package/src/utils/settings.py +174 -0
- package/src/utils/validation.py +191 -0
- package/src/utils/web_search.py +173 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""Structured command safety system for auto-approval.
|
|
2
|
+
|
|
3
|
+
Replaces the flat ALLOWED_COMMANDS whitelist with a command+subcommand
|
|
4
|
+
granularity system that distinguishes read-only operations from mutations.
|
|
5
|
+
|
|
6
|
+
Design principles:
|
|
7
|
+
- No args = not safe (for commands with subcommand variants)
|
|
8
|
+
- Gate anything that has potential to be unsafe
|
|
9
|
+
- Deny-by-default: commands not in the dict require approval
|
|
10
|
+
- Compound flags use longest-prefix matching
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import shlex
|
|
15
|
+
from utils.validation import CHAINING_OPERATORS
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# SAFE_COMMAND_RULES
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Maps command names to their safety profile:
|
|
22
|
+
# None → always safe (inherently read-only, e.g., ps, pwd)
|
|
23
|
+
# set() → only safe for listed subcommands/flags
|
|
24
|
+
#
|
|
25
|
+
# Platform normalization strips .exe suffix and lowercases before lookup.
|
|
26
|
+
|
|
27
|
+
SAFE_COMMAND_RULES: dict[str, frozenset | None] = {
|
|
28
|
+
# --- Always safe (truly read-only, no mutating subcommands) ---
|
|
29
|
+
"pwd": None,
|
|
30
|
+
"which": None,
|
|
31
|
+
"whereis": None,
|
|
32
|
+
"uname": None,
|
|
33
|
+
"hostname": None,
|
|
34
|
+
"uptime": None,
|
|
35
|
+
"date": None,
|
|
36
|
+
"cal": None,
|
|
37
|
+
"whoami": None,
|
|
38
|
+
"id": None,
|
|
39
|
+
"env": frozenset({"--version", "--help"}),
|
|
40
|
+
"printenv": frozenset({"--version", "--help"}),
|
|
41
|
+
"lscpu": None,
|
|
42
|
+
"lsblk": None,
|
|
43
|
+
"file": None,
|
|
44
|
+
"stat": None,
|
|
45
|
+
"md5sum": None,
|
|
46
|
+
"sha256sum": None,
|
|
47
|
+
"free": None,
|
|
48
|
+
"df": None,
|
|
49
|
+
"du": None,
|
|
50
|
+
"dmesg": None,
|
|
51
|
+
"ltrace": None,
|
|
52
|
+
"ps": None,
|
|
53
|
+
"pgrep": None,
|
|
54
|
+
"pidof": None,
|
|
55
|
+
"lsof": None,
|
|
56
|
+
"ping": None,
|
|
57
|
+
"nslookup": None,
|
|
58
|
+
"dig": None,
|
|
59
|
+
"ss": None,
|
|
60
|
+
"ifconfig": None,
|
|
61
|
+
"netstat": None,
|
|
62
|
+
"journalctl": None,
|
|
63
|
+
"apt-cache": None,
|
|
64
|
+
"apt-show": None,
|
|
65
|
+
"dpkg-query": None,
|
|
66
|
+
|
|
67
|
+
# --- Subcommand-gated (safe only for specific read-only operations) ---
|
|
68
|
+
"git": frozenset({
|
|
69
|
+
"status", "log", "diff", "show", "branch",
|
|
70
|
+
"remote", "tag",
|
|
71
|
+
"rev-parse", "shortlog", "describe", "symbolic-ref",
|
|
72
|
+
"reflog", "name-rev", "blame", "annotate",
|
|
73
|
+
"for-each-ref", "ls-files", "ls-tree", "ls-remote",
|
|
74
|
+
}),
|
|
75
|
+
"pip": frozenset({"show", "list", "--version", "check", "debug", "index", "inspect"}),
|
|
76
|
+
"pip3": frozenset({"show", "list", "--version", "check", "debug", "index", "inspect"}),
|
|
77
|
+
"npm": frozenset({"list", "ls", "view", "version", "outdated", "pack", "info", "doctor", "audit"}),
|
|
78
|
+
"node": frozenset({"--version"}),
|
|
79
|
+
"python": frozenset({"--version"}),
|
|
80
|
+
"python3": frozenset({"--version"}),
|
|
81
|
+
"pacman": frozenset({
|
|
82
|
+
"-Q", "-Qi", "-Ql", "-Qo", "-Qs", "-Qt",
|
|
83
|
+
"-F", "-Si", "-Ss", "-Fl", "-G",
|
|
84
|
+
}),
|
|
85
|
+
"dpkg": frozenset({"-l", "-s", "-S", "-L", "-p", "--verify", "--audit"}),
|
|
86
|
+
"rpm": frozenset({"-q", "-qa", "-qi", "-ql", "-qf", "--queryformat"}),
|
|
87
|
+
"dnf": frozenset({"list", "info", "search", "check-update", "repoquery"}),
|
|
88
|
+
"yum": frozenset({"list", "info", "search", "check-update"}),
|
|
89
|
+
"systemctl": frozenset({
|
|
90
|
+
"status", "list-units", "list-unit-files", "show",
|
|
91
|
+
"is-active", "is-enabled", "cat", "list-timers",
|
|
92
|
+
"list-sockets", "list-jobs",
|
|
93
|
+
}),
|
|
94
|
+
"service": frozenset({"--status-all"}), # "service <name> status" handled by _is_safe_service_command
|
|
95
|
+
"ip": frozenset({"addr", "address", "link", "route", "neigh", "maddr", "rule", "netns"}),
|
|
96
|
+
|
|
97
|
+
# --- Windows equivalents ---
|
|
98
|
+
"where": None,
|
|
99
|
+
"systeminfo": None,
|
|
100
|
+
"Get-Process": None,
|
|
101
|
+
"Get-Service": None,
|
|
102
|
+
"Get-ChildItem": None,
|
|
103
|
+
"Get-Content": None,
|
|
104
|
+
"Get-Location": None,
|
|
105
|
+
"Test-Connection": None,
|
|
106
|
+
"Get-NetIPAddress": None,
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Sub-subcommand deny lists for commands where the first arg passes safety
|
|
111
|
+
# but later args can be mutating. Checked AFTER first-arg matching.
|
|
112
|
+
# If any token appears in the deny list, the command is rejected.
|
|
113
|
+
_IP_MUTATING_VERBS = frozenset({
|
|
114
|
+
"set", "add", "delete", "replace", "flush", "change",
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
# Commands that need deep token scanning mapped to their deny sets.
|
|
118
|
+
_DEEP_SCAN_RULES: dict[str, frozenset] = {
|
|
119
|
+
"ip": _IP_MUTATING_VERBS,
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _tokenize(command: str) -> list[str]:
|
|
124
|
+
"""Tokenize a command string using platform-appropriate splitting."""
|
|
125
|
+
use_posix = os.name != "nt"
|
|
126
|
+
try:
|
|
127
|
+
return shlex.split(command, posix=use_posix)
|
|
128
|
+
except ValueError:
|
|
129
|
+
return command.split()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _normalize_command_name(name: str) -> str:
|
|
133
|
+
"""Normalize a command name for lookup.
|
|
134
|
+
|
|
135
|
+
Strips .exe suffix and lowercases. Does NOT normalize PowerShell
|
|
136
|
+
cmdlet casing (case-insensitive lookup handles that).
|
|
137
|
+
"""
|
|
138
|
+
if name.lower().endswith(".exe"):
|
|
139
|
+
name = name[:-4]
|
|
140
|
+
return name.lower()
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _matches_safe_subcommand(arg: str, safe_set: frozenset) -> bool:
|
|
144
|
+
"""Check if an argument matches any entry in the safe subcommand set.
|
|
145
|
+
|
|
146
|
+
Uses longest-prefix matching for flag-style arguments:
|
|
147
|
+
e.g., if '-Qi' is safe, then '-Qil' also matches.
|
|
148
|
+
For word-style subcommands (e.g., git 'status'), exact match only.
|
|
149
|
+
|
|
150
|
+
Comparison is case-insensitive.
|
|
151
|
+
"""
|
|
152
|
+
arg_lower = arg.lower()
|
|
153
|
+
|
|
154
|
+
# Build lowercase version of safe_set for case-insensitive comparison
|
|
155
|
+
safe_lower = {s.lower() for s in safe_set}
|
|
156
|
+
|
|
157
|
+
# Exact match
|
|
158
|
+
if arg_lower in safe_lower:
|
|
159
|
+
return True
|
|
160
|
+
|
|
161
|
+
# Longest-prefix match for flags (arguments starting with -)
|
|
162
|
+
if arg_lower.startswith("-"):
|
|
163
|
+
# Try progressively shorter prefixes
|
|
164
|
+
for length in range(len(arg_lower) - 1, 1, -1):
|
|
165
|
+
prefix = arg_lower[:length]
|
|
166
|
+
if prefix in safe_lower:
|
|
167
|
+
return True
|
|
168
|
+
|
|
169
|
+
return False
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def is_safe_command(command: str) -> bool:
|
|
173
|
+
"""Check if a command should be auto-approved (safe, read-only).
|
|
174
|
+
|
|
175
|
+
A command is auto-approved when:
|
|
176
|
+
1. It contains no chaining/redirection operators
|
|
177
|
+
2. The command name is in SAFE_COMMAND_RULES
|
|
178
|
+
3. If gated (has a set of safe subcommands), the first argument
|
|
179
|
+
matches an entry in the set
|
|
180
|
+
4. If always-safe (None), it's approved with or without args
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
command: Command string to validate
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
bool: True if the command is safe to auto-approve
|
|
187
|
+
"""
|
|
188
|
+
command = command.strip()
|
|
189
|
+
if not command:
|
|
190
|
+
return False
|
|
191
|
+
|
|
192
|
+
# Strip "powershell " prefix if present (legacy support for Windows users)
|
|
193
|
+
if command.lower().startswith("powershell "):
|
|
194
|
+
command = command[len("powershell "):].strip()
|
|
195
|
+
|
|
196
|
+
# Reject any command containing chaining/redirection operators
|
|
197
|
+
if CHAINING_OPERATORS.search(command):
|
|
198
|
+
return False
|
|
199
|
+
|
|
200
|
+
# Tokenize and get command name
|
|
201
|
+
tokens = _tokenize(command)
|
|
202
|
+
if not tokens:
|
|
203
|
+
return False
|
|
204
|
+
|
|
205
|
+
cmd_name = _normalize_command_name(tokens[0])
|
|
206
|
+
|
|
207
|
+
# Look up in rules (deny-by-default)
|
|
208
|
+
if cmd_name not in SAFE_COMMAND_RULES:
|
|
209
|
+
# Unknown command — require approval
|
|
210
|
+
return False
|
|
211
|
+
|
|
212
|
+
rule = SAFE_COMMAND_RULES[cmd_name]
|
|
213
|
+
if rule is None:
|
|
214
|
+
# Always-safe command (e.g., ps, pwd)
|
|
215
|
+
return True
|
|
216
|
+
|
|
217
|
+
if not rule:
|
|
218
|
+
# Empty frozenset — no safe subcommands defined, deny
|
|
219
|
+
return False
|
|
220
|
+
|
|
221
|
+
# Gated command — need at least one subcommand arg
|
|
222
|
+
if len(tokens) < 2:
|
|
223
|
+
return False
|
|
224
|
+
|
|
225
|
+
# Check first argument against safe subcommand set
|
|
226
|
+
first_arg = tokens[1]
|
|
227
|
+
|
|
228
|
+
# Special case: "service <name> status" — the safe subcommand is the LAST arg
|
|
229
|
+
if cmd_name == "service" and len(tokens) >= 3 and tokens[-1].lower() == "status":
|
|
230
|
+
return True
|
|
231
|
+
|
|
232
|
+
if not _matches_safe_subcommand(first_arg, rule):
|
|
233
|
+
return False
|
|
234
|
+
|
|
235
|
+
# Deep scan: for commands with known mutating sub-subcommands,
|
|
236
|
+
# reject if any remaining token matches the deny list.
|
|
237
|
+
deny_set = _DEEP_SCAN_RULES.get(cmd_name)
|
|
238
|
+
if deny_set and len(tokens) > 2:
|
|
239
|
+
for tok in tokens[2:]:
|
|
240
|
+
if tok.lower() in deny_set:
|
|
241
|
+
return False
|
|
242
|
+
|
|
243
|
+
return True
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Centralized configuration for bone-agent."""
|
|
2
|
+
import re
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Set
|
|
6
|
+
|
|
7
|
+
# Load config from llm.config
|
|
8
|
+
# Note: src/ is added to sys.path in main.py, so we can import directly
|
|
9
|
+
from llm.config import _CONFIG
|
|
10
|
+
|
|
11
|
+
# Styles and themes
|
|
12
|
+
from pygments.styles.monokai import MonokaiStyle
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MonokaiDarkBGStyle(MonokaiStyle):
    """Monokai code-highlighting style with the background set to ``#141414``."""

    # Overrides the background colour inherited from MonokaiStyle.
    background_color = "#141414"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_HEADING_RE = re.compile(r'^(#{1,6})\s+(.+)$', re.MULTILINE)

# Opening/closing line of a fenced code block: up to three spaces of indent,
# then a run of at least three backticks or tildes.
_FENCE_RE = re.compile(r'^ {0,3}(`{3,}|~{3,})(.*)$')


def left_align_headings(text: str) -> str:
    """Strip markdown heading markers to avoid Rich's centering.

    Lines inside fenced code blocks (``` or ~~~) are left untouched, so code
    comments such as ``# install deps`` keep their leading ``#``. Headings
    are matched one line at a time, so a bare ``#`` line is never merged
    with the line that follows it.

    Args:
        text: Markdown text.

    Returns:
        The text with ``#``..``######`` prefixes removed from heading lines
        that sit outside fenced code blocks.
    """
    rendered = []
    open_fence = ""  # fence marker of the block we are inside, "" if none
    for line in text.split("\n"):
        fence = _FENCE_RE.match(line)
        if fence:
            marker, rest = fence.group(1), fence.group(2)
            if not open_fence:
                open_fence = marker
            elif (
                marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest.strip()
            ):
                # A closing fence uses the same character, is at least as
                # long as the opener, and carries no info string.
                open_fence = ""
        elif not open_fence:
            line = _HEADING_RE.sub(lambda m: m.group(2), line)
        rendered.append(line)
    return "\n".join(rendered)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _server_default(key, fallback):
    """Build a default_factory that reads ``SERVER_SETTINGS[key]`` from _CONFIG.

    The config is consulted when an instance is created, not when the class
    is defined; ``fallback`` is used if the section or key is missing.
    """
    def _read():
        return _CONFIG.get("SERVER_SETTINGS", {}).get(key, fallback)
    return _read


@dataclass
class ServerSettings:
    """Settings for the local llama-server, read from the SERVER_SETTINGS config section."""
    ngl_layers: int = field(default_factory=_server_default("ngl_layers", 30))
    ctx_size: int = field(default_factory=_server_default("ctx_size", 8192))
    n_predict: int = field(default_factory=_server_default("n_predict", 8192))
    rope_scale: float = field(default_factory=_server_default("rope_scale", 1.0))
    health_check_timeout_sec: int = field(
        default_factory=_server_default("health_check_timeout_sec", 120)
    )
    health_check_interval_sec: float = field(
        default_factory=_server_default("health_check_interval_sec", 1.0)
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _tool_default(key, fallback):
    """Build a default_factory that reads ``TOOL_SETTINGS[key]`` from _CONFIG.

    Only used with immutable fallbacks; the list-valued field below keeps
    its own lambda so each instance gets a fresh fallback list.
    """
    def _read():
        return _CONFIG.get("TOOL_SETTINGS", {}).get(key, fallback)
    return _read


@dataclass
class ToolSettings:
    """Limits and defaults for tool execution, read from the TOOL_SETTINGS config section."""
    max_tool_calls: int = field(default_factory=_tool_default("max_tool_calls", 100))
    command_timeout_sec: int = field(default_factory=_tool_default("command_timeout_sec", 30))
    enable_parallel_execution: bool = field(
        default_factory=_tool_default("enable_parallel_execution", True)
    )
    max_parallel_workers: int = field(default_factory=_tool_default("max_parallel_workers", 10))
    max_command_output_lines: int = field(
        default_factory=_tool_default("max_command_output_lines", 100)
    )
    max_shell_output_lines: int = field(
        default_factory=_tool_default("max_shell_output_lines", 200)
    )
    max_file_preview_lines: int = field(
        default_factory=_tool_default("max_file_preview_lines", 200)
    )
    # Inline lambda on purpose: the [] fallback must be a new list per call.
    disabled_tools: list = field(
        default_factory=lambda: _CONFIG.get("TOOL_SETTINGS", {}).get("disabled_tools", [])
    )
|
|
50
|
+
|
|
51
|
+
@dataclass
class FileSettings:
    """Limits for file scanning and reading, read from the FILE_SETTINGS config section."""
    max_file_bytes: int = field(
        default_factory=lambda: _CONFIG.get("FILE_SETTINGS", {}).get("max_file_bytes", 200_000)
    )
    max_total_bytes: int = field(
        default_factory=lambda: _CONFIG.get("FILE_SETTINGS", {}).get("max_total_bytes", 1_500_000)
    )
    # None means "not supplied"; __post_init__ then fills it in.
    exclude_dirs: Set[str] = None

    def __post_init__(self):
        """Fill in ``exclude_dirs`` when the caller did not pass one.

        A non-empty ``exclude_dirs`` entry in the config wins; otherwise a
        built-in set of directory names is used.
        """
        if self.exclude_dirs is not None:
            return
        configured = _CONFIG.get("FILE_SETTINGS", {}).get("exclude_dirs")
        self.exclude_dirs = (
            set(configured)
            if configured
            else {".git", ".venv", "llama.cpp", "bin", "__pycache__"}
        )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _tool_compaction_default(key, fallback):
    """Build a default_factory for ``CONTEXT_SETTINGS.tool_compaction[key]``.

    Missing sections or keys fall back to ``fallback``; the config is read
    when an instance is created.
    """
    def _read():
        section = _CONFIG.get("CONTEXT_SETTINGS", {}).get("tool_compaction", {})
        return section.get(key, fallback)
    return _read


@dataclass
class ToolCompactionSettings:
    """Settings for compacting tool results on a per-message basis."""
    enable_per_message_compaction: bool = field(
        default_factory=_tool_compaction_default("enable_per_message_compaction", True)
    )
    uncompacted_tail_tokens: int = field(
        default_factory=_tool_compaction_default("uncompacted_tail_tokens", 40_000)
    )
    min_tool_blocks: int = field(default_factory=_tool_compaction_default("min_tool_blocks", 5))
    compact_failed_tools: bool = field(
        default_factory=_tool_compaction_default("compact_failed_tools", True)
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _sub_agent_default(key, fallback):
    """Build a default_factory that reads ``SUB_AGENT_SETTINGS[key]`` from _CONFIG.

    Only used with immutable fallbacks; the list-valued field below keeps
    its own lambda so each instance gets a fresh fallback list.
    """
    def _read():
        return _CONFIG.get("SUB_AGENT_SETTINGS", {}).get(key, fallback)
    return _read


@dataclass
class SubAgentSettings:
    """Token limits and behaviour switches for sub-agents."""
    soft_limit_tokens: int = field(default_factory=_sub_agent_default("soft_limit_tokens", 300_000))
    hard_limit_tokens: int = field(default_factory=_sub_agent_default("hard_limit_tokens", 500_000))
    enable_compaction: bool = field(default_factory=_sub_agent_default("enable_compaction", True))
    compact_trigger_tokens: int = field(
        default_factory=_sub_agent_default("compact_trigger_tokens", 50_000)
    )
    # Inline lambda on purpose: the fallback list must be new for every call.
    allowed_tools: list = field(
        default_factory=lambda: _CONFIG.get("SUB_AGENT_SETTINGS", {}).get(
            "allowed_tools", ["rg", "read_file", "list_directory", "web_search"]
        )
    )
    dump_context_on_hard_limit: bool = field(
        default_factory=_sub_agent_default("dump_context_on_hard_limit", True)
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Context compaction settings
|
|
88
|
+
@dataclass
class ContextSettings:
    """Thresholds and defaults for context compaction, read from CONTEXT_SETTINGS."""
    compact_trigger_tokens: int = field(
        default_factory=lambda: _CONFIG.get("CONTEXT_SETTINGS", {}).get("compact_trigger_tokens", 100_000)
    )
    max_context_window: int = field(
        default_factory=lambda: _CONFIG.get("CONTEXT_SETTINGS", {}).get("max_context_window", 200_000)
    )
    log_conversations: bool = field(
        default_factory=lambda: _CONFIG.get("CONTEXT_SETTINGS", {}).get("log_conversations", False)
    )
    conversations_dir: str = field(
        default_factory=lambda: _CONFIG.get("CONTEXT_SETTINGS", {}).get("conversations_dir", "conversations")
    )
    notify_auto_compaction: bool = field(
        default_factory=lambda: _CONFIG.get("CONTEXT_SETTINGS", {}).get("notify_auto_compaction", True)
    )
    tool_compaction: ToolCompactionSettings = field(default_factory=ToolCompactionSettings)
    # Not a constructor argument; computed in __post_init__.
    hard_limit_tokens: int = field(init=False, repr=False)

    def __post_init__(self):
        """Set ``hard_limit_tokens``.

        Uses the configured value when CONTEXT_SETTINGS has a
        ``hard_limit_tokens`` key, otherwise 90% of ``max_context_window``
        truncated to an int.
        """
        ctx_cfg = _CONFIG.get("CONTEXT_SETTINGS", {})
        self.hard_limit_tokens = (
            ctx_cfg["hard_limit_tokens"]
            if "hard_limit_tokens" in ctx_cfg
            else int(self.max_context_window * 0.9)
        )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _obsidian_default(key, fallback):
    """Build a default_factory that reads ``OBSIDIAN_SETTINGS[key]`` from _CONFIG."""
    def _read():
        return _CONFIG.get("OBSIDIAN_SETTINGS", {}).get(key, fallback)
    return _read


@dataclass
class ObsidianSettings:
    """Settings for the Obsidian vault integration.

    Fields can be changed after construction through update(), which the
    /obsidian commands use.
    """
    vault_path: str = field(default_factory=_obsidian_default("vault_path", ""))
    enabled: bool = field(default_factory=_obsidian_default("enabled", False))
    exclude_folders: str = field(
        default_factory=_obsidian_default(
            "exclude_folders", ".obsidian,.trash,node_modules,.git,__pycache__"
        )
    )
    project_base: str = field(default_factory=_obsidian_default("project_base", "Dev"))

    def update(self, **kwargs):
        """Assign new values to existing fields.

        Keyword names that are not dataclass fields are ignored.

        Args:
            **kwargs: Field names mapped to their new values
        """
        from dataclasses import fields
        known = {f.name for f in fields(self)}
        for name, new_value in kwargs.items():
            if name not in known:
                continue
            setattr(self, name, new_value)

    def is_configured(self) -> bool:
        """Report whether the integration is switched on and has a vault path.

        Nothing on disk is checked here.

        Returns:
            Truthy only when ``enabled`` is set and ``vault_path`` is non-empty
        """
        return self.enabled and bool(self.vault_path)

    def is_active(self) -> bool:
        """Report whether the integration is usable right now.

        Beyond the checks in is_configured(), the resolved vault path must be
        an existing directory that contains a ``.obsidian`` directory.

        Returns:
            True if enabled, a vault path is set, and the vault looks valid on disk
        """
        if not (self.enabled and self.vault_path):
            return False
        vault_root = Path(self.vault_path).resolve()
        return vault_root.is_dir() and (vault_root / ".obsidian").is_dir()

    @property
    def exclude_folders_list(self) -> list:
        """``exclude_folders`` split on commas, trimmed, with empty entries dropped."""
        trimmed = (piece.strip() for piece in self.exclude_folders.split(","))
        return [piece for piece in trimmed if piece]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# Global instances
|
|
163
|
+
# One shared instance per settings group, created when this module is imported.
server_settings = ServerSettings()
tool_settings = ToolSettings()
file_settings = FileSettings()
context_settings = ContextSettings()
sub_agent_settings = SubAgentSettings()
obsidian_settings = ObsidianSettings()

# Tool execution constants.
# These are plain copies taken at import time; changing tool_settings later
# does not update them.
(
    MAX_TOOL_CALLS,
    MAX_COMMAND_OUTPUT_LINES,
    MAX_SHELL_OUTPUT_LINES,
    MAX_FILE_PREVIEW_LINES,
) = (
    tool_settings.max_tool_calls,
    tool_settings.max_command_output_lines,
    tool_settings.max_shell_output_lines,
    tool_settings.max_file_preview_lines,
)
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""Command validation."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
import shlex
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
# Shell operators that indicate command chaining or redirection.
# Shared between validation.py and shell.py — keep in one place to avoid drift.
# Matches: &&, ||, ;, |, &, >, <, backticks, $(), ${}, newlines
# A lone '&' is included because it also sequences commands: POSIX shells
# background the left side and run the right side, and cmd.exe treats it as a
# command separator. Without it "pwd & rm -rf x" would not count as chained.
# NOTE: Alternations are sorted longest-first so that '&&' and '||' match
# before '&' and '|' — reordering the raw list is safe because we sort at runtime.
_RAW_CHAINING_PATTERNS = ["&&", "||", ";", "|", "&", ">", "<", "`", "$(", "${", "\n", "\r"]
CHAINING_OPERATORS = re.compile(
    "|".join(re.escape(p) for p in sorted(_RAW_CHAINING_PATTERNS, key=len, reverse=True))
)
|
|
17
|
+
|
|
18
|
+
# Localhost patterns allowed over plain HTTP (no TLS needed for loopback)
|
|
19
|
+
_LOCALHOST_HOSTS = frozenset({"localhost", "127.0.0.1", "::1", "0.0.0.0"})


def validate_api_url(url: str) -> tuple[bool, str]:
    """Validate an API base URL for security.

    Enforces HTTPS for all non-localhost endpoints.
    Rejects malformed URLs: unparsable input, an invalid port, or a
    missing host (e.g. ``https://``).

    Args:
        url: The API base URL to check.

    Returns:
        (is_valid, error_message) — error_message is "" when valid.
    """
    try:
        parsed = urlparse(url)
        hostname = parsed.hostname
        # Reading .port validates it; it raises ValueError for a
        # non-numeric or out-of-range port.
        _ = parsed.port
    except (ValueError, TypeError, AttributeError):
        return False, f"Malformed URL: {url}"

    if parsed.scheme not in ("http", "https"):
        return False, f"Invalid URL scheme '{parsed.scheme}', expected http or https"

    if not hostname:
        # "https://" or "http:///path" has a valid scheme but no host.
        return False, f"Malformed URL: {url}"

    if parsed.scheme == "http" and hostname not in _LOCALHOST_HOSTS:
        return False, (
            f"Plain HTTP is not allowed for remote endpoints. "
            f"Use HTTPS for {hostname or url}"
        )

    return True, ""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Commands that should be silently rejected in execute_command (redirect to native tools)
|
|
49
|
+
# These are commands that have better native tool equivalents
|
|
50
|
+
SILENT_COMMAND_BLOCKED = set().union(
    # Code search (use rg tool)
    ("rg", "rg.exe", "ripgrep"),
    # File reading (use read_file tool)
    ("cat", "get-content", "type"),
    # Directory listing (use list_directory tool)
    ("ls", "get-childitem", "dir"),
    # File creation (use create_file tool)
    ("touch", "new-item"),
    # File editing (use edit_file tool)
    ("set-content", "add-content", "echo", "tee"),
    # Additional shell commands that should use native tools
    ("grep", "find", "head", "tail", "sed", "awk", "sort", "uniq", "wc"),
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def check_for_silent_blocked_command(command):
    """Decide whether a command should be redirected to a native tool.

    A leading "powershell " prefix is ignored. The decision is based on the
    first token of the command, lowercased, being in SILENT_COMMAND_BLOCKED.
    For chained commands the first segment decides: "cd /var/log && tail -f"
    is not blocked, "cat file && echo done" is.

    Args:
        command: Command string to validate

    Returns:
        tuple: (is_blocked, reprompt_message)
            is_blocked is True if the command should be silently blocked
            reprompt_message tells the AI which tool to use, or is None
            when the command is not blocked
    """
    stripped = command.strip()
    if not stripped:
        return False, None

    # Ignore a leading "powershell " prefix
    ps_prefix = "powershell "
    if stripped.lower().startswith(ps_prefix):
        stripped = stripped[len(ps_prefix):].strip()

    # Chained command: let it through unless its first segment starts with a
    # blocked tool (an empty first segment falls through to the check below).
    if CHAINING_OPERATORS.search(stripped):
        leading = CHAINING_OPERATORS.split(stripped, maxsplit=1)[0].strip()
        leading_tokens = _tokenize_segment(leading)
        if leading_tokens and leading_tokens[0].lower() not in SILENT_COMMAND_BLOCKED:
            return False, None

    tokens = _tokenize_segment(stripped)
    if not tokens:
        return False, None

    cmd_name = tokens[0].lower()
    if cmd_name not in SILENT_COMMAND_BLOCKED:
        return False, None

    # Which native tool to suggest for each blocked command
    suggestions = {
        **dict.fromkeys(("rg", "rg.exe", "ripgrep"), "rg tool"),
        **dict.fromkeys(("cat", "get-content", "type"), "read_file tool"),
        **dict.fromkeys(("ls", "get-childitem", "dir"), "list_directory tool"),
        **dict.fromkeys(("touch", "new-item"), "create_file tool"),
        **dict.fromkeys(("set-content", "add-content", "echo", "tee"), "edit_file tool"),
        "grep": "rg tool for code search, or read_file tool for searching within a file",
        "find": "list_directory tool with recursive=True for listing files, or rg tool for searching content",
        "head": "read_file tool with start_line=1 and max_lines=N",
        "tail": "read_file tool with start_line and max_lines parameters",
        "sed": "edit_file tool for text replacements",
        "awk": "read_file tool followed by post-processing, or use rg tool for pattern matching",
        "sort": "read_file tool then process results",
        "uniq": "read_file tool then process results",
        "wc": "read_file tool shows line counts",
    }
    tool_suggestion = suggestions.get(cmd_name, "appropriate native tool")
    return True, (
        f"Use the {tool_suggestion} instead of '{cmd_name}'. "
        f"Native tools provide better integration with the system."
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _tokenize_segment(segment):
|
|
138
|
+
use_posix = os.name != "nt"
|
|
139
|
+
try:
|
|
140
|
+
return shlex.split(segment, posix=use_posix)
|
|
141
|
+
except ValueError:
|
|
142
|
+
return segment.split()
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def check_command(command):
    """Run basic structural checks on a command.

    Only two things are rejected: a command with no content, and a command
    that still begins with "powershell" once a single leading "powershell "
    prefix has been removed. Approval and safety decisions are made
    upstream by the caller.

    Args:
        command: Command string to validate

    Returns:
        tuple: (is_valid, reason) - reason is None when the command is
               accepted, otherwise a short description of the rejection.
    """
    body = command.strip()
    if not body:
        return False, "empty command"

    # Drop one "powershell " prefix (legacy support for Windows users)
    if body.lower().startswith("powershell "):
        body = body[len("powershell "):].strip()

    # Anything that still starts with "powershell" is treated as nested
    if body.lower().startswith("powershell"):
        return False, "nested powershell invocation"

    # Every command that tokenizes to something is allowed
    if _tokenize_segment(body):
        return True, None
    return False, "empty command"
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def is_auto_approved_command(command):
    """Report whether a command is safe enough to run without approval.

    Thin wrapper around ``utils.safe_commands.is_safe_command``.

    Args:
        command: Command string to validate

    Returns:
        bool: True if command is safe to auto-approve
    """
    # Imported inside the function rather than at module level; presumably
    # this avoids a circular import with utils.safe_commands — confirm.
    from utils.safe_commands import is_safe_command as _is_safe
    verdict = _is_safe(command)
    return verdict
|