bone-agent 1.4.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/bone.js +39 -0
- package/package.json +25 -39
- package/LICENSE +0 -21
- package/README.md +0 -201
- package/bin/npm-wrapper.js +0 -235
- package/bin/rg +0 -0
- package/bin/rg.exe +0 -0
- package/config.yaml.example +0 -144
- package/prompts/main/ask_questions.md +0 -31
- package/prompts/main/batch_independent_calls.md +0 -5
- package/prompts/main/casual_interactions.md +0 -11
- package/prompts/main/code_references.md +0 -8
- package/prompts/main/communication_style.md +0 -12
- package/prompts/main/context_reliability.md +0 -12
- package/prompts/main/conversational_tool_calling.md +0 -15
- package/prompts/main/dream.md +0 -50
- package/prompts/main/editing_pattern.md +0 -13
- package/prompts/main/error_handling.md +0 -6
- package/prompts/main/exploration_pattern.md +0 -21
- package/prompts/main/intro.md +0 -1
- package/prompts/main/obsidian.md +0 -16
- package/prompts/main/obsidian_project.md +0 -79
- package/prompts/main/professional_objectivity.md +0 -3
- package/prompts/main/skills.md +0 -3
- package/prompts/main/targeted_searching.md +0 -10
- package/prompts/main/task_lists_pattern.md +0 -8
- package/prompts/main/temp_folder.md +0 -9
- package/prompts/main/think_before_acting.md +0 -10
- package/prompts/main/tone_and_style.md +0 -4
- package/prompts/main/tool_preferences.md +0 -24
- package/prompts/main/trust_subagent_context.md +0 -21
- package/prompts/main/when_to_use_sub_agent.md +0 -7
- package/prompts/micro/ask_questions.md +0 -1
- package/prompts/micro/batch_independent_calls.md +0 -1
- package/prompts/micro/casual_interactions.md +0 -1
- package/prompts/micro/code_references.md +0 -1
- package/prompts/micro/communication_style.md +0 -1
- package/prompts/micro/context_reliability.md +0 -1
- package/prompts/micro/conversational_tool_calling.md +0 -1
- package/prompts/micro/editing_pattern.md +0 -1
- package/prompts/micro/error_handling.md +0 -1
- package/prompts/micro/exploration_pattern.md +0 -1
- package/prompts/micro/intro.md +0 -1
- package/prompts/micro/obsidian.md +0 -4
- package/prompts/micro/obsidian_project.md +0 -5
- package/prompts/micro/professional_objectivity.md +0 -1
- package/prompts/micro/skills.md +0 -1
- package/prompts/micro/targeted_searching.md +0 -1
- package/prompts/micro/task_lists_pattern.md +0 -1
- package/prompts/micro/temp_folder.md +0 -1
- package/prompts/micro/think_before_acting.md +0 -5
- package/prompts/micro/tone_and_style.md +0 -1
- package/prompts/micro/tool_preferences.md +0 -1
- package/prompts/micro/trust_subagent_context.md +0 -1
- package/prompts/micro/when_to_use_sub_agent.md +0 -1
- package/requirements.txt +0 -9
- package/src/__init__.py +0 -11
- package/src/core/__init__.py +0 -1
- package/src/core/agentic.py +0 -1085
- package/src/core/chat_manager.py +0 -1577
- package/src/core/config_manager.py +0 -260
- package/src/core/cron.py +0 -578
- package/src/core/cron_allowlist.py +0 -118
- package/src/core/memory.py +0 -145
- package/src/core/metadata.py +0 -75
- package/src/core/retry.py +0 -71
- package/src/core/skills.py +0 -463
- package/src/core/sub_agent.py +0 -376
- package/src/core/tool_approval.py +0 -220
- package/src/core/tool_feedback.py +0 -789
- package/src/exceptions.py +0 -79
- package/src/llm/__init__.py +0 -1
- package/src/llm/client.py +0 -176
- package/src/llm/codex_provider.py +0 -350
- package/src/llm/config.py +0 -536
- package/src/llm/prompts.py +0 -494
- package/src/llm/providers.py +0 -438
- package/src/llm/streaming.py +0 -163
- package/src/llm/token_tracker.py +0 -399
- package/src/tools/__init__.py +0 -151
- package/src/tools/constants.py +0 -59
- package/src/tools/create_file.py +0 -136
- package/src/tools/directory.py +0 -389
- package/src/tools/edit.py +0 -549
- package/src/tools/file_reader.py +0 -322
- package/src/tools/helpers/__init__.py +0 -99
- package/src/tools/helpers/base.py +0 -599
- package/src/tools/helpers/converters.py +0 -44
- package/src/tools/helpers/file_helpers.py +0 -189
- package/src/tools/helpers/formatters.py +0 -411
- package/src/tools/helpers/loader.py +0 -145
- package/src/tools/helpers/parallel_executor.py +0 -231
- package/src/tools/helpers/path_resolver.py +0 -283
- package/src/tools/helpers/plugin_manifest.py +0 -185
- package/src/tools/obsidian.py +0 -96
- package/src/tools/review_sub_agent.py +0 -190
- package/src/tools/rg_search.py +0 -477
- package/src/tools/search_plugins.py +0 -177
- package/src/tools/select_option.py +0 -600
- package/src/tools/shell.py +0 -302
- package/src/tools/sub_agent.py +0 -139
- package/src/tools/task_list.py +0 -269
- package/src/tools/web_search.py +0 -61
- package/src/ui/__init__.py +0 -1
- package/src/ui/banner.py +0 -87
- package/src/ui/commands.py +0 -3131
- package/src/ui/displays.py +0 -239
- package/src/ui/loader.py +0 -284
- package/src/ui/main.py +0 -643
- package/src/ui/prompt_utils.py +0 -113
- package/src/ui/setting_selector.py +0 -590
- package/src/ui/setup_wizard.py +0 -294
- package/src/ui/sub_agent_panel.py +0 -234
- package/src/ui/tool_confirmation.py +0 -226
- package/src/utils/__init__.py +0 -1
- package/src/utils/citation_parser.py +0 -199
- package/src/utils/editor.py +0 -207
- package/src/utils/gitignore_filter.py +0 -149
- package/src/utils/logger.py +0 -254
- package/src/utils/paths.py +0 -30
- package/src/utils/result_parsers.py +0 -108
- package/src/utils/safe_commands.py +0 -243
- package/src/utils/settings.py +0 -195
- package/src/utils/user_message_logger.py +0 -120
- package/src/utils/validation.py +0 -201
- package/src/utils/web_search.py +0 -173
package/src/utils/validation.py
DELETED
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
"""Command validation."""
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import re
|
|
5
|
-
import shlex
|
|
6
|
-
from urllib.parse import urlparse
|
|
7
|
-
|
|
8
|
-
# Shell metacharacters that signal command chaining, substitution, or
# redirection. validation.py and shell.py both rely on this single definition
# so the two modules cannot drift apart.
# Covered operators: &&, ||, ;, |, >, <, backticks, $(), ${}, and line breaks.
_RAW_CHAINING_PATTERNS = [
    "&&", "||",
    ";", "|", ">", "<", "`",
    "$(", "${",
    "\n", "\r",
]

# NOTE: Longer operators must precede shorter ones in the alternation so that
# '&&' and '||' win over a bare '|'. The ordering is computed here with a
# stable sort on descending length, so the raw list may be edited in any order.
CHAINING_OPERATORS = re.compile(
    "|".join(
        map(re.escape, sorted(_RAW_CHAINING_PATTERNS, key=lambda op: -len(op)))
    )
)
|
|
17
|
-
|
|
18
|
-
# Hosts that are allowed over plain HTTP (treated as local; no TLS required).
_LOCALHOST_HOSTS = frozenset({"localhost", "127.0.0.1", "::1", "0.0.0.0"})


def validate_api_url(url: str) -> tuple[bool, str]:
    """Validate an API base URL for security.

    Enforces HTTPS for every endpoint whose host is not in
    ``_LOCALHOST_HOSTS`` and rejects obviously malformed URLs: non-string
    input, URLs that fail to parse, URLs without a host, and URLs whose port
    is not a valid integer.

    Args:
        url: The API base URL to check.

    Returns:
        (is_valid, error_message). ``error_message`` is an empty string when
        the URL is accepted.
    """
    # urlparse() tolerates bytes/None, which would only surface later as a
    # confusing "invalid scheme" message; reject anything that is not a str.
    if not isinstance(url, str):
        return False, f"Malformed URL: {url}"

    try:
        parsed = urlparse(url)
    except ValueError:
        # Raised for structurally invalid input such as an unbalanced
        # IPv6 bracket in the network location.
        return False, f"Malformed URL: {url}"

    if parsed.scheme not in ("http", "https"):
        return False, f"Invalid URL scheme '{parsed.scheme}', expected http or https"

    host = parsed.hostname
    if not host:
        # e.g. "https://" or "https:///v1" - a scheme with nothing to connect to.
        return False, f"Malformed URL: missing host in {url}"

    try:
        # Accessing .port validates it; a non-numeric or out-of-range port
        # raises ValueError.
        _ = parsed.port
    except ValueError:
        return False, f"Malformed URL: {url}"

    if parsed.scheme == "http" and host not in _LOCALHOST_HOSTS:
        return False, (
            "Plain HTTP is not allowed for remote endpoints. "
            f"Use HTTPS for {host}"
        )

    return True, ""
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
# Commands that execute_command should reject silently so the caller can be
# redirected to a native tool. Every entry has a better native equivalent.
SILENT_COMMAND_BLOCKED = set().union(
    # Code search -> rg tool
    ("rg", "rg.exe", "ripgrep"),
    # File reading -> read_file tool
    ("cat", "get-content", "type"),
    # Directory listing -> list_directory tool
    ("ls", "get-childitem", "dir"),
    # File creation -> create_file tool
    ("touch", "new-item"),
    # File editing -> edit_file tool
    ("set-content", "add-content", "echo", "tee"),
    # Other shell utilities that should go through native tools
    ("grep", "find", "head", "tail", "sed", "awk", "sort", "uniq", "wc"),
)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def check_for_silent_blocked_command(command):
    """Decide whether a command should be silently redirected to a native tool.

    Args:
        command: Command string to inspect.

    Returns:
        tuple: (is_blocked, reprompt_message)
            is_blocked is True when the command name is in
            SILENT_COMMAND_BLOCKED; reprompt_message then tells the AI which
            tool to use instead. Otherwise the result is (False, None).
    """
    stripped = command.strip()
    if not stripped:
        return False, None

    # Drop a leading "powershell " wrapper so the real command name is visible.
    wrapper = "powershell "
    if stripped.lower().startswith(wrapper):
        stripped = stripped[len(wrapper):].strip()

    # Chained commands are only redirected when their FIRST command is a
    # blocked tool: "cd /var/log && tail -f" passes through, whereas
    # "cat file && echo done" is still redirected to read_file.
    if CHAINING_OPERATORS.search(stripped):
        leading_segment = CHAINING_OPERATORS.split(stripped, maxsplit=1)[0].strip()
        leading_tokens = _tokenize_segment(leading_segment)
        if leading_tokens and leading_tokens[0].lower() not in SILENT_COMMAND_BLOCKED:
            return False, None
        # Otherwise continue to the regular blocked-name check below.

    all_tokens = _tokenize_segment(stripped)
    if not all_tokens:
        return False, None

    cmd_name = all_tokens[0].lower()
    if cmd_name not in SILENT_COMMAND_BLOCKED:
        return False, None

    # Map each blocked command to the native tool the AI should use instead.
    suggestions = {
        **dict.fromkeys(("rg", "rg.exe", "ripgrep"), "rg tool"),
        **dict.fromkeys(("cat", "get-content", "type"), "read_file tool"),
        **dict.fromkeys(("ls", "get-childitem", "dir"), "list_directory tool"),
        **dict.fromkeys(("touch", "new-item"), "create_file tool"),
        **dict.fromkeys(
            ("set-content", "add-content", "echo", "tee"), "edit_file tool"
        ),
        "grep": "rg tool for code search, or read_file tool for searching within a file",
        "find": "list_directory tool with recursive=True for listing files, or rg tool for searching content",
        "head": "read_file tool with start_line=1 and max_lines=N",
        "tail": "read_file tool with start_line and max_lines parameters",
        "sed": "edit_file tool for text replacements",
        "awk": "read_file tool followed by post-processing, or use rg tool for pattern matching",
        "sort": "read_file tool then process results",
        "uniq": "read_file tool then process results",
        "wc": "read_file tool shows line counts",
    }
    tool_suggestion = suggestions.get(cmd_name, "appropriate native tool")
    return True, (
        f"Use the {tool_suggestion} instead of '{cmd_name}'. "
        "Native tools provide better integration with the system."
    )
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def _tokenize_segment(segment):
    """Split a command segment into tokens using shell-style quoting rules.

    POSIX parsing rules are used unless ``os.name`` is "nt". When shlex cannot
    parse the segment and raises ValueError, the segment is split on plain
    whitespace instead.

    Args:
        segment: Command text to tokenize.

    Returns:
        list: The tokens found in the segment.
    """
    posix_mode = os.name != "nt"
    try:
        tokens = shlex.split(segment, posix=posix_mode)
    except ValueError:
        tokens = segment.split()
    return tokens
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
def check_command(command):
    """Run basic structural validation on a command string.

    Empty commands and nested powershell invocations are rejected. Approval
    and safety checks are the caller's responsibility and happen upstream.

    Args:
        command: Command string to validate.

    Returns:
        tuple: (is_valid, reason). reason is None when the command is
        accepted and a short description when it is rejected.
    """
    nested_reason = "nested powershell invocation"

    candidate = command.strip()
    if not candidate:
        return False, "empty command"

    # Legacy support for Windows users: tolerate one leading "powershell ".
    wrapper = "powershell "
    if candidate.lower().startswith(wrapper):
        candidate = candidate[len(wrapper):].strip()

    # Anything still starting with "powershell" after that is a nested call.
    if candidate.lower().startswith("powershell"):
        return False, nested_reason

    # Multi-line scripts and heredocs are valid payloads. They are not
    # tokenized as a whole, because tokenization could reject valid shell
    # syntax before the shell ever sees it; only each line's prefix is checked.
    if "\n" in candidate:
        has_nested = any(
            script_line.strip().lower().startswith("powershell")
            for script_line in candidate.splitlines()
        )
        if has_nested:
            return False, nested_reason
        return True, None

    # Single-line command: it must contain at least one token.
    if not _tokenize_segment(candidate):
        return False, "empty command"

    # Everything else is allowed.
    return True, None
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
def is_auto_approved_command(command):
    """Report whether a command may be auto-approved.

    The decision is delegated entirely to ``is_safe_command`` in
    ``utils.safe_commands``; this function adds no logic of its own.

    Args:
        command: Command string to evaluate.

    Returns:
        Whatever ``is_safe_command`` returns for the command (expected to be
        a bool: True if the command is safe to auto-approve).
    """
    # Imported at call time, not at module import time.
    from utils.safe_commands import is_safe_command as _is_safe

    verdict = _is_safe(command)
    return verdict
|
package/src/utils/web_search.py
DELETED
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
"""Web search using DuckDuckGo (no API key required)."""
|
|
2
|
-
|
|
3
|
-
import time
|
|
4
|
-
import requests
|
|
5
|
-
from readability import Document
|
|
6
|
-
import html2text
|
|
7
|
-
|
|
8
|
-
from ddgs import DDGS
|
|
9
|
-
from exceptions import LLMConnectionError
|
|
10
|
-
|
|
11
|
-
# Tunables for the web search helper.
_DEFAULT_FETCH_COUNT = 3    # top results whose full page content is fetched
_MAX_CONTENT_LENGTH = 8000  # character cap per fetched page (avoids context bloat)
_FETCH_TIMEOUT = 10         # HTTP timeout for each page fetch, in seconds
_FETCH_DELAY = 1.0          # pause between page fetches, in seconds (rate limiting)

# User-Agent header sent with every page fetch.
_USER_AGENT = (
    "Mozilla/5.0 (compatible; bone-agent/1.0; "
    "+https://github.com/vincentm65/bone-agent-cli)"
)
|
|
21
|
-
|
|
22
|
-
def _fetch_page_content(url, console=None):
    """Fetch a URL and extract its main article content as markdown.

    Args:
        url: URL to fetch.
        console: Optional console object. When provided, its ``print`` method
            is used to report skipped, empty, or failed fetches.

    Returns:
        str: Extracted markdown content, cut to at most
        ``_MAX_CONTENT_LENGTH`` characters plus a truncation marker, or an
        empty string on any failure.
    """
    try:
        response = requests.get(
            url,
            headers={"User-Agent": _USER_AGENT},
            timeout=_FETCH_TIMEOUT,
            allow_redirects=True,
        )
        response.raise_for_status()

        # Skip non-HTML content (PDFs, images, JSON APIs, etc.).
        # Media types are case-insensitive, so compare in lower case; the
        # header value is reported to the console unmodified.
        content_type = response.headers.get("content-type", "")
        normalized_type = content_type.lower()
        if "text/html" not in normalized_type and "text/plain" not in normalized_type:
            if console:
                console.print(f"  [dim]Skipped {url} (non-HTML: {content_type})[/dim]")
            return ""

        # Check for an empty response before parsing.
        if not response.text or not response.text.strip():
            if console:
                console.print(f"  [dim]Empty response from {url}[/dim]")
            return ""

        # Use readability to extract the main article content.
        doc = Document(response.text)
        summary_html = doc.summary()

        # Convert the cleaned HTML to markdown (one converter per call).
        md = html2text.HTML2Text()
        md.ignore_links = False
        md.ignore_images = True
        md.body_width = 0
        content = md.handle(summary_html).strip()

        # Truncate at the last newline before the limit to avoid mid-word
        # splits. If that newline is missing or would discard more than 20%
        # of the allowed length, cut exactly at the limit instead.
        if len(content) > _MAX_CONTENT_LENGTH:
            cutoff = content.rfind("\n", 0, _MAX_CONTENT_LENGTH)
            if cutoff < _MAX_CONTENT_LENGTH * 0.8:
                cutoff = _MAX_CONTENT_LENGTH
            content = content[:cutoff] + "\n\n[... content truncated]"

        return content

    except requests.RequestException as e:
        # Network-level failure: connection, timeout, HTTP error status, bad URL.
        if console:
            console.print(f"  [dim]Failed to fetch {url}: {e}[/dim]")
        return ""
    except Exception as e:
        # Best-effort extraction: any parsing failure yields no content
        # rather than aborting the whole search.
        if console:
            console.print(f"  [dim]Failed to parse {url}: {e}[/dim]")
        return ""
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def run_web_search(arguments, console):
    """Execute a web search using DuckDuckGo and return formatted results.

    Args:
        arguments: {
            "query": "search terms to look for",
            "num_results": 5,       # optional, number of results (default: 5, max: 10)
            "fetch_content": true   # optional, fetch full page content (default: true)
        }
        console: Console used for status output. May be None, in which case
            nothing is printed.

    Returns:
        str: A metadata line (results_found, plus pages_fetched and
        pages_failed when content fetching is enabled) followed by the
        formatted results for model consumption.

    Raises:
        LLMConnectionError: If ``query`` is missing, or if the search fails
            for any reason (the original error text is placed in ``details``).
    """
    query = arguments.get("query")
    num_results = arguments.get("num_results", 5)
    fetch_content = arguments.get("fetch_content", True)

    if not query:
        raise LLMConnectionError(
            "Missing required parameter: query",
            details={"arguments": arguments}
        )

    # Validate and clamp num_results between 1 and 10. OverflowError covers
    # float infinity, which int() cannot convert.
    try:
        num_results = max(1, min(10, int(num_results)))
    except (ValueError, TypeError, OverflowError):
        num_results = 5

    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=num_results))

        if not results:
            return "results_found=0\nNo results found.\n\n"

        # Only the top few results get a full-page fetch.
        fetch_count = min(_DEFAULT_FETCH_COUNT, len(results)) if fetch_content else 0
        pages_fetched = 0
        pages_failed = 0

        # Format results for the model.
        output_lines = []
        for idx, result in enumerate(results, 1):
            title = result.get("title", "Untitled")
            url = result.get("href", "N/A")
            body = result.get("body", "No content")

            output_lines.append(f"[{idx}] {title}")
            output_lines.append(f"URL: {url}")
            output_lines.append(f"Snippet: {body}")

            # Fetch full content for the top results.
            if fetch_content and idx <= fetch_count:
                content = _fetch_page_content(url, console)
                if content:
                    output_lines.append(f"\n--- Content ---\n{content}")
                    pages_fetched += 1
                else:
                    output_lines.append("\n[Failed to fetch page content]")
                    pages_failed += 1

                # Rate limiting: pause between fetches, not after the last one.
                if idx < fetch_count:
                    time.sleep(_FETCH_DELAY)

            # Blank separator line between results.
            if idx < len(results):
                output_lines.append("")

        # Build the result string, with a metadata line first for the model.
        result_content = "\n".join(output_lines)
        meta = f"results_found={len(results)}"
        if fetch_content:
            meta += f", pages_fetched={pages_fetched}"
            if pages_failed:
                meta += f", pages_failed={pages_failed}"
        return f"{meta}\n{result_content}\n\n"

    except LLMConnectionError:
        # Re-raise our custom exceptions untouched.
        raise
    except Exception as e:
        # Guard the console so a missing console cannot mask the real error.
        if console:
            console.print(f"Web search failed: {e}", style="red")
        raise LLMConnectionError(
            "Failed to perform web search",
            details={"query": query, "original_error": str(e)}
        ) from e
|