bareagent-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bareagent/__init__.py +10 -0
- bareagent/concurrency/__init__.py +6 -0
- bareagent/concurrency/background.py +97 -0
- bareagent/concurrency/notification.py +61 -0
- bareagent/concurrency/scheduler.py +136 -0
- bareagent/config.toml +299 -0
- bareagent/core/__init__.py +1 -0
- bareagent/core/config_paths.py +49 -0
- bareagent/core/context.py +127 -0
- bareagent/core/fileutil.py +103 -0
- bareagent/core/goal.py +214 -0
- bareagent/core/handlers/__init__.py +1 -0
- bareagent/core/handlers/bash.py +79 -0
- bareagent/core/handlers/file_edit.py +47 -0
- bareagent/core/handlers/file_read.py +270 -0
- bareagent/core/handlers/file_write.py +34 -0
- bareagent/core/handlers/glob_search.py +30 -0
- bareagent/core/handlers/goal.py +60 -0
- bareagent/core/handlers/grep_search.py +52 -0
- bareagent/core/handlers/memory.py +71 -0
- bareagent/core/handlers/plan.py +106 -0
- bareagent/core/handlers/search_utils.py +77 -0
- bareagent/core/handlers/skill.py +87 -0
- bareagent/core/handlers/subagent_send.py +70 -0
- bareagent/core/handlers/web_fetch.py +126 -0
- bareagent/core/handlers/web_search.py +165 -0
- bareagent/core/handlers/workflow.py +190 -0
- bareagent/core/loop.py +535 -0
- bareagent/core/retry.py +131 -0
- bareagent/core/sandbox.py +27 -0
- bareagent/core/schema.py +21 -0
- bareagent/core/tools.py +779 -0
- bareagent/core/workflow.py +517 -0
- bareagent/core/workflow_registry.py +219 -0
- bareagent/debug/__init__.py +0 -0
- bareagent/debug/interaction_log.py +263 -0
- bareagent/debug/viewer.html +1750 -0
- bareagent/debug/web_viewer.py +157 -0
- bareagent/hooks/__init__.py +32 -0
- bareagent/hooks/config.py +118 -0
- bareagent/hooks/engine.py +197 -0
- bareagent/hooks/errors.py +14 -0
- bareagent/hooks/events.py +22 -0
- bareagent/lsp/__init__.py +63 -0
- bareagent/lsp/config.py +134 -0
- bareagent/lsp/coord.py +118 -0
- bareagent/lsp/diagnostics.py +240 -0
- bareagent/lsp/errors.py +24 -0
- bareagent/lsp/manager.py +866 -0
- bareagent/lsp/tools.py +629 -0
- bareagent/lsp/workspace_edit.py +305 -0
- bareagent/main.py +4205 -0
- bareagent/mcp/__init__.py +69 -0
- bareagent/mcp/_sse.py +69 -0
- bareagent/mcp/client.py +341 -0
- bareagent/mcp/config.py +169 -0
- bareagent/mcp/errors.py +32 -0
- bareagent/mcp/manager.py +318 -0
- bareagent/mcp/protocol.py +187 -0
- bareagent/mcp/registry.py +557 -0
- bareagent/mcp/transport/__init__.py +15 -0
- bareagent/mcp/transport/base.py +149 -0
- bareagent/mcp/transport/http_legacy.py +192 -0
- bareagent/mcp/transport/http_streamable.py +217 -0
- bareagent/mcp/transport/stdio.py +202 -0
- bareagent/memory/__init__.py +1 -0
- bareagent/memory/compact.py +203 -0
- bareagent/memory/conversation_io.py +226 -0
- bareagent/memory/embedding.py +194 -0
- bareagent/memory/persistent.py +515 -0
- bareagent/memory/token_counter.py +67 -0
- bareagent/memory/token_tracker.py +262 -0
- bareagent/memory/transcript.py +100 -0
- bareagent/permission/__init__.py +1 -0
- bareagent/permission/guard.py +329 -0
- bareagent/permission/rules.py +19 -0
- bareagent/planning/__init__.py +19 -0
- bareagent/planning/agent_types.py +169 -0
- bareagent/planning/skill_gen.py +141 -0
- bareagent/planning/skill_store.py +173 -0
- bareagent/planning/skills.py +146 -0
- bareagent/planning/subagent.py +355 -0
- bareagent/planning/subagent_registry.py +77 -0
- bareagent/planning/tasks.py +348 -0
- bareagent/planning/todo.py +153 -0
- bareagent/planning/worktree.py +122 -0
- bareagent/provider/__init__.py +1 -0
- bareagent/provider/anthropic.py +348 -0
- bareagent/provider/base.py +136 -0
- bareagent/provider/factory.py +130 -0
- bareagent/provider/openai.py +881 -0
- bareagent/provider/presets.py +72 -0
- bareagent/provider/setup.py +356 -0
- bareagent/skills/.gitkeep +1 -0
- bareagent/skills/code-review/SKILL.md +68 -0
- bareagent/skills/git/SKILL.md +68 -0
- bareagent/skills/test/SKILL.md +70 -0
- bareagent/team/__init__.py +17 -0
- bareagent/team/autonomous.py +193 -0
- bareagent/team/mailbox.py +239 -0
- bareagent/team/manager.py +155 -0
- bareagent/team/protocols.py +129 -0
- bareagent/tracing/__init__.py +12 -0
- bareagent/tracing/_api.py +92 -0
- bareagent/tracing/_proxy.py +60 -0
- bareagent/tracing/composite.py +115 -0
- bareagent/tracing/json_file.py +115 -0
- bareagent/tracing/langfuse.py +139 -0
- bareagent/tracing/otel.py +107 -0
- bareagent/tracing/setup.py +85 -0
- bareagent/ui/__init__.py +24 -0
- bareagent/ui/console.py +167 -0
- bareagent/ui/prompt.py +78 -0
- bareagent/ui/protocol.py +24 -0
- bareagent/ui/stream.py +66 -0
- bareagent/ui/theme.py +240 -0
- bareagent_cli-0.1.0.dist-info/METADATA +331 -0
- bareagent_cli-0.1.0.dist-info/RECORD +121 -0
- bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
- bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
- bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
from pathlib import Path, PurePosixPath
|
|
6
|
+
|
|
7
|
+
# Path component names that mark a tree as off-limits for searching.
IGNORED_PATH_NAMES = {
    ".git",
    ".pytest_cache",
    ".venv",
    "__pycache__",
    "node_modules",
}


def is_ignored_descendant(path: Path, search_root: Path) -> bool:
    """Tell whether ``path`` lies inside an ignored tree below ``search_root``.

    Only the components of ``path`` relative to ``search_root`` are inspected,
    so a search root that itself sits inside an ignored directory is still
    searchable.

    Raises:
        ValueError: if ``path`` is not located under ``search_root``.
    """
    components_below_root = path.relative_to(search_root).parts
    return not IGNORED_PATH_NAMES.isdisjoint(components_below_root)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def iter_search_files(search_root: Path) -> Iterator[Path]:
    """Yield the files to search under ``search_root`` in sorted walk order.

    A ``search_root`` that is a regular file is yielded as-is. Otherwise the
    resolved root is walked with ``os.walk``; ignored directories are pruned
    in place so they are never descended into, and ignored file names are
    skipped.
    """
    if search_root.is_file():
        yield search_root
        return

    base = search_root.resolve(strict=False)
    for dirpath, subdirs, filenames in os.walk(base):
        here = Path(dirpath)
        # Slice-assign so os.walk sees the pruned, sorted directory list.
        kept = [d for d in subdirs if not is_ignored_descendant(here / d, base)]
        kept.sort()
        subdirs[:] = kept
        for filename in sorted(filenames):
            found = here / filename
            if not is_ignored_descendant(found, base):
                yield found
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def matches_glob_pattern(candidate: Path, search_root: Path, pattern: str) -> bool:
    """Check ``candidate`` against a glob ``pattern`` rooted at ``search_root``.

    Backslashes in the pattern are treated as ``/``. A pattern containing
    ``/`` or ``**`` is matched against the candidate's root-relative POSIX
    path, trying every variant from ``_expand_recursive_variants``. Any other
    pattern is matched against the file name only.

    Raises:
        ValueError: if the resolved candidate is not under the resolved root.
    """
    normalized = pattern.replace("\\", "/")
    candidate_abs = candidate.resolve(strict=False)
    root_abs = search_root.resolve(strict=False)
    # Computed for every pattern kind, so a candidate outside the root always raises.
    rel_path = PurePosixPath(candidate_abs.relative_to(root_abs).as_posix())

    if "/" not in normalized and "**" not in normalized:
        return PurePosixPath(candidate.name).match(normalized)

    for variant in _expand_recursive_variants(normalized):
        if rel_path.match(variant):
            return True
    return False
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def requires_recursive_walk(pattern: str) -> bool:
    """Return True when ``pattern`` contains a path separator or ``**``.

    Backslashes count as separators: they are normalized to ``/`` first.
    """
    normalized = pattern.replace("\\", "/")
    return any(marker in normalized for marker in ("/", "**"))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _expand_recursive_variants(pattern: str) -> set[str]:
|
|
63
|
+
variants = {pattern}
|
|
64
|
+
changed = True
|
|
65
|
+
while changed:
|
|
66
|
+
changed = False
|
|
67
|
+
new_variants: set[str] = set()
|
|
68
|
+
for variant in variants:
|
|
69
|
+
index = variant.find("**/")
|
|
70
|
+
while index != -1:
|
|
71
|
+
new_variants.add(variant[:index] + variant[index + 3 :])
|
|
72
|
+
index = variant.find("**/", index + 1)
|
|
73
|
+
extra = new_variants - variants
|
|
74
|
+
if extra:
|
|
75
|
+
variants.update(extra)
|
|
76
|
+
changed = True
|
|
77
|
+
return variants
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Handler + schema for the ``skill_create`` tool (experiential skill drafting).
|
|
2
|
+
|
|
3
|
+
Unlike most tools, ``skill_create`` is NOT registered in the global tool set.
|
|
4
|
+
It is exposed only inside the isolated "reflection" ``agent_loop`` call that
|
|
5
|
+
runs after a sufficiently complex multi-turn task (see ``main.py`` and
|
|
6
|
+
``bareagent/planning/skill_gen.py``). Keeping it out of the global set means:
|
|
7
|
+
- the main loop never offers it, so skills are *triggered*, not spontaneous;
|
|
8
|
+
- sub-agents never receive it (isolation, like ``hook_engine``);
|
|
9
|
+
- ``[skills] auto_generate = false`` fully short-circuits — the tool simply
|
|
10
|
+
does not exist when the reflection never runs.
|
|
11
|
+
|
|
12
|
+
The handler is a thin wrapper over :class:`bareagent.planning.skill_store.SkillStore`,
|
|
13
|
+
converting expected storage errors into ``Error:`` strings so the model can
|
|
14
|
+
react instead of crashing the loop (see ``error-handling.md``).
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
from bareagent.core.schema import tool_schema
|
|
22
|
+
from bareagent.planning.skill_store import derive_skill_slug
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from collections.abc import Set as AbstractSet
|
|
26
|
+
|
|
27
|
+
from bareagent.planning.skill_store import SkillStore
|
|
28
|
+
|
|
29
|
+
# Schema advertised to the model for the ``skill_create`` tool: the tool name,
# its description, and three string arguments (name, description, body).
# NOTE(review): the trailing list is presumably the required-argument names;
# confirm against ``tool_schema``.
SKILL_CREATE_TOOL_SCHEMA = tool_schema(
    "skill_create",
    (
        "Save a reusable skill distilled from the workflow you just completed. "
        "Writes a draft SKILL.md to the pending area; the user promotes it with "
        "/skill keep. Call at most once per reflection."
    ),
    {
        "name": {
            "type": "string",
            "description": "Short kebab-case skill identifier, e.g. 'add-config-section'.",
        },
        "description": {
            "type": "string",
            "description": "One line starting with 'Use this when ...'.",
        },
        "body": {
            "type": "string",
            "description": (
                "Markdown body: Steps / Pitfalls / Verification sections capturing "
                "the procedure, dead-ends hit, and how success was checked."
            ),
        },
    },
    ["name", "description", "body"],
)
|
|
55
|
+
|
|
56
|
+
# Failures that are reported to the model as ``Error:`` text instead of raising.
_HANDLED_ERRORS = (ValueError, OSError)


def run_skill_create(
    *,
    store: SkillStore,
    name: str | None = None,
    description: str | None = None,
    body: str | None = None,
    reserved_names: AbstractSet[str] | None = None,
) -> str:
    """Save a draft skill through ``store`` and report the outcome as text.

    Args:
        store: Object whose ``create_draft(name, description, body)`` persists
            the draft and returns the message handed back to the model.
        name: Skill name; must be non-blank.
        description: Skill description; ``None`` is stored as ``""``.
        body: Skill body; ``None`` is stored as ``""``.
        reserved_names: Slugs that generated skills may not use. When empty or
            ``None`` the collision check is skipped.

    Returns:
        The value returned by ``store.create_draft`` on success, otherwise a
        string starting with ``Error:``. ``ValueError`` and ``OSError`` are
        converted to such strings rather than propagated.
    """
    if not name or not str(name).strip():
        return "Error: skill_create requires a non-empty 'name'."
    skill_name = str(name)
    try:
        # Forbid colliding with a checked-in canon skill: a generated skill of that
        # name would be shadowed by the canon (loader scans canon first) and never
        # load. Reject so the model picks a distinct name (self-evolution scope:
        # only generated skills evolve; canon is read-only).
        # The slug derivation sits inside the ``try`` so that a name it rejects is
        # reported as an ``Error:`` string like every other handled failure.
        if reserved_names:
            slug = derive_skill_slug(skill_name)
            if slug in reserved_names:
                return (
                    f"Error: '{slug}' is a built-in (repo) skill name and cannot be "
                    "overwritten. Choose a different name."
                )
        return store.create_draft(
            skill_name,
            str(description or ""),
            str(body or ""),
        )
    except _HANDLED_ERRORS as exc:
        return f"Error: {exc}"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
5
|
+
from bareagent.core.schema import tool_schema
|
|
6
|
+
from bareagent.planning.subagent_registry import ResumableContext, SubagentRegistry
|
|
7
|
+
|
|
8
|
+
# Schema advertised to the model for the ``subagent_send`` tool: the tool name,
# its description, and two string arguments (agent_id, message).
# NOTE(review): the trailing list is presumably the required-argument names;
# confirm against ``tool_schema``.
SUBAGENT_SEND_TOOL_SCHEMA = tool_schema(
    "subagent_send",
    (
        "Continue a previously spawned foreground subagent, preserving its full "
        "context. Pass the agent id returned when the subagent was spawned plus a "
        "follow-up message; the subagent resumes its conversation and returns a "
        "new result. Only foreground, non-worktree subagents are resumable -- "
        "background and worktree-isolated subagents do not register a context."
    ),
    {
        "agent_id": {
            "type": "string",
            "description": "Id of the subagent to continue (e.g. sa-xxxxxxxx).",
        },
        "message": {
            "type": "string",
            "description": "Follow-up message to send to the subagent.",
        },
    },
    ["agent_id", "message"],
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _resume_footnote(agent_id: str) -> str:
|
|
32
|
+
return f"\n\n[subagent id {agent_id}: still resumable -- continue with subagent_send]"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def run_subagent_send(
    agent_id: str,
    message: str,
    *,
    registry: SubagentRegistry,
    run_loop: Callable[[ResumableContext], str],
) -> str:
    """Drive one ``subagent_send`` call, with the loop runner injected.

    Steps: validate both arguments, fetch the resumable context from
    ``registry``, append ``message`` as a user turn, run ``run_loop`` on the
    context, re-register the context, and return the loop result followed by
    the continuation footnote.

    Bad input or an unknown id produces an ``Error:`` string rather than an
    exception. Exceptions raised by ``run_loop`` are not caught here.
    """
    if not isinstance(agent_id, str) or not agent_id.strip():
        return "Error: agent_id must not be empty."
    target_id = agent_id.strip()
    if not (isinstance(message, str) and message.strip()):
        return "Error: message must not be empty."

    resumable = registry.get(target_id)
    if resumable is None:
        return (
            f"Error: subagent {target_id} not found. It may have been evicted "
            "(only the most recent foreground subagents stay resumable) or the "
            "session was reset (/new, /resume, /import, /clear)."
        )

    resumable.messages.append({"role": "user", "content": message})
    reply = run_loop(resumable)
    # Registering again after a successful run refreshes the FIFO position; a
    # run_loop failure skips this line, leaving the prior position untouched.
    registry.register(resumable)
    return reply + _resume_footnote(target_id)
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import html.parser
|
|
4
|
+
import re
|
|
5
|
+
from urllib.error import URLError
|
|
6
|
+
from urllib.request import Request, urlopen
|
|
7
|
+
|
|
8
|
+
# Defaults used by run_web_fetch: request timeout and output length cap.
_DEFAULT_TIMEOUT = 15
_DEFAULT_MAX_LENGTH = 10000
_USER_AGENT = "BareAgent/1.0"
# Runs of spaces/tabs, collapsed to a single space per output line.
_RE_WHITESPACE = re.compile(r"[ \t]+")


class _HTMLToText(html.parser.HTMLParser):
    """Convert HTML into readable plain text.

    - Drops the content of <script>, <style>, <nav>, <footer>, <header> and
      <noscript> elements.
    - Inserts a line break at block-level elements (p/div/h1-h6/li/br/tr, ...).
    - Collapses consecutive whitespace.
    """

    _SKIP_TAGS = frozenset({"script", "style", "nav", "footer", "header", "noscript"})
    _BLOCK_TAGS = frozenset(
        {
            "p",
            "div",
            "h1",
            "h2",
            "h3",
            "h4",
            "h5",
            "h6",
            "li",
            "br",
            "tr",
            "blockquote",
            "pre",
            "section",
            "article",
        }
    )

    def __init__(self) -> None:
        super().__init__()
        self._parts: list[str] = []
        # Number of currently open skip-tags; text is kept only while it is 0.
        self._skip_depth = 0

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        if tag in self._SKIP_TAGS:
            self._skip_depth += 1
        if tag in self._BLOCK_TAGS and self._skip_depth == 0:
            self._parts.append("\n")

    def handle_endtag(self, tag: str) -> None:
        # Guard against a stray closing tag driving the depth negative.
        if tag in self._SKIP_TAGS and self._skip_depth > 0:
            self._skip_depth -= 1
        if tag in self._BLOCK_TAGS and self._skip_depth == 0:
            self._parts.append("\n")

    def handle_data(self, data: str) -> None:
        if self._skip_depth == 0:
            self._parts.append(data)

    def get_text(self) -> str:
        """Return the collected text with whitespace and blank lines normalized."""
        raw = "".join(self._parts)
        result_lines: list[str] = []
        prev_empty = False
        for line in raw.splitlines():
            stripped = _RE_WHITESPACE.sub(" ", line).strip()
            if not stripped:
                # Keep at most one blank line between paragraphs.
                if not prev_empty:
                    result_lines.append("")
                prev_empty = True
            else:
                result_lines.append(stripped)
                prev_empty = False
        return "\n".join(result_lines).strip()


def html_to_text(html_content: str) -> str:
    """Convert an HTML string into readable plain text.

    Args:
        html_content: HTML document or fragment.

    Returns:
        The visible text, one paragraph per line group, with skipped elements
        removed and whitespace collapsed.
    """
    parser = _HTMLToText()
    parser.feed(html_content)
    # feed() may hold back trailing text (e.g. text ending in an unterminated
    # "&..." sequence such as "R&D") in case more input follows; close()
    # flushes that buffer so the tail of the document is not lost.
    parser.close()
    return parser.get_text()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _truncate(text: str, max_length: int) -> str:
|
|
88
|
+
"""截断文本到指定长度,在最后一个完整行处截断。"""
|
|
89
|
+
if len(text) <= max_length:
|
|
90
|
+
return text
|
|
91
|
+
truncated = text[:max_length]
|
|
92
|
+
# 尝试在最后一个换行处截断
|
|
93
|
+
last_newline = truncated.rfind("\n")
|
|
94
|
+
if last_newline > max_length * 0.8:
|
|
95
|
+
truncated = truncated[:last_newline]
|
|
96
|
+
return truncated + "\n\n[... content truncated]"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def run_web_fetch(
    url: str,
    max_length: int = _DEFAULT_MAX_LENGTH,
    timeout: int = _DEFAULT_TIMEOUT,
) -> str:
    """Fetch content from a URL, convert HTML to text, and truncate.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.
        max_length: Maximum number of characters of text to return. At most
            ``max_length * 4`` bytes are read from the response.
        timeout: Timeout passed to ``urlopen``.

    Returns:
        The (possibly truncated) page text, or a string starting with
        ``Error`` when the URL is rejected or the fetch fails. Network and
        protocol failures are reported as text rather than raised.
    """
    # Local import: http.client errors are raised by urlopen/read without
    # being wrapped in URLError, so they need their own handlers.
    from http.client import HTTPException, InvalidURL

    if not url.startswith(("http://", "https://")):
        return f"Error: URL must start with http:// or https:// (got: {url})"

    request = Request(url, headers={"User-Agent": _USER_AGENT})
    try:
        with urlopen(request, timeout=timeout) as resp:  # noqa: S310
            content_type = resp.headers.get("Content-Type", "")
            charset = resp.headers.get_content_charset() or "utf-8"

            raw_bytes = resp.read(max_length * 4)
    except (URLError, OSError, TimeoutError) as exc:
        return f"Error fetching URL: {exc}"
    except (ValueError, InvalidURL) as exc:
        # InvalidURL covers e.g. a path containing spaces or control characters.
        return f"Error: invalid URL: {exc}"
    except HTTPException as exc:
        # e.g. IncompleteRead or a malformed status line.
        return f"Error fetching URL: {exc}"

    try:
        body = raw_bytes.decode(charset, errors="replace")
    except LookupError:
        # The server announced a charset Python has no codec for; fall back
        # to UTF-8 instead of failing the whole fetch.
        body = raw_bytes.decode("utf-8", errors="replace")

    if "html" in content_type.lower():
        text = html_to_text(body)
    else:
        text = body

    return _truncate(text, max_length)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import binascii
|
|
5
|
+
import html
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
from urllib.error import URLError
|
|
10
|
+
from urllib.parse import quote_plus
|
|
11
|
+
from urllib.request import Request, urlopen
|
|
12
|
+
|
|
13
|
+
from bareagent.core.handlers.web_fetch import _DEFAULT_TIMEOUT, html_to_text
|
|
14
|
+
|
|
15
|
+
# Default number of results returned by run_web_search.
_DEFAULT_MAX_RESULTS = 5
# Upper bound on the bytes read from any search response.
_MAX_READ_BYTES = 512_000

_BING_SEARCH_URL = "https://www.bing.com/search"
# Bing serves server-rendered organic results (<li class="b_algo">) only to lightweight /
# non-JS user agents. A modern desktop UA gets a JS shell whose results are injected
# client-side, so a plain HTTP fetch finds nothing. A text-browser UA forces the SSR path.
_BING_UA = "Lynx/2.8.9rel.1 libwww-FM/2.14"

# One organic result item.
_RE_BING_BLOCK = re.compile(r'<li class="b_algo".*?</li>', re.DOTALL)
# Inner HTML of the link inside the result heading (the title).
_RE_BING_TITLE = re.compile(r"<h2[^>]*>\s*<a[^>]*>(.*?)</a>", re.DOTALL)
# href attribute of the link inside the result heading.
_RE_BING_HREF = re.compile(r'<h2[^>]*>\s*<a[^>]+href="([^"]+)"', re.DOTALL)
# First paragraph inside the result caption (the snippet).
_RE_BING_SNIPPET = re.compile(r'<div class="b_caption".*?<p[^>]*>(.*?)</p>', re.DOTALL)
# Bing wraps result links in a /ck/a redirect; the real URL is base64url in `u=a1<...>`.
_RE_BING_REDIRECT_U = re.compile(r"[?&]u=a1([^&]+)")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _search_brave(
    query: str,
    max_results: int,
    timeout: int,
    api_key: str,
) -> list[dict[str, str]]:
    """Search through the Brave Search API.

    Sends ``query`` with ``api_key`` as the subscription token and returns at
    most ``max_results`` entries, each a dict with ``title``, ``url`` and
    ``snippet`` keys. Network and decoding errors propagate to the caller.
    """
    endpoint = (
        f"https://api.search.brave.com/res/v1/web/search"
        f"?q={quote_plus(query)}&count={max_results}"
    )
    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "identity",
        "X-Subscription-Token": api_key,
    }
    with urlopen(Request(endpoint, headers=headers), timeout=timeout) as resp:  # noqa: S310
        payload = json.loads(resp.read(_MAX_READ_BYTES).decode("utf-8"))

    hits = payload.get("web", {}).get("results", [])
    return [
        {
            "title": hit.get("title", ""),
            "url": hit.get("url", ""),
            "snippet": hit.get("description", ""),
        }
        for hit in hits[:max_results]
    ]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _decode_bing_url(href: str) -> str:
    """Turn a Bing /ck/a redirect href into the destination URL it wraps.

    The destination is carried base64url-encoded in the ``u=a1<encoded>``
    query parameter. When that marker is missing, or its payload cannot be
    decoded, the HTML-unescaped href is returned unchanged.
    """
    plain_href = html.unescape(href)
    found = _RE_BING_REDIRECT_U.search(plain_href)
    if found is None:
        return plain_href

    payload = found.group(1)
    # Restore the "=" padding that the redirect parameter omits.
    payload += "=" * (-len(payload) % 4)
    try:
        raw = base64.urlsafe_b64decode(payload)
    except (binascii.Error, ValueError):
        return plain_href
    return raw.decode("utf-8", errors="replace")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _parse_bing_html(body: str) -> list[dict[str, str]]:
    """Pull the organic results out of a Bing HTML results page.

    Result items lacking a heading link are skipped. Each returned dict has
    ``title``, ``url`` and ``snippet`` keys; ``snippet`` is ``""`` when the
    item has no caption paragraph.
    """
    parsed: list[dict[str, str]] = []
    for found in _RE_BING_BLOCK.finditer(body):
        item_html = found.group(0)
        heading = _RE_BING_TITLE.search(item_html)
        link = _RE_BING_HREF.search(item_html)
        if heading is None or link is None:
            continue
        title = html_to_text(heading.group(1)).strip()
        url = _decode_bing_url(link.group(1))
        caption = _RE_BING_SNIPPET.search(item_html)
        if caption is None:
            snippet = ""
        else:
            snippet = html_to_text(caption.group(1)).strip()
        parsed.append({"title": title, "url": url, "snippet": snippet})
    return parsed
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _search_bing_html(
    query: str,
    max_results: int,
    timeout: int,
) -> list[dict[str, str]]:
    """Search by scraping the Bing HTML results page.

    Needs no configuration or API key (and is directly reachable from
    mainland China). Returns at most ``max_results`` parsed results, or an
    empty list when the page itself reports that there are no results.

    Raises:
        RuntimeError: if nothing could be parsed and the page does not say
            that there are no results.
    """
    request = Request(
        f"{_BING_SEARCH_URL}?q={quote_plus(query)}",
        headers={"User-Agent": _BING_UA, "Accept-Language": "en-US,en;q=0.9"},
    )
    with urlopen(request, timeout=timeout) as resp:  # noqa: S310
        page = resp.read(_MAX_READ_BYTES).decode("utf-8", errors="replace")

    hits = _parse_bing_html(page)
    if not hits:
        # Tell a genuinely empty result set apart from an anti-bot or
        # unsupported-browser page, so the caller can report an explicit
        # error rather than a misleading "No results".
        if "there are no results" in page.lower():
            return []
        raise RuntimeError(
            "Bing returned no parseable results (likely an anti-bot or unsupported-browser "
            "page). Set BRAVE_SEARCH_API_KEY to use a reliable search backend."
        )
    return hits[:max_results]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _format_results(results: list[dict[str, str]], query: str) -> str:
|
|
128
|
+
"""将搜索结果格式化为可读文本。"""
|
|
129
|
+
if not results:
|
|
130
|
+
return f"No results found for: {query}"
|
|
131
|
+
|
|
132
|
+
lines: list[str] = [f"Search results for: {query}", ""]
|
|
133
|
+
for i, r in enumerate(results, 1):
|
|
134
|
+
lines.append(f"{i}. {r['title']}")
|
|
135
|
+
lines.append(f" URL: {r['url']}")
|
|
136
|
+
if r.get("snippet"):
|
|
137
|
+
lines.append(f" {r['snippet']}")
|
|
138
|
+
lines.append("")
|
|
139
|
+
return "\n".join(lines).rstrip()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def run_web_search(
    query: str,
    max_results: int = _DEFAULT_MAX_RESULTS,
    timeout: int = _DEFAULT_TIMEOUT,
) -> str:
    """Search the web and return formatted results.

    Uses the Brave Search API when the ``BRAVE_SEARCH_API_KEY`` environment
    variable is set to a non-blank value, and the Bing HTML scraper otherwise.

    Args:
        query: Search terms; must not be blank.
        max_results: Maximum number of results to include.
        timeout: Timeout passed to the underlying HTTP request.

    Returns:
        The formatted result list, or a string starting with ``Error`` when
        the query is blank, the request fails, or the response cannot be
        parsed. Backend failures are reported as text rather than raised.
    """
    # Local import: http.client errors (e.g. IncompleteRead) are raised by
    # urlopen/read without being wrapped in URLError.
    from http.client import HTTPException

    if not query.strip():
        return "Error: search query cannot be empty."

    brave_api_key = os.environ.get("BRAVE_SEARCH_API_KEY", "").strip()

    try:
        if brave_api_key:
            results = _search_brave(query, max_results, timeout, brave_api_key)
        else:
            results = _search_bing_html(query, max_results, timeout)
    except (URLError, OSError, TimeoutError, HTTPException) as exc:
        return f"Error searching: {exc}"
    except RuntimeError as exc:
        return f"Error: {exc}"
    except (ValueError, KeyError) as exc:
        # ValueError covers json.JSONDecodeError and also the UnicodeDecodeError
        # raised when the byte-capped Brave response ends inside a multi-byte
        # UTF-8 sequence.
        return f"Error parsing search results: {exc}"

    return _format_results(results, query)
|