bareagent-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. bareagent/__init__.py +10 -0
  2. bareagent/concurrency/__init__.py +6 -0
  3. bareagent/concurrency/background.py +97 -0
  4. bareagent/concurrency/notification.py +61 -0
  5. bareagent/concurrency/scheduler.py +136 -0
  6. bareagent/config.toml +299 -0
  7. bareagent/core/__init__.py +1 -0
  8. bareagent/core/config_paths.py +49 -0
  9. bareagent/core/context.py +127 -0
  10. bareagent/core/fileutil.py +103 -0
  11. bareagent/core/goal.py +214 -0
  12. bareagent/core/handlers/__init__.py +1 -0
  13. bareagent/core/handlers/bash.py +79 -0
  14. bareagent/core/handlers/file_edit.py +47 -0
  15. bareagent/core/handlers/file_read.py +270 -0
  16. bareagent/core/handlers/file_write.py +34 -0
  17. bareagent/core/handlers/glob_search.py +30 -0
  18. bareagent/core/handlers/goal.py +60 -0
  19. bareagent/core/handlers/grep_search.py +52 -0
  20. bareagent/core/handlers/memory.py +71 -0
  21. bareagent/core/handlers/plan.py +106 -0
  22. bareagent/core/handlers/search_utils.py +77 -0
  23. bareagent/core/handlers/skill.py +87 -0
  24. bareagent/core/handlers/subagent_send.py +70 -0
  25. bareagent/core/handlers/web_fetch.py +126 -0
  26. bareagent/core/handlers/web_search.py +165 -0
  27. bareagent/core/handlers/workflow.py +190 -0
  28. bareagent/core/loop.py +535 -0
  29. bareagent/core/retry.py +131 -0
  30. bareagent/core/sandbox.py +27 -0
  31. bareagent/core/schema.py +21 -0
  32. bareagent/core/tools.py +779 -0
  33. bareagent/core/workflow.py +517 -0
  34. bareagent/core/workflow_registry.py +219 -0
  35. bareagent/debug/__init__.py +0 -0
  36. bareagent/debug/interaction_log.py +263 -0
  37. bareagent/debug/viewer.html +1750 -0
  38. bareagent/debug/web_viewer.py +157 -0
  39. bareagent/hooks/__init__.py +32 -0
  40. bareagent/hooks/config.py +118 -0
  41. bareagent/hooks/engine.py +197 -0
  42. bareagent/hooks/errors.py +14 -0
  43. bareagent/hooks/events.py +22 -0
  44. bareagent/lsp/__init__.py +63 -0
  45. bareagent/lsp/config.py +134 -0
  46. bareagent/lsp/coord.py +118 -0
  47. bareagent/lsp/diagnostics.py +240 -0
  48. bareagent/lsp/errors.py +24 -0
  49. bareagent/lsp/manager.py +866 -0
  50. bareagent/lsp/tools.py +629 -0
  51. bareagent/lsp/workspace_edit.py +305 -0
  52. bareagent/main.py +4205 -0
  53. bareagent/mcp/__init__.py +69 -0
  54. bareagent/mcp/_sse.py +69 -0
  55. bareagent/mcp/client.py +341 -0
  56. bareagent/mcp/config.py +169 -0
  57. bareagent/mcp/errors.py +32 -0
  58. bareagent/mcp/manager.py +318 -0
  59. bareagent/mcp/protocol.py +187 -0
  60. bareagent/mcp/registry.py +557 -0
  61. bareagent/mcp/transport/__init__.py +15 -0
  62. bareagent/mcp/transport/base.py +149 -0
  63. bareagent/mcp/transport/http_legacy.py +192 -0
  64. bareagent/mcp/transport/http_streamable.py +217 -0
  65. bareagent/mcp/transport/stdio.py +202 -0
  66. bareagent/memory/__init__.py +1 -0
  67. bareagent/memory/compact.py +203 -0
  68. bareagent/memory/conversation_io.py +226 -0
  69. bareagent/memory/embedding.py +194 -0
  70. bareagent/memory/persistent.py +515 -0
  71. bareagent/memory/token_counter.py +67 -0
  72. bareagent/memory/token_tracker.py +262 -0
  73. bareagent/memory/transcript.py +100 -0
  74. bareagent/permission/__init__.py +1 -0
  75. bareagent/permission/guard.py +329 -0
  76. bareagent/permission/rules.py +19 -0
  77. bareagent/planning/__init__.py +19 -0
  78. bareagent/planning/agent_types.py +169 -0
  79. bareagent/planning/skill_gen.py +141 -0
  80. bareagent/planning/skill_store.py +173 -0
  81. bareagent/planning/skills.py +146 -0
  82. bareagent/planning/subagent.py +355 -0
  83. bareagent/planning/subagent_registry.py +77 -0
  84. bareagent/planning/tasks.py +348 -0
  85. bareagent/planning/todo.py +153 -0
  86. bareagent/planning/worktree.py +122 -0
  87. bareagent/provider/__init__.py +1 -0
  88. bareagent/provider/anthropic.py +348 -0
  89. bareagent/provider/base.py +136 -0
  90. bareagent/provider/factory.py +130 -0
  91. bareagent/provider/openai.py +881 -0
  92. bareagent/provider/presets.py +72 -0
  93. bareagent/provider/setup.py +356 -0
  94. bareagent/skills/.gitkeep +1 -0
  95. bareagent/skills/code-review/SKILL.md +68 -0
  96. bareagent/skills/git/SKILL.md +68 -0
  97. bareagent/skills/test/SKILL.md +70 -0
  98. bareagent/team/__init__.py +17 -0
  99. bareagent/team/autonomous.py +193 -0
  100. bareagent/team/mailbox.py +239 -0
  101. bareagent/team/manager.py +155 -0
  102. bareagent/team/protocols.py +129 -0
  103. bareagent/tracing/__init__.py +12 -0
  104. bareagent/tracing/_api.py +92 -0
  105. bareagent/tracing/_proxy.py +60 -0
  106. bareagent/tracing/composite.py +115 -0
  107. bareagent/tracing/json_file.py +115 -0
  108. bareagent/tracing/langfuse.py +139 -0
  109. bareagent/tracing/otel.py +107 -0
  110. bareagent/tracing/setup.py +85 -0
  111. bareagent/ui/__init__.py +24 -0
  112. bareagent/ui/console.py +167 -0
  113. bareagent/ui/prompt.py +78 -0
  114. bareagent/ui/protocol.py +24 -0
  115. bareagent/ui/stream.py +66 -0
  116. bareagent/ui/theme.py +240 -0
  117. bareagent_cli-0.1.0.dist-info/METADATA +331 -0
  118. bareagent_cli-0.1.0.dist-info/RECORD +121 -0
  119. bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
  120. bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
  121. bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from collections.abc import Iterator
5
+ from pathlib import Path, PurePosixPath
6
+
7
+ IGNORED_PATH_NAMES = {
8
+ ".git",
9
+ ".pytest_cache",
10
+ ".venv",
11
+ "__pycache__",
12
+ "node_modules",
13
+ }
14
+
15
+
16
def is_ignored_descendant(path: Path, search_root: Path) -> bool:
    """Return True when a component of *path* below *search_root* is ignored.

    Only the portion of *path* relative to *search_root* is inspected, so a
    search that is explicitly rooted inside an ignored tree is not filtered.

    Raises:
        ValueError: propagated from ``Path.relative_to`` when *path* is not
            located under *search_root*.
    """
    for component in path.relative_to(search_root).parts:
        if component in IGNORED_PATH_NAMES:
            return True
    return False
20
+
21
+
22
def iter_search_files(search_root: Path) -> Iterator[Path]:
    """Yield the files to search under *search_root* in a deterministic order.

    A *search_root* that is itself a file is yielded as given. Otherwise the
    resolved root is walked top-down; ignored directories are pruned before
    descent and directory / file names are visited in sorted order.
    """
    if search_root.is_file():
        yield search_root
        return

    base = search_root.resolve(strict=False)
    for walk_dir, subdirs, files in os.walk(base):
        here = Path(walk_dir)
        # Assigning through the slice prunes os.walk's own list, which is what
        # stops it from descending into ignored directories.
        kept = [entry for entry in subdirs if not is_ignored_descendant(here / entry, base)]
        kept.sort()
        subdirs[:] = kept
        for entry in sorted(files):
            found = here / entry
            if not is_ignored_descendant(found, base):
                yield found
40
+
41
+
42
def _match_glob_parts(path_parts: tuple[str, ...], pattern_parts: tuple[str, ...]) -> bool:
    """Match path components against glob components, anchored at both ends.

    A ``**`` component consumes zero or more whole path components; any other
    component is matched case-sensitively against exactly one path component.
    """
    from fnmatch import fnmatchcase

    if not pattern_parts:
        return not path_parts
    head, rest = pattern_parts[0], pattern_parts[1:]
    if head == "**":
        return any(
            _match_glob_parts(path_parts[start:], rest)
            for start in range(len(path_parts) + 1)
        )
    return (
        bool(path_parts)
        and fnmatchcase(path_parts[0], head)
        and _match_glob_parts(path_parts[1:], rest)
    )


def matches_glob_pattern(candidate: Path, search_root: Path, pattern: str) -> bool:
    """Return whether *candidate* matches the glob *pattern* under *search_root*.

    Backslashes in *pattern* are treated as ``/``. A pattern without ``/`` or
    ``**`` is matched against the file name only. Any other pattern is matched
    against the path of *candidate* relative to *search_root*.

    ``PurePosixPath.match`` treats ``**`` like a single-level ``*``, so on its
    own it cannot match a ``**`` that spans several directories (for example
    ``src/**/test.py`` against ``src/a/b/test.py``). A component-wise matcher
    with a truly recursive ``**`` is therefore tried first; the original
    ``PurePosixPath.match`` check is still applied afterwards, so every path
    that matched before continues to match.

    Raises:
        ValueError: if *candidate* is not located under *search_root*
            (propagated from ``Path.relative_to``).
    """
    pattern_norm = pattern.replace("\\", "/")
    relative = candidate.resolve(strict=False).relative_to(
        search_root.resolve(strict=False)
    )
    relative_path = PurePosixPath(relative.as_posix())

    if "/" not in pattern_norm and "**" not in pattern_norm:
        return PurePosixPath(candidate.name).match(pattern_norm)

    # Absolute patterns are left to the original check so their behavior is unchanged.
    if "**" in pattern_norm and not pattern_norm.startswith("/"):
        pattern_parts = tuple(part for part in pattern_norm.split("/") if part)
        if _match_glob_parts(relative_path.parts, pattern_parts):
            return True

    return any(
        relative_path.match(variant)
        for variant in _expand_recursive_variants(pattern_norm)
    )
55
+
56
+
57
def requires_recursive_walk(pattern: str) -> bool:
    """Return True when *pattern* contains a path separator or a ``**`` wildcard.

    Backslashes are treated as ``/`` before the check.
    """
    normalized = pattern.replace("\\", "/")
    return any(marker in normalized for marker in ("/", "**"))
60
+
61
+
62
+ def _expand_recursive_variants(pattern: str) -> set[str]:
63
+ variants = {pattern}
64
+ changed = True
65
+ while changed:
66
+ changed = False
67
+ new_variants: set[str] = set()
68
+ for variant in variants:
69
+ index = variant.find("**/")
70
+ while index != -1:
71
+ new_variants.add(variant[:index] + variant[index + 3 :])
72
+ index = variant.find("**/", index + 1)
73
+ extra = new_variants - variants
74
+ if extra:
75
+ variants.update(extra)
76
+ changed = True
77
+ return variants
@@ -0,0 +1,87 @@
1
+ """Handler + schema for the ``skill_create`` tool (experiential skill drafting).
2
+
3
+ Unlike most tools, ``skill_create`` is NOT registered in the global tool set.
4
+ It is exposed only inside the isolated "reflection" ``agent_loop`` call that
5
+ runs after a sufficiently complex multi-turn task (see ``main.py`` and
6
+ ``src/planning/skill_gen.py``). Keeping it out of the global set means:
7
+ - the main loop never offers it, so skills are *triggered*, not spontaneous;
8
+ - sub-agents never receive it (isolation, like ``hook_engine``);
9
+ - ``[skills] auto_generate = false`` fully short-circuits — the tool simply
10
+ does not exist when the reflection never runs.
11
+
12
+ The handler is a thin wrapper over :class:`bareagent.planning.skill_store.SkillStore`,
13
+ converting expected storage errors into ``Error:`` strings so the model can
14
+ react instead of crashing the loop (see ``error-handling.md``).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from typing import TYPE_CHECKING
20
+
21
+ from bareagent.core.schema import tool_schema
22
+ from bareagent.planning.skill_store import derive_skill_slug
23
+
24
+ if TYPE_CHECKING:
25
+ from collections.abc import Set as AbstractSet
26
+
27
+ from bareagent.planning.skill_store import SkillStore
28
+
29
+ SKILL_CREATE_TOOL_SCHEMA = tool_schema(
30
+ "skill_create",
31
+ (
32
+ "Save a reusable skill distilled from the workflow you just completed. "
33
+ "Writes a draft SKILL.md to the pending area; the user promotes it with "
34
+ "/skill keep. Call at most once per reflection."
35
+ ),
36
+ {
37
+ "name": {
38
+ "type": "string",
39
+ "description": "Short kebab-case skill identifier, e.g. 'add-config-section'.",
40
+ },
41
+ "description": {
42
+ "type": "string",
43
+ "description": "One line starting with 'Use this when ...'.",
44
+ },
45
+ "body": {
46
+ "type": "string",
47
+ "description": (
48
+ "Markdown body: Steps / Pitfalls / Verification sections capturing "
49
+ "the procedure, dead-ends hit, and how success was checked."
50
+ ),
51
+ },
52
+ },
53
+ ["name", "description", "body"],
54
+ )
55
+
56
+ _HANDLED_ERRORS = (ValueError, OSError)
57
+
58
+
59
def run_skill_create(
    *,
    store: SkillStore,
    name: str | None = None,
    description: str | None = None,
    body: str | None = None,
    reserved_names: AbstractSet[str] | None = None,
) -> str:
    """Create a draft skill through *store* and return its result string.

    Returns an ``Error:`` string instead of raising when the name is missing
    or blank, when the derived slug is in *reserved_names*, or when
    ``store.create_draft`` raises one of ``_HANDLED_ERRORS``.
    """
    raw_name = str(name) if name else ""
    if not raw_name.strip():
        return "Error: skill_create requires a non-empty 'name'."

    # A generated skill that shares a name with a checked-in canon skill would
    # be shadowed by the canon (the loader scans canon first) and never load,
    # so the name is rejected and the model has to pick a distinct one.
    if reserved_names and (slug := derive_skill_slug(raw_name)) in reserved_names:
        return (
            f"Error: '{slug}' is a built-in (repo) skill name and cannot be "
            "overwritten. Choose a different name."
        )

    draft_description = str(description or "")
    draft_body = str(body or "")
    try:
        outcome = store.create_draft(raw_name, draft_description, draft_body)
    except _HANDLED_ERRORS as exc:
        return f"Error: {exc}"
    return outcome
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+
5
+ from bareagent.core.schema import tool_schema
6
+ from bareagent.planning.subagent_registry import ResumableContext, SubagentRegistry
7
+
8
+ SUBAGENT_SEND_TOOL_SCHEMA = tool_schema(
9
+ "subagent_send",
10
+ (
11
+ "Continue a previously spawned foreground subagent, preserving its full "
12
+ "context. Pass the agent id returned when the subagent was spawned plus a "
13
+ "follow-up message; the subagent resumes its conversation and returns a "
14
+ "new result. Only foreground, non-worktree subagents are resumable -- "
15
+ "background and worktree-isolated subagents do not register a context."
16
+ ),
17
+ {
18
+ "agent_id": {
19
+ "type": "string",
20
+ "description": "Id of the subagent to continue (e.g. sa-xxxxxxxx).",
21
+ },
22
+ "message": {
23
+ "type": "string",
24
+ "description": "Follow-up message to send to the subagent.",
25
+ },
26
+ },
27
+ ["agent_id", "message"],
28
+ )
29
+
30
+
31
+ def _resume_footnote(agent_id: str) -> str:
32
+ return f"\n\n[subagent id {agent_id}: still resumable -- continue with subagent_send]"
33
+
34
+
35
def run_subagent_send(
    agent_id: str,
    message: str,
    *,
    registry: SubagentRegistry,
    run_loop: Callable[[ResumableContext], str],
) -> str:
    """Drive the ``subagent_send`` tool; the loop runner is injected as ``run_loop``.

    Steps: validate the inputs, fetch the resumable context from *registry*,
    append the follow-up as a user message, run the loop on that context,
    register the context again, and return the loop result followed by a
    continuation footnote.

    Invalid input or an unknown id yields an ``Error:`` string rather than an
    exception. Exceptions raised by ``run_loop`` are not caught here.
    """
    target_id = agent_id.strip() if isinstance(agent_id, str) else ""
    if target_id == "":
        return "Error: agent_id must not be empty."
    if not (isinstance(message, str) and message.strip()):
        return "Error: message must not be empty."

    resumable = registry.get(target_id)
    if resumable is None:
        return (
            f"Error: subagent {target_id} not found. It may have been evicted "
            "(only the most recent foreground subagents stay resumable) or the "
            "session was reset (/new, /resume, /import, /clear)."
        )

    follow_up = {"role": "user", "content": message}
    resumable.messages.append(follow_up)
    reply = run_loop(resumable)
    # Registering again after a successful run refreshes the context's FIFO
    # position; when run_loop raises this line is never reached, so the
    # context keeps the position it had before.
    registry.register(resumable)
    return reply + _resume_footnote(target_id)
@@ -0,0 +1,126 @@
1
+ from __future__ import annotations
2
+
3
+ import html.parser
4
+ import re
5
+ from urllib.error import URLError
6
+ from urllib.request import Request, urlopen
7
+
8
+ _DEFAULT_TIMEOUT = 15
9
+ _DEFAULT_MAX_LENGTH = 10000
10
+ _USER_AGENT = "BareAgent/1.0"
11
+ _RE_WHITESPACE = re.compile(r"[ \t]+")
12
+
13
+
14
+ class _HTMLToText(html.parser.HTMLParser):
15
+ """将 HTML 转为可读纯文本。
16
+
17
+ - 跳过 <script>、<style>、<nav>、<footer>、<header>、<noscript> 标签内容
18
+ - 在块级元素(p/div/h1-h6/li/br/tr)处插入换行
19
+ - 合并连续空白
20
+ """
21
+
22
+ _SKIP_TAGS = frozenset({"script", "style", "nav", "footer", "header", "noscript"})
23
+ _BLOCK_TAGS = frozenset(
24
+ {
25
+ "p",
26
+ "div",
27
+ "h1",
28
+ "h2",
29
+ "h3",
30
+ "h4",
31
+ "h5",
32
+ "h6",
33
+ "li",
34
+ "br",
35
+ "tr",
36
+ "blockquote",
37
+ "pre",
38
+ "section",
39
+ "article",
40
+ }
41
+ )
42
+
43
+ def __init__(self) -> None:
44
+ super().__init__()
45
+ self._parts: list[str] = []
46
+ self._skip_depth = 0
47
+
48
+ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
49
+ if tag in self._SKIP_TAGS:
50
+ self._skip_depth += 1
51
+ if tag in self._BLOCK_TAGS and self._skip_depth == 0:
52
+ self._parts.append("\n")
53
+
54
+ def handle_endtag(self, tag: str) -> None:
55
+ if tag in self._SKIP_TAGS and self._skip_depth > 0:
56
+ self._skip_depth -= 1
57
+ if tag in self._BLOCK_TAGS and self._skip_depth == 0:
58
+ self._parts.append("\n")
59
+
60
+ def handle_data(self, data: str) -> None:
61
+ if self._skip_depth == 0:
62
+ self._parts.append(data)
63
+
64
+ def get_text(self) -> str:
65
+ raw = "".join(self._parts)
66
+ result_lines: list[str] = []
67
+ prev_empty = False
68
+ for line in raw.splitlines():
69
+ stripped = _RE_WHITESPACE.sub(" ", line).strip()
70
+ if not stripped:
71
+ if not prev_empty:
72
+ result_lines.append("")
73
+ prev_empty = True
74
+ else:
75
+ result_lines.append(stripped)
76
+ prev_empty = False
77
+ return "\n".join(result_lines).strip()
78
+
79
+
80
def html_to_text(html_content: str) -> str:
    """Convert an HTML string to readable plain text.

    The parser is closed after feeding so that text it is still buffering is
    flushed. Without that, trailing text containing an ``&`` that is not
    followed by whitespace or ``;`` (for example ``AT&T``) is held back as a
    possibly incomplete character reference and never reaches the output.
    """
    parser = _HTMLToText()
    parser.feed(html_content)
    # Force processing of any buffered data as if end-of-file had been reached.
    parser.close()
    return parser.get_text()
85
+
86
+
87
+ def _truncate(text: str, max_length: int) -> str:
88
+ """截断文本到指定长度,在最后一个完整行处截断。"""
89
+ if len(text) <= max_length:
90
+ return text
91
+ truncated = text[:max_length]
92
+ # 尝试在最后一个换行处截断
93
+ last_newline = truncated.rfind("\n")
94
+ if last_newline > max_length * 0.8:
95
+ truncated = truncated[:last_newline]
96
+ return truncated + "\n\n[... content truncated]"
97
+
98
+
99
def run_web_fetch(
    url: str,
    max_length: int = _DEFAULT_MAX_LENGTH,
    timeout: int = _DEFAULT_TIMEOUT,
) -> str:
    """Fetch content from a URL, convert HTML to text, and truncate.

    Args:
        url: Address to fetch; must start with ``http://`` or ``https://``.
        max_length: Maximum length of the returned text. At most
            ``max_length * 4`` bytes are read from the response.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The (possibly truncated) text, or an ``Error...`` string when the URL
        is rejected or the request fails.
    """
    from http.client import HTTPException, InvalidURL

    if not url.startswith(("http://", "https://")):
        return f"Error: URL must start with http:// or https:// (got: {url})"

    request = Request(url, headers={"User-Agent": _USER_AGENT})
    try:
        with urlopen(request, timeout=timeout) as resp:  # noqa: S310
            content_type = resp.headers.get("Content-Type", "")
            charset = resp.headers.get_content_charset() or "utf-8"
            raw_bytes = resp.read(max_length * 4)
    except InvalidURL as exc:
        # http.client rejects URLs with control characters or spaces; it is an
        # HTTPException rather than a ValueError, so it needs its own clause.
        return f"Error: invalid URL: {exc}"
    except (URLError, OSError, TimeoutError, HTTPException) as exc:
        # HTTPException covers protocol-level failures such as IncompleteRead
        # that are not OSError subclasses.
        return f"Error fetching URL: {exc}"
    except ValueError as exc:
        return f"Error: invalid URL: {exc}"

    try:
        body = raw_bytes.decode(charset, errors="replace")
    except (LookupError, ValueError):
        # The server advertised an unknown or non-text charset; fall back to
        # UTF-8 instead of failing the whole fetch.
        body = raw_bytes.decode("utf-8", errors="replace")

    if "html" in content_type.lower():
        text = html_to_text(body)
    else:
        text = body

    return _truncate(text, max_length)
@@ -0,0 +1,165 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import binascii
5
+ import html
6
+ import json
7
+ import os
8
+ import re
9
+ from urllib.error import URLError
10
+ from urllib.parse import quote_plus
11
+ from urllib.request import Request, urlopen
12
+
13
+ from bareagent.core.handlers.web_fetch import _DEFAULT_TIMEOUT, html_to_text
14
+
15
+ _DEFAULT_MAX_RESULTS = 5
16
+ _MAX_READ_BYTES = 512_000
17
+
18
+ _BING_SEARCH_URL = "https://www.bing.com/search"
19
+ # Bing serves server-rendered organic results (<li class="b_algo">) only to lightweight /
20
+ # non-JS user agents. A modern desktop UA gets a JS shell whose results are injected
21
+ # client-side, so a plain HTTP fetch finds nothing. A text-browser UA forces the SSR path.
22
+ _BING_UA = "Lynx/2.8.9rel.1 libwww-FM/2.14"
23
+
24
+ _RE_BING_BLOCK = re.compile(r'<li class="b_algo".*?</li>', re.DOTALL)
25
+ _RE_BING_TITLE = re.compile(r"<h2[^>]*>\s*<a[^>]*>(.*?)</a>", re.DOTALL)
26
+ _RE_BING_HREF = re.compile(r'<h2[^>]*>\s*<a[^>]+href="([^"]+)"', re.DOTALL)
27
+ _RE_BING_SNIPPET = re.compile(r'<div class="b_caption".*?<p[^>]*>(.*?)</p>', re.DOTALL)
28
+ # Bing wraps result links in a /ck/a redirect; the real URL is base64url in `u=a1<...>`.
29
+ _RE_BING_REDIRECT_U = re.compile(r"[?&]u=a1([^&]+)")
30
+
31
+
32
def _search_brave(
    query: str,
    max_results: int,
    timeout: int,
    api_key: str,
) -> list[dict[str, str]]:
    """Search through the Brave Search API.

    Returns up to *max_results* entries, each with ``title``, ``url`` and
    ``snippet`` keys. Network and JSON decoding errors propagate to the caller.
    """
    endpoint = "https://api.search.brave.com/res/v1/web/search"
    url = f"{endpoint}?q={quote_plus(query)}&count={max_results}"
    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "identity",
        "X-Subscription-Token": api_key,
    }
    with urlopen(Request(url, headers=headers), timeout=timeout) as resp:  # noqa: S310
        payload = json.loads(resp.read(_MAX_READ_BYTES).decode("utf-8"))

    hits = payload.get("web", {}).get("results", [])[:max_results]
    return [
        {
            "title": hit.get("title", ""),
            "url": hit.get("url", ""),
            "snippet": hit.get("description", ""),
        }
        for hit in hits
    ]
64
+
65
+
66
def _decode_bing_url(href: str) -> str:
    """Turn a Bing ``/ck/a`` redirect href into the destination URL.

    The destination is carried base64url-encoded in the ``u=a1<encoded>`` query
    parameter. When that marker is missing, or the payload cannot be decoded,
    the HTML-unescaped href is returned instead.
    """
    plain = html.unescape(href)
    found = _RE_BING_REDIRECT_U.search(plain)
    if found is None:
        return plain

    payload = found.group(1)
    # The captured payload may lack its base64 padding; restore it.
    payload += "=" * (-len(payload) % 4)
    try:
        decoded = base64.urlsafe_b64decode(payload)
    except (binascii.Error, ValueError):
        return plain
    return decoded.decode("utf-8", errors="replace")
82
+
83
+
84
def _parse_bing_html(body: str) -> list[dict[str, str]]:
    """Pull the organic results out of a Bing HTML results page.

    Result blocks without both a title and a link are skipped; a missing
    snippet becomes an empty string.
    """
    parsed: list[dict[str, str]] = []
    for chunk in _RE_BING_BLOCK.findall(body):
        title_m = _RE_BING_TITLE.search(chunk)
        href_m = _RE_BING_HREF.search(chunk)
        if title_m is None or href_m is None:
            continue

        snippet_m = _RE_BING_SNIPPET.search(chunk)
        if snippet_m is None:
            snippet = ""
        else:
            snippet = html_to_text(snippet_m.group(1)).strip()

        parsed.append(
            {
                "title": html_to_text(title_m.group(1)).strip(),
                "url": _decode_bing_url(href_m.group(1)),
                "snippet": snippet,
            }
        )
    return parsed
98
+
99
+
100
def _search_bing_html(
    query: str,
    max_results: int,
    timeout: int,
) -> list[dict[str, str]]:
    """Search by scraping the Bing HTML results page.

    Needs no configuration or API key (the original author notes it is
    reachable directly from mainland China).

    Raises:
        RuntimeError: when the page has no parseable results and does not
            state that there are no results, which most likely means an
            anti-bot or unsupported-browser page was served.
    """
    headers = {"User-Agent": _BING_UA, "Accept-Language": "en-US,en;q=0.9"}
    request = Request(f"{_BING_SEARCH_URL}?q={quote_plus(query)}", headers=headers)
    with urlopen(request, timeout=timeout) as resp:  # noqa: S310
        page = resp.read(_MAX_READ_BYTES).decode("utf-8", errors="replace")

    hits = _parse_bing_html(page)
    if not hits:
        # Tell a genuinely empty result set apart from a page that could not be
        # parsed, so the caller reports an explicit error rather than a
        # misleading "No results".
        if "there are no results" in page.lower():
            return []
        raise RuntimeError(
            "Bing returned no parseable results (likely an anti-bot or unsupported-browser "
            "page). Set BRAVE_SEARCH_API_KEY to use a reliable search backend."
        )
    return hits[:max_results]
125
+
126
+
127
+ def _format_results(results: list[dict[str, str]], query: str) -> str:
128
+ """将搜索结果格式化为可读文本。"""
129
+ if not results:
130
+ return f"No results found for: {query}"
131
+
132
+ lines: list[str] = [f"Search results for: {query}", ""]
133
+ for i, r in enumerate(results, 1):
134
+ lines.append(f"{i}. {r['title']}")
135
+ lines.append(f" URL: {r['url']}")
136
+ if r.get("snippet"):
137
+ lines.append(f" {r['snippet']}")
138
+ lines.append("")
139
+ return "\n".join(lines).rstrip()
140
+
141
+
142
def run_web_search(
    query: str,
    max_results: int = _DEFAULT_MAX_RESULTS,
    timeout: int = _DEFAULT_TIMEOUT,
) -> str:
    """Search the web and return formatted results.

    Uses the Brave Search API when ``BRAVE_SEARCH_API_KEY`` is set in the
    environment, otherwise scrapes the Bing HTML results page.

    Args:
        query: Search terms; must not be blank.
        max_results: Maximum number of results to include.
        timeout: Timeout in seconds for the HTTP request.

    Returns:
        The formatted results, or an ``Error...`` string when the query is
        blank, the request fails, or the response cannot be parsed.
    """
    from http.client import HTTPException

    if not query.strip():
        return "Error: search query cannot be empty."

    brave_api_key = os.environ.get("BRAVE_SEARCH_API_KEY", "").strip()

    try:
        if brave_api_key:
            results = _search_brave(query, max_results, timeout, brave_api_key)
        else:
            results = _search_bing_html(query, max_results, timeout)
    except (URLError, OSError, TimeoutError, HTTPException) as exc:
        # HTTPException covers protocol-level failures (e.g. IncompleteRead)
        # that are not OSError subclasses.
        return f"Error searching: {exc}"
    except RuntimeError as exc:
        return f"Error: {exc}"
    except (ValueError, KeyError, AttributeError, TypeError) as exc:
        # ValueError includes json.JSONDecodeError and the UnicodeDecodeError a
        # size-capped (hence possibly truncated) payload can raise;
        # AttributeError/TypeError cover a JSON document of unexpected shape.
        return f"Error parsing search results: {exc}"

    return _format_results(results, query)
165
+ return _format_results(results, query)