illusion-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- illusion/__init__.py +24 -0
- illusion/__main__.py +15 -0
- illusion/_frontend/dist/index.mjs +39208 -0
- illusion/_frontend/package.json +27 -0
- illusion/_frontend/src/App.tsx +624 -0
- illusion/_frontend/src/components/CommandPicker.tsx +98 -0
- illusion/_frontend/src/components/Composer.tsx +55 -0
- illusion/_frontend/src/components/ComposerController.tsx +128 -0
- illusion/_frontend/src/components/ConversationView.tsx +750 -0
- illusion/_frontend/src/components/Footer.tsx +25 -0
- illusion/_frontend/src/components/MarkdownContent.tsx +537 -0
- illusion/_frontend/src/components/MarkdownTable.tsx +245 -0
- illusion/_frontend/src/components/ModalHost.tsx +425 -0
- illusion/_frontend/src/components/MultilineTextInput.tsx +250 -0
- illusion/_frontend/src/components/PromptInput.tsx +64 -0
- illusion/_frontend/src/components/SelectModal.tsx +78 -0
- illusion/_frontend/src/components/SidePanel.tsx +175 -0
- illusion/_frontend/src/components/Spinner.tsx +77 -0
- illusion/_frontend/src/components/StatusBar.tsx +142 -0
- illusion/_frontend/src/components/SwarmPanel.tsx +141 -0
- illusion/_frontend/src/components/TodoPanel.tsx +126 -0
- illusion/_frontend/src/components/ToolCallDisplay.tsx +202 -0
- illusion/_frontend/src/components/TranscriptPane.tsx +79 -0
- illusion/_frontend/src/components/WelcomeBanner.tsx +37 -0
- illusion/_frontend/src/hooks/useBackendSession.ts +468 -0
- illusion/_frontend/src/hooks/useTerminalSize.ts +9 -0
- illusion/_frontend/src/i18n.ts +78 -0
- illusion/_frontend/src/index.tsx +42 -0
- illusion/_frontend/src/theme/ThemeContext.tsx +19 -0
- illusion/_frontend/src/theme/builtinThemes.ts +89 -0
- illusion/_frontend/src/types.ts +110 -0
- illusion/_frontend/src/utils/markdown.ts +33 -0
- illusion/_frontend/src/utils/thinking.ts +191 -0
- illusion/_frontend/tsconfig.json +13 -0
- illusion/_web_dist/assets/index-BseIw-ik.css +10 -0
- illusion/_web_dist/assets/index-C_0ZWMuW.js +82 -0
- illusion/_web_dist/index.html +16 -0
- illusion/api/__init__.py +36 -0
- illusion/api/client.py +568 -0
- illusion/api/codex_client.py +563 -0
- illusion/api/compat.py +138 -0
- illusion/api/effort.py +128 -0
- illusion/api/errors.py +57 -0
- illusion/api/openai_client.py +819 -0
- illusion/api/provider.py +148 -0
- illusion/api/registry.py +479 -0
- illusion/api/usage.py +45 -0
- illusion/auth/__init__.py +50 -0
- illusion/auth/copilot.py +419 -0
- illusion/auth/external.py +612 -0
- illusion/auth/flows.py +58 -0
- illusion/auth/manager.py +214 -0
- illusion/auth/storage.py +372 -0
- illusion/bridge/__init__.py +38 -0
- illusion/bridge/manager.py +190 -0
- illusion/bridge/session_runner.py +84 -0
- illusion/bridge/types.py +113 -0
- illusion/bridge/work_secret.py +131 -0
- illusion/cli.py +1228 -0
- illusion/commands/__init__.py +32 -0
- illusion/commands/registry.py +1934 -0
- illusion/config/__init__.py +39 -0
- illusion/config/i18n.py +522 -0
- illusion/config/paths.py +259 -0
- illusion/config/settings.py +564 -0
- illusion/coordinator/__init__.py +41 -0
- illusion/coordinator/agent_definitions.py +1093 -0
- illusion/coordinator/coordinator_mode.py +127 -0
- illusion/engine/__init__.py +95 -0
- illusion/engine/cost_tracker.py +55 -0
- illusion/engine/messages.py +369 -0
- illusion/engine/query.py +632 -0
- illusion/engine/query_engine.py +343 -0
- illusion/engine/stream_events.py +169 -0
- illusion/hooks/__init__.py +67 -0
- illusion/hooks/events.py +43 -0
- illusion/hooks/executor.py +397 -0
- illusion/hooks/hot_reload.py +74 -0
- illusion/hooks/loader.py +133 -0
- illusion/hooks/schemas.py +121 -0
- illusion/hooks/types.py +86 -0
- illusion/mcp/__init__.py +104 -0
- illusion/mcp/client.py +377 -0
- illusion/mcp/config.py +140 -0
- illusion/mcp/types.py +175 -0
- illusion/memory/__init__.py +36 -0
- illusion/memory/manager.py +94 -0
- illusion/memory/memdir.py +58 -0
- illusion/memory/paths.py +57 -0
- illusion/memory/scan.py +120 -0
- illusion/memory/search.py +83 -0
- illusion/memory/types.py +43 -0
- illusion/output_styles/__init__.py +15 -0
- illusion/output_styles/loader.py +64 -0
- illusion/permissions/__init__.py +39 -0
- illusion/permissions/checker.py +174 -0
- illusion/permissions/modes.py +38 -0
- illusion/platforms.py +148 -0
- illusion/plugins/__init__.py +71 -0
- illusion/plugins/bundled/__init__.py +0 -0
- illusion/plugins/installer.py +59 -0
- illusion/plugins/loader.py +301 -0
- illusion/plugins/schemas.py +51 -0
- illusion/plugins/types.py +56 -0
- illusion/prompts/__init__.py +29 -0
- illusion/prompts/claudemd.py +74 -0
- illusion/prompts/context.py +187 -0
- illusion/prompts/environment.py +189 -0
- illusion/prompts/system_prompt.py +155 -0
- illusion/py.typed +0 -0
- illusion/sandbox/__init__.py +29 -0
- illusion/sandbox/adapter.py +174 -0
- illusion/services/__init__.py +59 -0
- illusion/services/compact/__init__.py +1015 -0
- illusion/services/cron.py +338 -0
- illusion/services/cron_scheduler.py +715 -0
- illusion/services/file_history.py +258 -0
- illusion/services/lsp/__init__.py +455 -0
- illusion/services/session_storage.py +237 -0
- illusion/services/token_estimation.py +72 -0
- illusion/skills/__init__.py +60 -0
- illusion/skills/bundled/__init__.py +110 -0
- illusion/skills/bundled/content/batch.md +86 -0
- illusion/skills/bundled/content/coding-guidelines.md +70 -0
- illusion/skills/bundled/content/debug.md +38 -0
- illusion/skills/bundled/content/loop.md +82 -0
- illusion/skills/bundled/content/remember.md +105 -0
- illusion/skills/bundled/content/simplify.md +53 -0
- illusion/skills/bundled/content/skillify.md +113 -0
- illusion/skills/bundled/content/stuck.md +54 -0
- illusion/skills/bundled/content/update-config.md +329 -0
- illusion/skills/bundled/content/verify.md +74 -0
- illusion/skills/loader.py +219 -0
- illusion/skills/registry.py +40 -0
- illusion/skills/types.py +24 -0
- illusion/state/__init__.py +18 -0
- illusion/state/app_state.py +67 -0
- illusion/state/store.py +93 -0
- illusion/swarm/__init__.py +71 -0
- illusion/swarm/agent_executor.py +857 -0
- illusion/swarm/in_process.py +259 -0
- illusion/swarm/subprocess_backend.py +136 -0
- illusion/swarm/team_helpers.py +123 -0
- illusion/swarm/types.py +159 -0
- illusion/swarm/worktree.py +347 -0
- illusion/tasks/__init__.py +33 -0
- illusion/tasks/local_agent_task.py +42 -0
- illusion/tasks/local_shell_task.py +27 -0
- illusion/tasks/manager.py +377 -0
- illusion/tasks/stop_task.py +21 -0
- illusion/tasks/types.py +88 -0
- illusion/tools/__init__.py +126 -0
- illusion/tools/agent_tool.py +388 -0
- illusion/tools/ask_user_question_tool.py +186 -0
- illusion/tools/base.py +149 -0
- illusion/tools/bash_tool.py +413 -0
- illusion/tools/config_tool.py +90 -0
- illusion/tools/cron_tool.py +473 -0
- illusion/tools/enter_plan_mode_tool.py +147 -0
- illusion/tools/enter_worktree_tool.py +188 -0
- illusion/tools/exit_plan_mode_tool.py +69 -0
- illusion/tools/exit_worktree_tool.py +225 -0
- illusion/tools/file_edit_tool.py +283 -0
- illusion/tools/file_read_tool.py +294 -0
- illusion/tools/file_write_tool.py +184 -0
- illusion/tools/glob_tool.py +165 -0
- illusion/tools/grep_tool.py +190 -0
- illusion/tools/list_mcp_resources_tool.py +80 -0
- illusion/tools/lsp_tool.py +333 -0
- illusion/tools/mcp_auth_tool.py +100 -0
- illusion/tools/mcp_tool.py +75 -0
- illusion/tools/notebook_edit_tool.py +242 -0
- illusion/tools/powershell_tool.py +334 -0
- illusion/tools/read_mcp_resource_tool.py +63 -0
- illusion/tools/repl_tool.py +100 -0
- illusion/tools/send_message_tool.py +112 -0
- illusion/tools/shell_common.py +187 -0
- illusion/tools/skill_tool.py +86 -0
- illusion/tools/sleep_tool.py +62 -0
- illusion/tools/structured_output_tool.py +58 -0
- illusion/tools/task_create_tool.py +98 -0
- illusion/tools/task_get_tool.py +94 -0
- illusion/tools/task_list_tool.py +94 -0
- illusion/tools/task_output_tool.py +55 -0
- illusion/tools/task_stop_tool.py +52 -0
- illusion/tools/task_update_tool.py +224 -0
- illusion/tools/team_create_tool.py +236 -0
- illusion/tools/team_delete_tool.py +104 -0
- illusion/tools/todo_write_tool.py +198 -0
- illusion/tools/tool_search_tool.py +156 -0
- illusion/tools/web_fetch_tool.py +264 -0
- illusion/tools/web_search_tool.py +186 -0
- illusion/ui/__init__.py +23 -0
- illusion/ui/app.py +258 -0
- illusion/ui/backend_host.py +1180 -0
- illusion/ui/input.py +86 -0
- illusion/ui/output.py +363 -0
- illusion/ui/permission_dialog.py +47 -0
- illusion/ui/permission_store.py +99 -0
- illusion/ui/protocol.py +384 -0
- illusion/ui/react_launcher.py +280 -0
- illusion/ui/runtime.py +787 -0
- illusion/ui/textual_app.py +603 -0
- illusion/ui/web/__init__.py +10 -0
- illusion/ui/web/server.py +87 -0
- illusion/ui/web/ws_host.py +1197 -0
- illusion/utils/__init__.py +0 -0
- illusion/utils/ripgrep.py +299 -0
- illusion/utils/shell.py +248 -0
- illusion_code-0.1.0.dist-info/METADATA +1159 -0
- illusion_code-0.1.0.dist-info/RECORD +214 -0
- illusion_code-0.1.0.dist-info/WHEEL +4 -0
- illusion_code-0.1.0.dist-info/entry_points.txt +2 -0
- illusion_code-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
工具搜索工具
|
|
3
|
+
============
|
|
4
|
+
|
|
5
|
+
本模块提供搜索可用工具注册表的功能,支持精确名称查询和关键词搜索,
|
|
6
|
+
返回匹配工具的完整 JSONSchema 定义。
|
|
7
|
+
|
|
8
|
+
主要组件:
|
|
9
|
+
- ToolSearchTool: 搜索工具注册表的工具
|
|
10
|
+
|
|
11
|
+
使用示例:
|
|
12
|
+
>>> from illusion.tools import ToolSearchTool
|
|
13
|
+
>>> tool = ToolSearchTool()
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from pydantic import BaseModel, Field
|
|
22
|
+
|
|
23
|
+
from illusion.tools.base import BaseTool, ToolExecutionContext, ToolResult
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ToolSearchToolInput(BaseModel):
    """Arguments accepted by the tool-search tool.

    Attributes:
        query: Text looked up in tool names and descriptions. Three query
            forms are recognised:

            - ``"select:Tool1,Tool2"``: fetch tools by exact name
            - ``"+term other"``: require ``term`` in the tool name and rank
              the candidates by the remaining words
            - ``"keyword list"``: keyword search ranked by match count
    """

    query: str = Field(description="Substring to search in tool names and descriptions")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ToolSearchTool(BaseTool):
    """Search the tool registry and return full schemas for matching tools.

    Three query modes are supported:

    1. ``select:`` prefix: exact match on comma-separated tool names
    2. ``+`` prefix: the first word must occur in the tool name; candidates
       are ranked by the remaining words
    3. plain keywords: best matches ranked by number of matching terms
    """

    name = "tool_search"
    description = """Fetches full schema definitions for deferred tools so they can be called.

Deferred tools appear by name in <system-reminder> messages. Until fetched, only the name is known — there is no parameter schema, so the tool cannot be invoked. This tool takes a query, matches it against the deferred tool list, and returns the matched tools' complete JSONSchema definitions inside a <functions> block. Once a tool's schema appears in that result, it is callable exactly like any tool defined at the top of this prompt.

Result format: each matched tool appears as one <function>{"description": "...", "name": "...", "parameters": {...}}</function> line inside the <functions> block — the same encoding as the tool list at the top of this prompt.

Query forms:
- "select:Read,Edit,Grep" — fetch these exact tools by name
- "notebook jupyter" — keyword search, up to max_results best matches
- "+slack send" — require "slack" in the name, rank by remaining terms"""
    input_model = ToolSearchToolInput

    def is_read_only(self, arguments: ToolSearchToolInput) -> bool:
        """Return True: this tool always reports itself as read-only."""
        del arguments
        return True

    async def execute(self, arguments: ToolSearchToolInput, context: ToolExecutionContext) -> ToolResult:
        """Run the query against the registry found in ``context.metadata``.

        Args:
            arguments: Validated tool input carrying the query string.
            context: Execution context; its ``metadata["tool_registry"]``
                must expose ``list_tools()``.

        Returns:
            One ``<function>…</function>`` line per matched tool,
            ``"(no matches)"`` when nothing matched, or an error result when
            no registry is available.
        """
        # ``metadata`` may be missing or None; treat both as "no registry".
        metadata = getattr(context, "metadata", None) or {}
        registry = metadata.get("tool_registry")
        if registry is None:
            return ToolResult(output="Tool registry context not available", is_error=True)

        matches = self._match_tools(arguments.query.strip(), registry.list_tools())
        if not matches:
            return ToolResult(output="(no matches)")
        return ToolResult(output=self._build_functions_block(matches))

    def _match_tools(self, query: str, all_tools: list[BaseTool]) -> list[BaseTool]:
        """Dispatch the query to the matcher selected by its prefix.

        Args:
            query: Query string (already stripped).
            all_tools: Every registered tool.

        Returns:
            The matching tools.
        """
        if query.startswith("select:"):
            return self._match_select(query, all_tools)
        if query.startswith("+"):
            return self._match_require(query, all_tools)
        return self._match_keyword(query, all_tools)

    def _match_select(self, query: str, all_tools: list[BaseTool]) -> list[BaseTool]:
        """Handle ``select:Name1,Name2,...``: exact match on tool names."""
        names = {n.strip() for n in query[len("select:"):].split(",") if n.strip()}
        return [t for t in all_tools if t.name in names]

    def _match_require(self, query: str, all_tools: list[BaseTool]) -> list[BaseTool]:
        """Handle ``+term other...``: name must contain ``term``; at most 5 results."""
        parts = query.split()
        if not parts:
            return []
        required = parts[0][1:].lower()  # drop the leading "+"
        remaining_terms = parts[1:]

        if not required:
            # "+ foo" or a bare "+": the empty string is a substring of every
            # name, so it would select the whole registry. Fall back to a
            # plain keyword search over whatever terms are left.
            return self._match_keyword(" ".join(remaining_terms), all_tools)

        candidates = [t for t in all_tools if required in t.name.lower()]
        if not remaining_terms:
            return candidates[:5]

        ranked = sorted(
            candidates,
            key=lambda t: self._keyword_score(t, remaining_terms),
            reverse=True,
        )
        return ranked[:5]

    def _match_keyword(self, query: str, all_tools: list[BaseTool]) -> list[BaseTool]:
        """Keyword search ranked by match count; at most 5 results."""
        terms = query.lower().split()
        if not terms:
            return []

        # Score every tool once, then keep only the tools that matched.
        scored = [(self._keyword_score(tool, terms), tool) for tool in all_tools]
        hits = [pair for pair in scored if pair[0] > 0]
        # The sort is stable, so equally scored tools keep registry order.
        hits.sort(key=lambda pair: pair[0], reverse=True)
        return [tool for _, tool in hits[:5]]

    @staticmethod
    def _keyword_score(tool: BaseTool, terms: list[str]) -> int:
        """Count how many terms occur in the tool's name or description.

        The comparison is case-insensitive on both sides, so callers do not
        need to lowercase the terms themselves.
        """
        text = f"{tool.name} {tool.description}".lower()
        return sum(1 for term in terms if term.lower() in text)

    def _build_functions_block(self, tools: list[BaseTool]) -> str:
        """Render each tool as one ``<function>JSON</function>`` line."""
        return "\n".join(
            f"<function>{json.dumps(self._tool_to_function_schema(tool), ensure_ascii=False)}</function>"
            for tool in tools
        )

    @staticmethod
    def _tool_to_function_schema(tool: BaseTool) -> dict[str, Any]:
        """Convert a tool's API schema to function-schema form (``parameters`` key)."""
        api_schema = tool.to_api_schema()
        return {
            "name": api_schema["name"],
            "description": api_schema["description"],
            "parameters": api_schema["input_schema"],
        }
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""
|
|
2
|
+
网页抓取和摘要工具
|
|
3
|
+
==================
|
|
4
|
+
|
|
5
|
+
本模块提供获取和摘要远程网页内容的功能。
|
|
6
|
+
|
|
7
|
+
主要组件:
|
|
8
|
+
- WebFetchTool: 抓取并摘要网页的工具
|
|
9
|
+
|
|
10
|
+
使用示例:
|
|
11
|
+
>>> from illusion.tools import WebFetchTool
|
|
12
|
+
>>> tool = WebFetchTool()
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import html as _html_module
|
|
18
|
+
import re
|
|
19
|
+
import time
|
|
20
|
+
from urllib.parse import urlparse
|
|
21
|
+
|
|
22
|
+
import httpx
|
|
23
|
+
from openai import AsyncOpenAI
|
|
24
|
+
from pydantic import BaseModel, Field
|
|
25
|
+
|
|
26
|
+
from illusion.config.settings import load_settings
|
|
27
|
+
from illusion.tools.base import BaseTool, ToolExecutionContext, ToolResult
|
|
28
|
+
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
# 15-minute TTL cache
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
_cache: dict[str, tuple[float, str]] = {}
|
|
33
|
+
_CACHE_TTL = 15 * 60 # 15 minutes in seconds
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _cache_key(url: str, prompt: str, max_chars: int) -> str:
|
|
37
|
+
return f"{url}|{prompt}|{max_chars}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _cache_get(key: str) -> str | None:
|
|
41
|
+
entry = _cache.get(key)
|
|
42
|
+
if entry is None:
|
|
43
|
+
return None
|
|
44
|
+
ts, value = entry
|
|
45
|
+
if time.time() - ts > _CACHE_TTL:
|
|
46
|
+
del _cache[key]
|
|
47
|
+
return None
|
|
48
|
+
return value
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _cache_set(key: str, value: str) -> None:
|
|
52
|
+
_cache[key] = (time.time(), value)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class WebFetchToolInput(BaseModel):
    """Arguments accepted by the web-fetch tool.

    Attributes:
        url: HTTP or HTTPS URL to fetch.
        prompt: Describes what information should be extracted from the page.
        max_chars: Maximum number of page characters kept (500-50000).
    """

    url: str = Field(description="HTTP or HTTPS URL to fetch")
    prompt: str = Field(
        default="Summarize the key content of this page.",
        description="Describes what information you want to extract from the page",
    )
    max_chars: int = Field(default=12000, ge=500, le=50000)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class WebFetchTool(BaseTool):
    """Fetch a web page and process its content with an AI model.

    Used to retrieve and analyse web content.
    """

    name = "web_fetch"
    description = """- Fetches content from a specified URL and processes it using an AI model
- Takes a URL and a prompt as input
- Fetches the URL content, converts HTML to markdown
- Processes the content with the prompt using a small, fast model
- Returns the model's response about the content
- Use this tool when you need to retrieve and analyze web content

Usage notes:
- IMPORTANT: If an MCP-provided web fetch tool is available, prefer using that tool instead of this one, as it may have fewer restrictions.
- The URL must be a fully-formed valid URL
- HTTP URLs will be automatically upgraded to HTTPS
- The prompt should describe what information you want to extract from the page
- This tool is read-only and does not modify any files
- Results may be summarized if the content is very large
- Includes a self-cleaning 15-minute cache for faster responses when repeatedly accessing the same URL
- When a URL redirects to a different host, the tool will inform you and provide the redirect URL in a special format. You should then make a new WebFetch request with the redirect URL to fetch the content.
- For GitHub URLs, prefer using the gh CLI via Bash instead (e.g., gh pr view, gh issue view, gh api)."""
    input_model = WebFetchToolInput

    async def execute(self, arguments: WebFetchToolInput, context: ToolExecutionContext) -> ToolResult:
        """Fetch ``arguments.url`` and answer ``arguments.prompt`` about it.

        Same-host redirects are followed (up to a fixed limit); a redirect to
        a different host is reported back instead of being followed. If the
        model call fails, the converted page text is returned directly.

        Args:
            arguments: Validated tool input (url, prompt, max_chars).
            context: Execution context; unused.

        Returns:
            The model's answer, the raw page text as a fallback, a redirect
            notice, or an error result when the request fails.
        """
        # Local import so this block does not depend on the module's import list.
        from urllib.parse import urljoin

        del context
        request_headers = {"User-Agent": "IllusionCode/0.1"}
        max_redirects = 10  # guards against redirect loops

        url = arguments.url

        # Upgrade HTTP to HTTPS. urlparse lowercases the scheme, so rebuild
        # from the parsed parts instead of a case-sensitive string replace.
        parsed = urlparse(url)
        if parsed.scheme == "http":
            url = parsed._replace(scheme="https").geturl()

        # Serve from the cache when possible.
        ck = _cache_key(url, arguments.prompt, arguments.max_chars)
        cached = _cache_get(ck)
        if cached is not None:
            return ToolResult(output=cached)

        # Redirects are handled manually so cross-host jumps can be detected.
        try:
            async with httpx.AsyncClient(follow_redirects=False, timeout=20.0) as client:
                response = await client.get(url, headers=request_headers)
                hops = 0
                while response.is_redirect:
                    location = response.headers.get("location", "")
                    if not location:
                        break
                    # urljoin resolves absolute, scheme-relative and relative
                    # Location values against the current URL.
                    target = urljoin(url, location)
                    if urlparse(target).netloc.lower() != urlparse(url).netloc.lower():
                        return ToolResult(
                            output=(
                                f"Redirect detected to a different host. The URL {arguments.url} "
                                f"redirects to:\n\n{target}\n\n"
                                f"Please make a new WebFetch request with the redirect URL."
                            )
                        )
                    hops += 1
                    if hops > max_redirects:
                        return ToolResult(
                            output=f"web_fetch failed: more than {max_redirects} redirects",
                            is_error=True,
                        )
                    url = target
                    response = await client.get(url, headers=request_headers)
                response.raise_for_status()
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            # httpx.InvalidURL is not a subclass of httpx.HTTPError.
            return ToolResult(output=f"web_fetch failed: {exc}", is_error=True)

        # Convert HTML responses to Markdown; other content is used as-is.
        content_type = response.headers.get("content-type", "")
        body = response.text
        if "html" in content_type:
            body = _html_to_markdown(body)
        body = body.strip()

        # Truncate overly long content.
        if len(body) > arguments.max_chars:
            body = body[: arguments.max_chars].rstrip() + "\n...[truncated]"

        try:
            ai_response = await _process_with_model(body, arguments.prompt)
        except Exception:
            # Deliberate best effort: if the model call fails for any reason,
            # fall back to returning the page content itself (not cached).
            result = (
                f"URL: {response.url}\n"
                f"Status: {response.status_code}\n"
                f"Content-Type: {content_type or '(unknown)'}\n\n"
                f"{body}"
            )
            return ToolResult(output=result)

        _cache_set(ck, ai_response)
        return ToolResult(output=ai_response)

    def is_read_only(self, arguments: BaseModel) -> bool:
        """Return True: this tool always reports itself as read-only."""
        del arguments
        return True
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
async def _process_with_model(content: str, prompt: str) -> str:
    """Ask the configured model to answer ``prompt`` about ``content``.

    Args:
        content: Page text (already converted and truncated by the caller).
        prompt: The user's question or extraction instruction.

    Returns:
        The model's reply text, or ``""`` when the reply has no content.

    Raises:
        RuntimeError: If no API key is configured in the active environment.
    """
    settings = load_settings()
    env = settings._active_env
    api_key = env.api_key or None
    base_url = env.base_url or None

    if not api_key:
        raise RuntimeError("No API key configured")

    if env.api_format == "anthropic":
        # The Anthropic endpoint is addressed under ".../v1". Normalise the
        # configured URL so a trailing "/" or an existing "/v1" suffix does
        # not produce "//v1" or "/v1/v1".
        root = (base_url or "https://api.anthropic.com").rstrip("/")
        if not root.endswith("/v1"):
            root = f"{root}/v1"
        client = AsyncOpenAI(api_key=api_key, base_url=root)
    else:
        client = AsyncOpenAI(api_key=api_key, base_url=base_url)

    model_name = settings._active_model_name

    system_prompt = (
        "You are a web content summarizer. Analyze the provided web page content and respond "
        "to the user's prompt. Be concise and accurate. Only use information from the provided content."
    )

    try:
        resp = await client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Web page content:\n\n{content}\n\nUser prompt: {prompt}"},
            ],
            max_tokens=4096,
            temperature=0.3,
        )
    finally:
        # A client is created per call; close it so its HTTP connections
        # are released instead of lingering until garbage collection.
        await client.close()
    return resp.choices[0].message.content or ""
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _html_to_markdown(html_text: str) -> str:
    """Convert an HTML document to rough Markdown using regular expressions.

    Headings, bold/italic, links, paragraphs, line breaks, list items and
    code are rewritten to Markdown; scripts, styles, navigation chrome,
    comments and images are dropped; every other tag is removed.

    Every tag pattern anchors the tag name with ``\\b`` so that, for example,
    ``<b`` does not match ``<body>`` or ``<br>`` and ``<i`` does not match
    ``<img>``.

    Args:
        html_text: Raw HTML source.

    Returns:
        The converted text with whitespace normalised and blank runs collapsed.
    """
    text = html_text

    # Drop comments first: they may contain ">" and would otherwise leak.
    text = re.sub(r"(?s)<!--.*?-->", " ", text)

    # Remove script/style/nav/footer/header/noscript elements with their content.
    text = re.sub(r"(?is)<(script|style|nav|footer|header|noscript)\b[^>]*>.*?</\1\s*>", " ", text)

    # Headings h1-h6; the heading text is reduced to plain text.
    for level in range(6, 0, -1):
        text = re.sub(
            rf"(?is)<h{level}\b[^>]*>\s*(.*?)\s*</h{level}\s*>",
            lambda m, n=level: "#" * n + " " + _strip_html(m.group(1)) + "\n\n",
            text,
        )

    # Bold / italic.
    text = re.sub(r"(?is)<(?:b|strong)\b[^>]*>(.*?)</(?:b|strong)\s*>", r"**\1**", text)
    text = re.sub(r"(?is)<(?:i|em)\b[^>]*>(.*?)</(?:i|em)\s*>", r"*\1*", text)

    # Links: only <a> elements that carry a quoted href.
    text = re.sub(r'(?is)<a\b[^>]*\shref=["\']([^"\']+)["\'][^>]*>(.*?)</a\s*>', r"[\2](\1)", text)

    # Images are dropped from the output.
    text = re.sub(r'(?is)<img\b[^>]*src=["\']([^"\']+)["\'][^>]*/?>', "", text)

    # Paragraphs.
    text = re.sub(r"(?is)<p\b[^>]*>\s*(.*?)\s*</p\s*>", r"\1\n\n", text)

    # Line breaks, with or without attributes or a self-closing slash.
    text = re.sub(r"(?i)<br\b[^>]*>", "\n", text)

    # List items.
    text = re.sub(r"(?is)<li\b[^>]*>\s*(.*?)\s*</li\s*>", r"- \1\n", text)

    # Code blocks and inline code.
    text = re.sub(r"(?is)<pre\b[^>]*>(.*?)</pre\s*>", r"\n```\n\1\n```\n", text)
    text = re.sub(r"(?is)<code\b[^>]*>(.*?)</code\s*>", r"`\1`", text)

    # Remove every remaining tag.
    text = re.sub(r"(?s)<[^>]+>", " ", text)

    # Decode HTML entities.
    text = _html_module.unescape(text)

    # Normalise whitespace and collapse runs of blank lines.
    text = re.sub(r"[ \t\f\r]+", " ", text)
    text = re.sub(r"\n[ \t]+\n", "\n\n", text)
    text = re.sub(r"\n{3,}", "\n\n", text)

    return text.strip()


def _strip_html(fragment: str) -> str:
    """Remove HTML tags from ``fragment`` and return single-spaced plain text."""
    without_tags = re.sub(r"(?s)<[^>]+>", " ", fragment)
    decoded = _html_module.unescape(without_tags)
    return re.sub(r"\s+", " ", decoded).strip()
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""
|
|
2
|
+
简单网页搜索工具
|
|
3
|
+
================
|
|
4
|
+
|
|
5
|
+
本模块提供执行网络搜索并返回紧凑顶部结果的功能。
|
|
6
|
+
|
|
7
|
+
主要组件:
|
|
8
|
+
- WebSearchTool: 执行网页搜索的工具
|
|
9
|
+
|
|
10
|
+
使用示例:
|
|
11
|
+
>>> from illusion.tools import WebSearchTool
|
|
12
|
+
>>> tool = WebSearchTool()
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import html
|
|
18
|
+
import re
|
|
19
|
+
from urllib.parse import parse_qs, unquote, urlparse
|
|
20
|
+
|
|
21
|
+
import httpx
|
|
22
|
+
from pydantic import BaseModel, Field
|
|
23
|
+
|
|
24
|
+
from illusion.tools.base import BaseTool, ToolExecutionContext, ToolResult
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class WebSearchToolInput(BaseModel):
    """Arguments accepted by the web-search tool.

    Attributes:
        query: The search query.
        max_results: Maximum number of results to return (1-10).
        search_url: Optional override for the search endpoint.
    """

    query: str = Field(description="Search query")
    max_results: int = Field(default=5, ge=1, le=10, description="Maximum number of results")
    search_url: str | None = Field(
        default=None,
        description="Optional override for the HTML search endpoint, useful for private search backends or testing.",
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class WebSearchTool(BaseTool):
    """Run a web search and return a compact list of the top results.

    Used to obtain information newer than Illusion's knowledge cutoff.
    """

    name = "web_search"
    description = """- Allows Illusion to search the web and use the results to inform responses
- Provides up-to-date information for current events and recent data
- Returns search result information formatted as search result blocks, including links as markdown hyperlinks
- Use this tool for accessing information beyond Illusion's knowledge cutoff
- Searches are performed automatically within a single API call

CRITICAL REQUIREMENT - You MUST follow this:
- After answering the user's question, you MUST include a "Sources:" section at the end of your response
- In the Sources section, list all relevant URLs from the search results as markdown hyperlinks: [Title](URL)
- This is MANDATORY - never skip including sources in your response
- Example format:

[Your answer here]

Sources:
- [Source Title 1](https://example.com/1)
- [Source Title 2](https://example.com/2)

Usage notes:
- Web search is only available in the US

IMPORTANT - Use the correct year in search queries:
- The current month is <currentMonthYear>. You MUST use this year when searching for recent information, documentation, or current events.
- Example: If the user asks for "latest React docs", search for "React documentation" with the current year, NOT last year"""
    input_model = WebSearchToolInput

    def is_read_only(self, arguments: WebSearchToolInput) -> bool:
        """Return True: this tool always reports itself as read-only."""
        del arguments
        return True

    async def execute(
        self,
        arguments: WebSearchToolInput,
        context: ToolExecutionContext,
    ) -> ToolResult:
        """Query the search endpoint and format the parsed results.

        Args:
            arguments: Validated tool input (query, max_results, search_url).
            context: Execution context; unused.

        Returns:
            A numbered list of Markdown links with optional snippets, or an
            error result when the request fails or nothing could be parsed.
        """
        del context
        # Use the caller-supplied endpoint, falling back to DuckDuckGo's HTML page.
        endpoint = arguments.search_url or "https://html.duckduckgo.com/html/"
        try:
            async with httpx.AsyncClient(follow_redirects=True, timeout=20.0) as client:
                response = await client.get(
                    endpoint,
                    params={"q": arguments.query},
                    headers={"User-Agent": "IllusionCode/0.1"},
                )
                response.raise_for_status()
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            # httpx.InvalidURL (e.g. a malformed search_url) is not a subclass
            # of httpx.HTTPError, so it has to be listed explicitly.
            return ToolResult(output=f"web_search failed: {exc}", is_error=True)

        results = _parse_search_results(response.text, limit=arguments.max_results)
        if not results:
            return ToolResult(output="No search results found.", is_error=True)

        # Render results as a numbered list of Markdown hyperlinks.
        lines = [f"Search results for: {arguments.query}"]
        for index, result in enumerate(results, start=1):
            lines.append(f"{index}. [{result['title']}]({result['url']})")
            if result["snippet"]:
                lines.append(f" {result['snippet']}")
        return ToolResult(output="\n".join(lines))
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _parse_search_results(body: str, *, limit: int) -> list[dict[str, str]]:
    """Extract result links and their snippets from a search results page.

    A result link is an ``<a>`` whose class contains ``result__a`` or
    ``result-link`` and that has a double-quoted ``href``. Each link is paired
    with the first snippet element located after it and before the next
    result link, so unrelated anchors on the page and results without a
    snippet cannot shift the pairing.

    Args:
        body: HTML of the search results page.
        limit: Maximum number of results to return.

    Returns:
        A list of ``{"title", "url", "snippet"}`` dictionaries; ``snippet``
        is ``""`` when the result has none.
    """
    flags = re.IGNORECASE | re.DOTALL

    snippet_matches = list(
        re.finditer(
            r'<(?:a|div|span)\b[^>]+class="[^"]*(?:result__snippet|result-snippet)[^"]*"[^>]*>(?P<snippet>.*?)</(?:a|div|span)>',
            body,
            flags=flags,
        )
    )

    # Collect the anchors that are actual result links, with their raw href.
    links: list[tuple[re.Match[str], str]] = []
    for match in re.finditer(r"<a\b(?P<attrs>[^>]+)>(?P<title>.*?)</a>", body, flags=flags):
        attrs = match.group("attrs")
        class_match = re.search(r'class="(?P<class>[^"]+)"', attrs, flags=re.IGNORECASE)
        if class_match is None:
            continue
        class_names = class_match.group("class")
        if "result__a" not in class_names and "result-link" not in class_names:
            continue
        href_match = re.search(r'href="(?P<href>[^"]+)"', attrs, flags=re.IGNORECASE)
        if href_match is None:
            continue
        links.append((match, href_match.group("href")))

    results: list[dict[str, str]] = []
    cursor = 0  # index of the next snippet that has not been consumed
    for position, (match, raw_href) in enumerate(links):
        # The snippet for this link must start before the next result link.
        window_end = links[position + 1][0].start() if position + 1 < len(links) else len(body)
        while cursor < len(snippet_matches) and snippet_matches[cursor].start() < match.end():
            cursor += 1
        snippet = ""
        if cursor < len(snippet_matches) and snippet_matches[cursor].start() < window_end:
            snippet = _clean_html(snippet_matches[cursor].group("snippet"))
            cursor += 1

        title = _clean_html(match.group("title"))
        # Attribute values are HTML-escaped ("&amp;"); decode before parsing.
        url = _normalize_result_url(html.unescape(raw_href))
        if title and url:
            results.append({"title": title, "url": url, "snippet": snippet})
        if len(results) >= limit:
            break
    return results


def _normalize_result_url(raw_url: str) -> str:
    """Resolve a DuckDuckGo redirect URL to its target.

    URLs on ``duckduckgo.com`` (or a subdomain) whose path starts with
    ``/l/`` carry the real destination in the ``uddg`` query parameter.
    Anything else is returned unchanged.
    """
    try:
        parsed = urlparse(raw_url)
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal): leave it untouched.
        return raw_url
    host = (parsed.hostname or "").lower()
    # Exact host or a real subdomain; a bare suffix test would also accept
    # hosts such as "evilduckduckgo.com".
    is_duckduckgo = host == "duckduckgo.com" or host.endswith(".duckduckgo.com")
    if is_duckduckgo and parsed.path.startswith("/l/"):
        # parse_qs already percent-decodes the value; decoding it again
        # would corrupt targets that contain escaped characters.
        target = parse_qs(parsed.query).get("uddg", [""])[0]
        return target if target else raw_url
    return raw_url


def _clean_html(fragment: str) -> str:
    """Reduce an HTML fragment to single-spaced plain text."""
    # Replace tags with spaces so adjacent words do not merge.
    without_tags = re.sub(r"(?s)<[^>]+>", " ", fragment)
    # Decode HTML entities.
    decoded = html.unescape(without_tags)
    # Collapse all whitespace runs.
    return re.sub(r"\s+", " ", decoded).strip()
|
illusion/ui/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
UI 模块
|
|
3
|
+
=======
|
|
4
|
+
|
|
5
|
+
本模块提供 IllusionCode 用户界面的核心功能。
|
|
6
|
+
|
|
7
|
+
主要组件:
|
|
8
|
+
- run_repl: 运行交互式 REPL(默认的 React 终端界面)
|
|
9
|
+
- run_print_mode: 运行非交互式打印模式(适合脚本和自动化任务)
|
|
10
|
+
|
|
11
|
+
使用示例:
|
|
12
|
+
>>> from illusion.ui import run_repl, run_print_mode
|
|
13
|
+
>>>
|
|
14
|
+
>>> # 启动交互式 REPL
|
|
15
|
+
>>> await run_repl()
|
|
16
|
+
>>>
|
|
17
|
+
>>> # 运行单次交互模式
|
|
18
|
+
>>> await run_print_mode(prompt="帮我写一个 hello world 程序")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from illusion.ui.app import run_repl, run_print_mode
|
|
22
|
+
|
|
23
|
+
__all__ = ["run_repl", "run_print_mode"]
|