entari-plugin-hyw 4.0.0rc6__py3-none-any.whl → 4.0.0rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -0,0 +1,166 @@
+
+import urllib.parse
+import re
+from typing import List, Dict, Any
+from loguru import logger
+from .base import SearchEngine
+
+
+class DefaultEngine(SearchEngine):
+    """
+    Default browser address bar search engine.
+    Uses the browser's address bar to search (Ctrl+L -> type -> Enter).
+    This uses whatever default search engine the browser is configured with.
+    """
+
+    # Special marker to indicate this engine uses address bar input
+    USE_ADDRESS_BAR = True
+
+    def build_url(self, query: str, limit: int = 10) -> str:
+        """
+        For address bar search, we don't build a URL.
+        Return the raw query - SearchService will handle the address bar input.
+        """
+        # Return a special marker so SearchService knows to use the address bar
+        return f"__ADDRESS_BAR_SEARCH__:{query}"
+
+    def parse(self, content: str) -> List[Dict[str, Any]]:
+        """
+        Parse search results from whatever search engine the browser uses.
+        We detect the engine from the HTML and use the appropriate parsing.
+        """
+        results = []
+        seen_urls = set()
+
+        # Detect which search engine based on content
+        is_google = 'google' in content.lower() and ('class="g"' in content or 'data-hveid' in content)
+        is_bing = 'bing' in content.lower() and 'b_algo' in content
+        is_duckduckgo = 'duckduckgo' in content.lower()
+
+        if is_google:
+            results = self._parse_google(content, seen_urls)
+        elif is_bing:
+            results = self._parse_bing(content, seen_urls)
+        elif is_duckduckgo:
+            results = self._parse_duckduckgo(content, seen_urls)
+        else:
+            # Generic fallback
+            results = self._parse_generic(content, seen_urls)
+
+        logger.info(f"DefaultEngine parsed {len(results)} results (detected: {'google' if is_google else 'bing' if is_bing else 'ddg' if is_duckduckgo else 'generic'})")
+        return results
+
+    def _parse_google(self, content: str, seen_urls: set) -> List[Dict[str, Any]]:
+        """Parse Google search results."""
+        results = []
+        # Look for result links
+        link_regex = re.compile(
+            r'<a[^>]+href="(https?://(?!google\.com|accounts\.google)[^"]+)"[^>]*>([^<]+)</a>',
+            re.IGNORECASE
+        )
+
+        for match in link_regex.finditer(content):
+            if len(results) >= 15:
+                break
+            href = match.group(1)
+            title = match.group(2).strip()
+
+            if href in seen_urls or not title or len(title) < 3:
+                continue
+            if any(x in href for x in ['google.com', 'gstatic.com', 'youtube.com/redirect']):
+                continue
+
+            seen_urls.add(href)
+            results.append({
+                "title": re.sub(r'<[^>]+>', '', title),
+                "url": href,
+                "domain": urllib.parse.urlparse(href).hostname or "",
+                "content": "",
+            })
+        return results
+
+    def _parse_bing(self, content: str, seen_urls: set) -> List[Dict[str, Any]]:
+        """Parse Bing search results."""
+        results = []
+        link_regex = re.compile(
+            r'<a[^>]+href="(https?://(?!bing\.com|microsoft\.com)[^"]+)"[^>]*>(.*?)</a>',
+            re.IGNORECASE | re.DOTALL
+        )
+
+        for match in link_regex.finditer(content):
+            if len(results) >= 15:
+                break
+            href = match.group(1)
+            title_html = match.group(2)
+            title = re.sub(r'<[^>]+>', '', title_html).strip()
+
+            if href in seen_urls or not title or len(title) < 3:
+                continue
+            if any(x in href for x in ['bing.com', 'microsoft.com', 'msn.com']):
+                continue
+
+            seen_urls.add(href)
+            results.append({
+                "title": title,
+                "url": href,
+                "domain": urllib.parse.urlparse(href).hostname or "",
+                "content": "",
+            })
+        return results
+
+    def _parse_duckduckgo(self, content: str, seen_urls: set) -> List[Dict[str, Any]]:
+        """Parse DuckDuckGo results."""
+        results = []
+        link_regex = re.compile(
+            r'<a[^>]+href="(https?://(?!duckduckgo\.com)[^"]+)"[^>]*>(.*?)</a>',
+            re.IGNORECASE | re.DOTALL
+        )
+
+        for match in link_regex.finditer(content):
+            if len(results) >= 15:
+                break
+            href = match.group(1)
+            title_html = match.group(2)
+            title = re.sub(r'<[^>]+>', '', title_html).strip()
+
+            if href in seen_urls or not title or len(title) < 3:
+                continue
+
+            seen_urls.add(href)
+            results.append({
+                "title": title,
+                "url": href,
+                "domain": urllib.parse.urlparse(href).hostname or "",
+                "content": "",
+            })
+        return results
+
+    def _parse_generic(self, content: str, seen_urls: set) -> List[Dict[str, Any]]:
+        """Generic link parser for unknown search engines."""
+        results = []
+        link_regex = re.compile(
+            r'<a[^>]+href="(https?://[^"]+)"[^>]*>([^<]+)</a>',
+            re.IGNORECASE
+        )
+
+        for match in link_regex.finditer(content):
+            if len(results) >= 15:
+                break
+            href = match.group(1)
+            title = match.group(2).strip()
+
+            if href in seen_urls or not title or len(title) < 5:
+                continue
+            # Skip common non-result URLs
+            if any(x in href for x in ['javascript:', 'mailto:', '#', 'login', 'signin', 'account']):
+                continue
+
+            seen_urls.add(href)
+            results.append({
+                "title": title,
+                "url": href,
+                "domain": urllib.parse.urlparse(href).hostname or "",
+                "content": "",
+            })
+        return results
+
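Note that `build_url` returns a sentinel, not a URL, so whatever consumes it has to strip the `__ADDRESS_BAR_SEARCH__:` prefix before acting. A minimal sketch of the dispatch a caller such as SearchService might perform (this routing code is an illustration under assumptions, not part of the diff):

    # Hypothetical dispatch in SearchService (illustration only; names assumed)
    ADDRESS_BAR_PREFIX = "__ADDRESS_BAR_SEARCH__:"

    async def run_search(engine, screenshot_service, query):
        target = engine.build_url(query)
        if target.startswith(ADDRESS_BAR_PREFIX):
            # Address-bar flow: the browser's default engine executes the search
            query_text = target[len(ADDRESS_BAR_PREFIX):]
            page = await screenshot_service.search_via_address_bar(query_text)
        else:
            # Normal flow: navigate straight to the engine's results URL
            page = await screenshot_service.fetch_page(target, include_screenshot=False)
        return engine.parse(page.get("html", ""))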
@@ -65,7 +65,7 @@ class SharedBrowserManager:
         # Hide scrollbars globally
         co.set_argument('--hide-scrollbars')
         # The oversized height is (probably) what keeps the scrollbar suppressed
-        co.set_argument('--window-size=1280,20000')
+        co.set_argument('--window-size=1280,9000')
         self._page = ChromiumPage(addr_or_opts=co)
 
         # Show Landing Page
@@ -6,6 +6,8 @@ Provides page fetching and screenshot capabilities using DrissionPage.
 
 import asyncio
 import base64
+import threading
+import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Optional, Dict, Any, List
 from loguru import logger
@@ -20,16 +22,182 @@ class ScreenshotService:
         self.headless = headless
         self._manager = None
         self._executor = ThreadPoolExecutor(max_workers=10)
+        self._search_tab_pool = []  # List of Tab objects
+        self._pool_lock = threading.Lock()
 
         if auto_start:
             self._ensure_ready()
+
+    def prepare_search_tabs_background(self, count: int, url: str = "https://www.google.com") -> None:
+        """
+        Pre-launch tabs for search (BACKGROUND - fire and forget).
+        Tabs are created in a background thread, so they may not be ready immediately.
+        """
+        self._executor.submit(self._prepare_search_tabs_sync, count, url)
+
+    def _prepare_search_tabs_sync(self, count: int, url: str = "https://www.google.com"):
+        """Sync implementation of tab preparation - creates tabs in PARALLEL."""
+        try:
+            self._ensure_ready()
+            page = self._manager.page
+            if not page:
+                return
+
+            with self._pool_lock:
+                current_count = len(self._search_tab_pool)
+                needed = count - current_count
+
+            if needed <= 0:
+                return
+
+            logger.info(f"ScreenshotService: Pre-launching {needed} search tabs for {url} (parallel)...")
+
+            # Create tabs in parallel using threads
+            created_tabs = [None] * needed
+
+            def create_single_tab(index):
+                try:
+                    tab = page.new_tab(url)
+                    created_tabs[index] = tab
+                    logger.debug(f"ScreenshotService: Tab {index} ready")
+                except Exception as e:
+                    logger.error(f"ScreenshotService: Failed to create tab {index}: {e}")
+
+            threads = []
+            for i in range(needed):
+                t = threading.Thread(target=create_single_tab, args=(i,))
+                t.start()
+                threads.append(t)
+
+            # Wait for all threads to complete
+            for t in threads:
+                t.join()
+
+            # Add successfully created tabs to the pool
+            with self._pool_lock:
+                for tab in created_tabs:
+                    if tab:
+                        self._search_tab_pool.append(tab)
+                logger.info(f"ScreenshotService: Tab pool ready ({len(self._search_tab_pool)} tabs)")
+
+        except Exception as e:
+            logger.error(f"ScreenshotService: Failed to prepare tabs: {e}")
+
+    async def search_via_page_input_batch(self, queries: List[str], url: str, selector: str = "#input") -> List[Dict[str, Any]]:
+        """
+        Execute concurrent searches using page inputs.
+        """
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(
+            self._executor,
+            self._search_via_page_input_batch_sync,
+            queries, url, selector
+        )
+
+    def _search_via_page_input_batch_sync(self, queries: List[str], url: str, selector: str) -> List[Dict[str, Any]]:
+        """Sync batch execution - create tabs sequentially, search in parallel."""
+        results = [None] * len(queries)
+        tabs = []
+
+        # Phase 1: Get/create tabs SEQUENTIALLY (DrissionPage isn't thread-safe for new_tab)
+        target_url = url or "https://www.google.com"
+        logger.info(f"ScreenshotService: Acquiring {len(queries)} tabs for parallel search...")
+
+        for i in range(len(queries)):
+            tab = None
+            # Try to get one from the pool first
+            with self._pool_lock:
+                if self._search_tab_pool:
+                    tab = self._search_tab_pool.pop(0)
+                    logger.debug(f"ScreenshotService: Got tab {i} from pool")
+
+            if not tab:
+                # Create a new one
+                self._ensure_ready()
+                tab = self._manager.page.new_tab(target_url)
+                logger.debug(f"ScreenshotService: Created tab {i} for {target_url}")
+
+            tabs.append(tab)
+
+        logger.info(f"ScreenshotService: {len(tabs)} tabs ready, starting parallel searches...")
+
+        # Phase 2: Execute searches in PARALLEL
+        def run_search(index, tab, query):
+            try:
+                logger.debug(f"Search[{index}]: Starting for '{query}' on {tab.url}")
+
+                # Wait for the page to be ready first
+                try:
+                    tab.wait.doc_loaded(timeout=10)
+                except Exception:
+                    pass
+
+                # Find the input element, with fallbacks
+                logger.debug(f"Search[{index}]: Looking for input with selector '{selector}'")
+                ele = tab.ele(selector, timeout=5)
+                if not ele:
+                    logger.debug(f"Search[{index}]: Primary selector failed, trying fallbacks")
+                    for fallback in ["textarea[name='q']", "#APjFqb", "input[name='q']", "input[type='text']"]:
+                        ele = tab.ele(fallback, timeout=2)
+                        if ele:
+                            logger.debug(f"Search[{index}]: Found input with fallback '{fallback}'")
+                            break
+
+                if not ele:
+                    logger.error(f"Search[{index}]: No input element found on {tab.url}!")
+                    results[index] = {"content": "Error: input not found", "title": "Error", "url": tab.url, "html": tab.html[:5000]}
+                    return
+
+                logger.debug(f"Search[{index}]: Typing query...")
+                ele.input(query)
+
+                logger.debug(f"Search[{index}]: Pressing Enter...")
+                tab.actions.key_down('enter').key_up('enter')
+
+                logger.debug(f"Search[{index}]: Waiting for search results...")
+                tab.wait.doc_loaded(timeout=10)
+                time.sleep(0.5)
+
+                logger.debug(f"Search[{index}]: Extracting content...")
+                html = tab.html
+                content = trafilatura.extract(
+                    html, include_links=True, include_images=True, include_comments=False,
+                    include_tables=True, favor_precision=False, output_format="markdown"
+                ) or ""
+
+                logger.info(f"ScreenshotService: Search '{query}' completed -> {tab.url}")
+
+                results[index] = {
+                    "content": content,
+                    "html": html,
+                    "title": tab.title,
+                    "url": tab.url,
+                    "images": []
+                }
+
+            except Exception as e:
+                logger.error(f"ScreenshotService: Search error for '{query}': {e}")
+                results[index] = {"content": f"Error: {e}", "title": "Error", "url": "", "html": ""}
+            finally:
+                try:
+                    tab.close()
+                except Exception:
+                    pass
+
+        threads = []
+        for i, (tab, query) in enumerate(zip(tabs, queries)):
+            t = threading.Thread(target=run_search, args=(i, tab, query))
+            t.start()
+            threads.append(t)
+
+        for t in threads:
+            t.join()
+
+        return results
 
     def _ensure_ready(self):
         """Ensure shared browser is ready."""
         from .manager import get_shared_browser_manager
         self._manager = get_shared_browser_manager(headless=self.headless)
 
-    async def fetch_page(self, url: str, timeout: float = 20.0, include_screenshot: bool = True) -> Dict[str, Any]:
+    async def fetch_page(self, url: str, timeout: float = 10.0, include_screenshot: bool = True) -> Dict[str, Any]:
         """
         Fetch page content (and optionally screenshot).
         Runs in a thread executor to avoid blocking the async loop.
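Taken together, the warm-up and the batch entry point are meant to be used roughly as below; a minimal sketch, assuming the shared browser starts in this environment, the constructor accepts these arguments, and the target page's search box matches one of the fallback selectors:

    # Hypothetical usage of the tab pool and batch search (names from this diff)
    import asyncio

    async def main():
        svc = ScreenshotService(headless=True)
        # Fire-and-forget warm-up: three tabs open while other startup work proceeds
        svc.prepare_search_tabs_background(3, url="https://www.google.com")
        results = await svc.search_via_page_input_batch(
            ["python drissionpage", "trafilatura markdown", "loguru sinks"],
            url="https://www.google.com",
            selector="textarea[name='q']",  # Google's box; "#input" is the generic default
        )
        for r in results:
            print(r["url"], len(r["content"]))

    asyncio.run(main())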
@@ -43,6 +211,87 @@
             include_screenshot
         )
 
+    async def search_via_address_bar(self, query: str, timeout: float = 20.0) -> Dict[str, Any]:
+        """
+        Search using the browser's address bar (uses the browser's default search engine).
+        Simulates: Ctrl+L (focus address bar) -> type query -> Enter.
+        """
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(
+            self._executor,
+            self._search_via_address_bar_sync,
+            query,
+            timeout
+        )
+
+    def _search_via_address_bar_sync(self, query: str, timeout: float) -> Dict[str, Any]:
+        """Synchronous address bar search logic."""
+        if not query:
+            return {"content": "Error: missing query", "title": "Error", "url": "", "html": ""}
+
+        tab = None
+        try:
+            self._ensure_ready()
+            page = self._manager.page
+            if not page:
+                return {"content": "Error: Browser not available", "title": "Error", "url": "", "html": ""}
+
+            # Open a new blank tab
+            tab = page.new_tab()
+
+            # Focus the address bar with Ctrl+L (Cmd+L on macOS)
+            import platform
+            if platform.system() == "Darwin":
+                tab.actions.key_down('cmd').key_down('l').key_up('l').key_up('cmd')
+            else:
+                tab.actions.key_down('ctrl').key_down('l').key_up('l').key_up('ctrl')
+
+            # Small delay for the address bar to take focus
+            import time as _time
+            _time.sleep(0.1)
+
+            # Type the query
+            tab.actions.type(query)
+
+            # Press Enter to search
+            tab.actions.key_down('enter').key_up('enter')
+
+            # Wait for the page to load
+            try:
+                tab.wait.doc_loaded(timeout=timeout)
+                # Additional wait for search results
+                _time.sleep(1)
+            except Exception:
+                pass
+
+            html = tab.html
+            title = tab.title
+            final_url = tab.url
+
+            # Extract content
+            content = trafilatura.extract(
+                html, include_links=True, include_images=True, include_comments=False,
+                include_tables=True, favor_precision=False, output_format="markdown"
+            ) or ""
+
+            logger.info(f"ScreenshotService: Address bar search completed -> {final_url}")
+
+            return {
+                "content": content,
+                "html": html,
+                "title": title,
+                "url": final_url,
+                "images": []
+            }
+
+        except Exception as e:
+            logger.error(f"ScreenshotService: Address bar search failed: {e}")
+            return {"content": f"Error: search failed ({e})", "title": "Error", "url": "", "html": ""}
+        finally:
+            if tab:
+                try:
+                    tab.close()
+                except Exception:
+                    pass
+
     def _fetch_page_sync(self, url: str, timeout: float, include_screenshot: bool) -> Dict[str, Any]:
         """Synchronous fetch logic."""
         if not url:
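The address-bar variant is a one-shot call; a minimal sketch, assuming keyboard focus of the address bar works in the target environment (it typically needs a visible, non-headless window):

    # Hypothetical one-off address bar search (method defined above)
    import asyncio

    async def demo():
        svc = ScreenshotService(headless=False)  # Ctrl+L focus generally needs a visible window
        page = await svc.search_via_address_bar("entari plugin hyw", timeout=20.0)
        print(page["title"], "->", page["url"])

    asyncio.run(demo())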
@@ -55,39 +304,31 @@ class ScreenshotService:
             if not page:
                 return {"content": "Error: Browser not available", "title": "Error", "url": url}
 
-            # New Tab
+            # New Tab with URL directly
             tab = page.new_tab(url)
 
-            # Wait logic
+            # Wait logic - optimized for search pages
             is_search_page = any(s in url.lower() for s in ['search', 'bing.com', 'duckduckgo', 'google.com/search', 'searx'])
             if is_search_page:
-                # Quick check for results
-                result_selectors = ['#results', '#b_results', '#search', '#links', '.result']
-                for selector in result_selectors:
-                    if tab.ele(selector, timeout=1):
-                        break
+                # Optimized waiting for search engine results
+                try:
+                    # Google uses #search or #rso
+                    # DuckDuckGo uses #react-layout
+                    # Bing uses #b_results
+                    if 'google' in url.lower():
+                        # Wait for the results container (fastest possible return)
+                        tab.ele('#search', timeout=timeout)
+                    elif 'bing' in url.lower():
+                        tab.ele('#b_results', timeout=timeout)
+                    else:
+                        # Generic search fallback
+                        tab.wait.doc_loaded(timeout=timeout)
+                except Exception:
+                    pass
             else:
                 # 1. Wait for document to settle (Fast Dynamic Wait)
                 try:
-                    tab.wait.doc_loaded(timeout=5)
-                    # Brief check for loading overlays (fast skip if none)
-                    tab.run_js("""
-                        (async () => {
-                            const isVisible = (el) => !!(el.offsetWidth || el.offsetHeight || el.getClientRects().length);
-                            for (let i = 0; i < 15; i++) {
-                                const indicators = Array.from(document.querySelectorAll('*')).filter(el => {
-                                    try {
-                                        const text = (el.textContent || '').toLowerCase();
-                                        const id = (el.id || '').toLowerCase();
-                                        const cls = (el.getAttribute('class') || '').toLowerCase();
-                                        return (text.includes('loading') || id.includes('loading') || cls.includes('loading')) && isVisible(el);
-                                    } catch(e) { return false; }
-                                });
-                                if (indicators.length === 0) break;
-                                await new Promise(r => setTimeout(r, 100));
-                            }
-                        })()
-                    """, as_expr=True)
+                    tab.wait.doc_loaded(timeout=timeout)
                 except: pass
 
         html = tab.html
@@ -237,6 +237,8 @@ const instructStage = computed(() => {
   }
 })
 
+const visionStage = computed(() => data.value?.stages?.find(s => s.name === 'Vision'))
+
 const summaryStage = computed(() => data.value?.stages?.find(s => s.name?.toLowerCase() === 'summary' || s.name?.toLowerCase() === 'agent'))
 // searchStage removed - no longer needed for display
 
@@ -654,7 +656,7 @@ The system automatically handles citations like [1] and [2], reordering them dyn
       </div>
 
       <!-- Flow: Unified Stage Info Area -->
-      <div v-if="instructStage || summaryStage" class="relative group/flow">
+      <div v-if="instructStage || summaryStage || visionStage" class="relative group/flow">
        <!-- Corner Badge -->
        <div
          class="absolute -top-2 -left-2 h-7 px-2.5 z-10 flex items-center justify-center gap-1.5"
@@ -668,6 +670,35 @@ The system automatically handles citations like [1] and [2], reordering them dyn
       <div class="shadow-sm shadow-black/5 bg-white pt-8 px-6 pb-8">
         <div class="space-y-8 relative">
 
+          <!-- Vision Stage -->
+          <div v-if="visionStage" class="relative flex items-start gap-4 z-10 w-full">
+            <!-- Node: Brand Logo -->
+            <div class="shrink-0 w-6 h-6 flex items-center justify-center bg-white">
+              <img :src="getIconPath(visionStage)" class="w-5 h-5 object-contain" alt="" />
+            </div>
+            <!-- Content -->
+            <div class="flex-1 min-w-0 pt-1">
+              <div class="text-[17px] font-bold uppercase tracking-tight mb-1.5 leading-none" style="color: var(--text-primary)">Vision</div>
+              <div class="flex items-center justify-between gap-x-4 text-[13px] font-mono leading-tight w-full" style="color: var(--text-muted)">
+                <!-- Model Name (Truncated) -->
+                <span class="truncate max-w-[180px]" :title="visionStage.model">{{ visionStage.model }}</span>
+
+                <!-- Metrics -->
+                <div class="flex items-center gap-4 shrink-0">
+                  <div class="flex items-center gap-1.5 opacity-80">
+                    <Icon icon="mdi:clock-outline" class="text-[13px]" />
+                    <span>{{ (visionStage.time || 0).toFixed(2) }}s</span>
+                  </div>
+                  <template v-if="visionStage.cost">
+                    <div class="flex items-center gap-0.5 opacity-80">
+                      <span>${{ visionStage.cost.toFixed(5) }}</span>
+                    </div>
+                  </template>
+                </div>
+              </div>
+            </div>
+          </div>
+
           <!-- Instruct Stage -->
           <div v-if="instructStage" class="relative flex items-start gap-4 z-10 w-full">
             <!-- Node: Brand Logo -->
@@ -11,10 +11,10 @@ from typing import Dict, Any
 # =============================================================================
 
 INSTRUCT_SP = """You are an intelligent INSTRUCT assistant.
-Your goal is to draft a research plan for the user's query and execute it.
+Your goal is to immediately draft a research plan for the user's query and execute it.
 > Complete all searches and fetches where possible; when needed, run multiple tools of the same or different kinds concurrently.
 
-- If the user message contains typical nouns or likely proper-noun combinations: call the `web_search` tool (max 3).
+- If the user message contains typical nouns or likely proper-noun combinations: call the `web_search` tool (max 3, 1 recommended).
 - Distill the standalone keywords from the message and search for the keyword itself, adding no auxiliary search terms; this gives the best results.
 - Do not mix search terms; search one keyword at a time to get the latest data in its intended direction.
 - When the user message contains links, call `crawl_page` (max 3) to fetch the full page text.
@@ -35,6 +35,9 @@ INSTRUCT_SP = """You are an intelligent INSTRUCT assistant.
 ## CRITICAL RULES:
 - Never output any text reply: you must act exclusively through tool calls.
 - If no tool call is made, the flow ends automatically.
+
+## now
+Respond quickly.
 """
 
 INSTRUCT_DEEPSEARCH_SP = """You are an intelligent INSTRUCT_DEEPSEARCH review assistant; you must repeatedly supplement the INSTRUCT output with additional information until it is sufficient or the retry cap (3 rounds) is reached.
@@ -77,6 +80,22 @@ SUMMARY_REPORT_SP = """# You are an information-integration expert (Summary Agent).
 """
 
 
+# =============================================================================
+# VISION DESCRIPTION PROMPT
+# =============================================================================
+
+VISION_DESCRIPTION_SP = """You are an image description expert.
+Based on the image and text the user sends, quickly describe what the image contains.
+
+Requirements:
+- Objectively describe the main elements, scene, people, text, etc. in the image
+- If the image contains text, transcribe it in full
+- If the user asks a specific question, describe the details relevant to that question
+- Keep the description concise but informative, within 300 characters
+- Reply in the user's language
+"""
+
+
 # =============================================================================
 # TOOL DEFINITIONS
 # =============================================================================
@@ -105,7 +124,7 @@ def get_web_search_tool() -> Dict[str, Any]:
         "type": "function",
         "function": {
             "name": "web_search",
-            "description": "Web search",
+            "description": "Web search. Only plain string queries are accepted; advanced search syntax is not allowed.",
             "parameters": {
                 "type": "object",
                 "properties": {"query": {"type": "string"}},