entari-plugin-hyw 4.0.0rc6__py3-none-any.whl → 4.0.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw/Untitled-1 +1865 -0
- entari_plugin_hyw/__init__.py +733 -379
- entari_plugin_hyw/history.py +60 -57
- entari_plugin_hyw/misc.py +3 -0
- entari_plugin_hyw/search_cache.py +154 -0
- {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/METADATA +3 -1
- entari_plugin_hyw-4.0.0rc8.dist-info/RECORD +68 -0
- {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/WHEEL +1 -1
- {entari_plugin_hyw-4.0.0rc6.dist-info → entari_plugin_hyw-4.0.0rc8.dist-info}/top_level.txt +1 -0
- hyw_core/__init__.py +94 -0
- hyw_core/browser_control/__init__.py +65 -0
- hyw_core/browser_control/assets/card-dist/index.html +409 -0
- hyw_core/browser_control/assets/index.html +5691 -0
- hyw_core/browser_control/engines/__init__.py +17 -0
- hyw_core/browser_control/engines/default.py +166 -0
- {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/duckduckgo.py +42 -8
- {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/google.py +1 -1
- {entari_plugin_hyw/browser → hyw_core/browser_control}/manager.py +15 -8
- entari_plugin_hyw/render_vue.py → hyw_core/browser_control/renderer.py +29 -14
- hyw_core/browser_control/service.py +720 -0
- hyw_core/config.py +154 -0
- hyw_core/core.py +322 -0
- hyw_core/definitions.py +83 -0
- entari_plugin_hyw/modular_pipeline.py → hyw_core/pipeline.py +204 -86
- {entari_plugin_hyw → hyw_core}/search.py +60 -19
- hyw_core/stages/__init__.py +21 -0
- entari_plugin_hyw/stage_base.py → hyw_core/stages/base.py +3 -0
- entari_plugin_hyw/stage_summary.py → hyw_core/stages/summary.py +36 -7
- entari_plugin_hyw/assets/card-dist/index.html +0 -387
- entari_plugin_hyw/browser/__init__.py +0 -10
- entari_plugin_hyw/browser/engines/bing.py +0 -95
- entari_plugin_hyw/browser/service.py +0 -304
- entari_plugin_hyw/card-ui/.gitignore +0 -24
- entari_plugin_hyw/card-ui/README.md +0 -5
- entari_plugin_hyw/card-ui/index.html +0 -16
- entari_plugin_hyw/card-ui/package-lock.json +0 -2342
- entari_plugin_hyw/card-ui/package.json +0 -31
- entari_plugin_hyw/card-ui/public/logos/anthropic.svg +0 -1
- entari_plugin_hyw/card-ui/public/logos/cerebras.svg +0 -9
- entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/gemini.svg +0 -1
- entari_plugin_hyw/card-ui/public/logos/google.svg +0 -1
- entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/microsoft.svg +0 -15
- entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/openai.svg +0 -1
- entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/perplexity.svg +0 -24
- entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- entari_plugin_hyw/card-ui/public/vite.svg +0 -1
- entari_plugin_hyw/card-ui/src/App.vue +0 -756
- entari_plugin_hyw/card-ui/src/assets/vue.svg +0 -1
- entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +0 -41
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +0 -382
- entari_plugin_hyw/card-ui/src/components/SectionCard.vue +0 -41
- entari_plugin_hyw/card-ui/src/components/StageCard.vue +0 -240
- entari_plugin_hyw/card-ui/src/main.ts +0 -5
- entari_plugin_hyw/card-ui/src/style.css +0 -29
- entari_plugin_hyw/card-ui/src/test_regex.js +0 -103
- entari_plugin_hyw/card-ui/src/types.ts +0 -61
- entari_plugin_hyw/card-ui/tsconfig.app.json +0 -16
- entari_plugin_hyw/card-ui/tsconfig.json +0 -7
- entari_plugin_hyw/card-ui/tsconfig.node.json +0 -26
- entari_plugin_hyw/card-ui/vite.config.ts +0 -16
- entari_plugin_hyw/definitions.py +0 -155
- entari_plugin_hyw/stage_instruct.py +0 -345
- entari_plugin_hyw/stage_instruct_deepsearch.py +0 -104
- entari_plugin_hyw-4.0.0rc6.dist-info/RECORD +0 -100
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/anthropic.svg +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/cerebras.svg +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/deepseek.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/gemini.svg +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/google.svg +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/grok.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/huggingface.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/microsoft.svg +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/minimax.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/mistral.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/nvida.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/openai.svg +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/openrouter.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/perplexity.svg +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/qwen.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/xai.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/xiaomi.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/zai.png +0 -0
- {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/vite.svg +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/anthropic.svg +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/cerebras.svg +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/deepseek.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/gemini.svg +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/google.svg +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/grok.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/huggingface.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/microsoft.svg +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/minimax.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/mistral.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/nvida.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/openai.svg +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/openrouter.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/perplexity.svg +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/qwen.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/xai.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/xiaomi.png +0 -0
- {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/zai.png +0 -0
- {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/base.py +0 -0
- {entari_plugin_hyw/browser → hyw_core/browser_control}/landing.html +0 -0
- {entari_plugin_hyw → hyw_core}/image_cache.py +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Search Engines Package
|
|
3
|
+
|
|
4
|
+
Provides search engine adapters for different search providers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .base import SearchEngine
|
|
8
|
+
from .google import GoogleEngine
|
|
9
|
+
from .duckduckgo import DuckDuckGoEngine
|
|
10
|
+
from .default import DefaultEngine
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"SearchEngine",
|
|
14
|
+
"GoogleEngine",
|
|
15
|
+
"DuckDuckGoEngine",
|
|
16
|
+
"DefaultEngine",
|
|
17
|
+
]
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
|
|
2
|
+
import urllib.parse
|
|
3
|
+
import re
|
|
4
|
+
from typing import List, Dict, Any
|
|
5
|
+
from loguru import logger
|
|
6
|
+
from .base import SearchEngine
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DefaultEngine(SearchEngine):
    """
    Default browser address bar search engine.

    Uses the browser's address bar to search (Ctrl+L -> type -> Enter), so the
    results page comes from whatever default search engine the browser is
    configured with. ``parse`` inspects the returned HTML for engine-specific
    markers and applies the matching link filter, falling back to a generic
    anchor scan when no known engine is recognised.
    """

    # Special marker to indicate this engine uses address bar input
    USE_ADDRESS_BAR = True

    # Upper bound on the number of results any single parser returns.
    MAX_RESULTS = 15

    # Removes markup nested inside an anchor so only its visible text remains.
    _TAG_RE = re.compile(r'<[^>]+>')

    # Anchor patterns, compiled once instead of on every call.
    # The negative lookaheads only reject hosts that begin with the listed
    # name directly after the scheme; other forms (e.g. a "www." prefix) are
    # caught by the substring block lists below.
    _GOOGLE_LINK_RE = re.compile(
        r'<a[^>]+href="(https?://(?!google\.com|accounts\.google)[^"]+)"[^>]*>(.*?)</a>',
        re.IGNORECASE | re.DOTALL,
    )
    _BING_LINK_RE = re.compile(
        r'<a[^>]+href="(https?://(?!bing\.com|microsoft\.com)[^"]+)"[^>]*>(.*?)</a>',
        re.IGNORECASE | re.DOTALL,
    )
    _DDG_LINK_RE = re.compile(
        r'<a[^>]+href="(https?://(?!duckduckgo\.com)[^"]+)"[^>]*>(.*?)</a>',
        re.IGNORECASE | re.DOTALL,
    )
    # The generic scan deliberately accepts only anchors with plain-text
    # content, which keeps icon/image links out of the results.
    _GENERIC_LINK_RE = re.compile(
        r'<a[^>]+href="(https?://[^"]+)"[^>]*>([^<]+)</a>',
        re.IGNORECASE,
    )

    # Substrings that disqualify a URL for each detected engine.
    _GOOGLE_BLOCKED = ('google.com', 'gstatic.com', 'youtube.com/redirect')
    _BING_BLOCKED = ('bing.com', 'microsoft.com', 'msn.com')
    _DDG_BLOCKED = ('duckduckgo.com',)
    _GENERIC_BLOCKED = ('javascript:', 'mailto:', '#', 'login', 'signin', 'account')

    def build_url(self, query: str, limit: int = 10) -> str:
        """
        Return the address-bar marker for *query* instead of a real URL.

        For address bar search no URL is built; the returned string carries
        the raw query behind a special prefix so that SearchService knows to
        type it into the address bar. ``limit`` is accepted for interface
        compatibility and is not used here.
        """
        return f"__ADDRESS_BAR_SEARCH__:{query}"

    def parse(self, content: str) -> List[Dict[str, Any]]:
        """
        Parse search results from whatever search engine the browser used.

        The engine is detected from markers in the HTML and the matching
        parser is applied. Each result is a dict with the keys ``title``,
        ``url``, ``domain`` and ``content`` (always an empty string here).
        """
        seen_urls = set()
        lowered = content.lower()  # computed once for all engine checks

        if 'google' in lowered and ('class="g"' in content or 'data-hveid' in content):
            detected, parser = 'google', self._parse_google
        elif 'bing' in lowered and 'b_algo' in content:
            detected, parser = 'bing', self._parse_bing
        elif 'duckduckgo' in lowered:
            detected, parser = 'ddg', self._parse_duckduckgo
        else:
            # Generic fallback
            detected, parser = 'generic', self._parse_generic

        results = parser(content, seen_urls)
        logger.info(f"DefaultEngine parsed {len(results)} results (detected: {detected})")
        return results

    @staticmethod
    def _domain_of(url: str) -> str:
        """Return the hostname of *url*, or an empty string if it cannot be parsed."""
        try:
            return urllib.parse.urlparse(url).hostname or ""
        except ValueError:
            # urlparse rejects some malformed hosts (e.g. unbalanced IPv6
            # brackets); one bad link must not abort the whole parse.
            return ""

    def _collect_links(
        self,
        content: str,
        seen_urls: set,
        pattern: "re.Pattern",
        blocked: tuple = (),
        min_title_len: int = 3,
    ) -> List[Dict[str, Any]]:
        """
        Scan *content* with *pattern* and build the result list.

        *pattern* must capture the URL in group 1 and the anchor's inner HTML
        in group 2. A link is skipped when its URL is already in *seen_urls*,
        when its tag-stripped title is shorter than *min_title_len*, or when
        the URL contains any substring from *blocked*. Accepted URLs are added
        to *seen_urls*. At most ``MAX_RESULTS`` entries are returned.
        """
        results = []
        for match in pattern.finditer(content):
            if len(results) >= self.MAX_RESULTS:
                break

            href = match.group(1)
            title = self._TAG_RE.sub('', match.group(2)).strip()

            if href in seen_urls or len(title) < min_title_len:
                continue
            if any(token in href for token in blocked):
                continue

            seen_urls.add(href)
            results.append({
                "title": title,
                "url": href,
                "domain": self._domain_of(href),
                "content": "",
            })
        return results

    def _parse_google(self, content: str, seen_urls: set) -> List[Dict[str, Any]]:
        """Parse Google search results, allowing markup inside the title anchor."""
        return self._collect_links(
            content, seen_urls, self._GOOGLE_LINK_RE, self._GOOGLE_BLOCKED
        )

    def _parse_bing(self, content: str, seen_urls: set) -> List[Dict[str, Any]]:
        """Parse Bing search results."""
        return self._collect_links(
            content, seen_urls, self._BING_LINK_RE, self._BING_BLOCKED
        )

    def _parse_duckduckgo(self, content: str, seen_urls: set) -> List[Dict[str, Any]]:
        """Parse DuckDuckGo results, dropping links back to DuckDuckGo itself."""
        return self._collect_links(
            content, seen_urls, self._DDG_LINK_RE, self._DDG_BLOCKED
        )

    def _parse_generic(self, content: str, seen_urls: set) -> List[Dict[str, Any]]:
        """Generic link parser for unknown search engines."""
        # Skip common non-result URLs and require a slightly longer title
        # than the engine-specific parsers do.
        return self._collect_links(
            content,
            seen_urls,
            self._GENERIC_LINK_RE,
            self._GENERIC_BLOCKED,
            min_title_len=5,
        )
|
|
166
|
+
|
|
@@ -31,8 +31,10 @@ class DuckDuckGoEngine(SearchEngine):
|
|
|
31
31
|
results = []
|
|
32
32
|
seen_urls = set()
|
|
33
33
|
|
|
34
|
-
#
|
|
35
|
-
|
|
34
|
+
# More robust regex: capture ANY href, not just http
|
|
35
|
+
# Matches: <a ... href="..." ...>(...)</a>
|
|
36
|
+
# We capture the full hook to extract title + url
|
|
37
|
+
link_regex = re.compile(r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>(.*?)</a>', re.IGNORECASE | re.DOTALL)
|
|
36
38
|
|
|
37
39
|
pos = 0
|
|
38
40
|
while True:
|
|
@@ -40,7 +42,7 @@ class DuckDuckGoEngine(SearchEngine):
|
|
|
40
42
|
if not match:
|
|
41
43
|
break
|
|
42
44
|
|
|
43
|
-
|
|
45
|
+
raw_href = match.group(1)
|
|
44
46
|
title_html = match.group(2)
|
|
45
47
|
|
|
46
48
|
# Clean title
|
|
@@ -48,17 +50,49 @@ class DuckDuckGoEngine(SearchEngine):
|
|
|
48
50
|
|
|
49
51
|
pos = match.end()
|
|
50
52
|
|
|
53
|
+
# 1. Resolve relative URLs (DDG Lite uses /l/?uddg=...)
|
|
54
|
+
if raw_href.startswith('/'):
|
|
55
|
+
href = "https://lite.duckduckgo.com" + raw_href
|
|
56
|
+
else:
|
|
57
|
+
href = raw_href
|
|
58
|
+
|
|
59
|
+
# 2. Decode DDG redirect (uddg=...)
|
|
60
|
+
# e.g. /l/?uddg=http%3A%2F%2Fexample.com&rut=...
|
|
61
|
+
if "uddg=" in href:
|
|
62
|
+
try:
|
|
63
|
+
parsed = urllib.parse.urlparse(href)
|
|
64
|
+
qs = urllib.parse.parse_qs(parsed.query)
|
|
65
|
+
if 'uddg' in qs:
|
|
66
|
+
href = qs['uddg'][0]
|
|
67
|
+
except: pass
|
|
68
|
+
|
|
51
69
|
# Filter junk
|
|
70
|
+
if not href.startswith("http"): continue
|
|
52
71
|
if "search" in href and "q=" in href: continue
|
|
53
72
|
if "google.com" in href or "bing.com" in href: continue
|
|
73
|
+
if "duckduckgo.com" in href: continue # Filter self links
|
|
54
74
|
if href in seen_urls: continue
|
|
55
75
|
|
|
56
|
-
#
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
76
|
+
# Improved Snippet Extraction:
|
|
77
|
+
# The structure is consistently:
|
|
78
|
+
# <a ... class="result-link">...</a>
|
|
79
|
+
# ...
|
|
80
|
+
# <td class="result-snippet">...</td>
|
|
60
81
|
|
|
61
|
-
#
|
|
82
|
+
# Search for the snippet cell specifically associated with this result
|
|
83
|
+
# We search in a reasonable window after the title link
|
|
84
|
+
snippet_window = content[pos:pos+2000]
|
|
85
|
+
snippet_match = re.search(r'class=["\']result-snippet["\'][^>]*>(.*?)</td>', snippet_window, re.IGNORECASE | re.DOTALL)
|
|
86
|
+
|
|
87
|
+
if snippet_match:
|
|
88
|
+
raw_snippet = snippet_match.group(1)
|
|
89
|
+
else:
|
|
90
|
+
# Fallback to old behavior if structural match fails (e.g. slight layout change)
|
|
91
|
+
# But stop at 'link-text' span or next anchor to avoid junk
|
|
92
|
+
fallback_match = re.search(r'(.*?)(?:<a|<span class=["\']link-text)', snippet_window, re.DOTALL | re.IGNORECASE)
|
|
93
|
+
raw_snippet = fallback_match.group(1) if fallback_match else ""
|
|
94
|
+
|
|
95
|
+
# Clean HTML tags
|
|
62
96
|
snippet = re.sub(r'<[^>]+>', ' ', raw_snippet)
|
|
63
97
|
snippet = re.sub(r'\s+', ' ', snippet).strip()
|
|
64
98
|
|
|
@@ -14,7 +14,7 @@ class GoogleEngine(SearchEngine):
|
|
|
14
14
|
|
|
15
15
|
def build_url(self, query: str, limit: int = 10) -> str:
|
|
16
16
|
encoded_query = urllib.parse.quote(query)
|
|
17
|
-
return f"https://www.google.com/search?q={encoded_query}"
|
|
17
|
+
return f"https://www.google.com/search?q={encoded_query}&udm=14"
|
|
18
18
|
|
|
19
19
|
def parse(self, content: str) -> List[Dict[str, Any]]:
|
|
20
20
|
results = []
|
|
@@ -65,7 +65,7 @@ class SharedBrowserManager:
|
|
|
65
65
|
# Hide scrollbars globally
|
|
66
66
|
co.set_argument('--hide-scrollbars')
|
|
67
67
|
# 十万的原因是滚动条屏蔽(大概吧)
|
|
68
|
-
co.set_argument('--window-size=1280,
|
|
68
|
+
co.set_argument('--window-size=1280,800')
|
|
69
69
|
self._page = ChromiumPage(addr_or_opts=co)
|
|
70
70
|
|
|
71
71
|
# Show Landing Page
|
|
@@ -94,13 +94,20 @@ class SharedBrowserManager:
|
|
|
94
94
|
self.start()
|
|
95
95
|
return self._page
|
|
96
96
|
|
|
97
|
-
def new_tab(self, url: str) -> Any:
|
|
98
|
-
"""
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
97
|
+
def new_tab(self, url: str = None) -> Any:
    """
    Open a new browser tab, optionally navigating it to *url*.

    The call is forwarded straight to the underlying page object so that
    creation and navigation happen in a single request.
    NOTE(review): the original author states that DrissionPage tab creation
    is thread-safe, which is why no lock is taken here - confirm against
    the DrissionPage version in use.

    Raises:
        RuntimeError: if no browser page is available.
    """
    browser = self.page
    if browser:
        # Direct call allows Chrome to handle creation and navigation together
        return browser.new_tab(url)
    raise RuntimeError("Browser not available")
|
|
110
|
+
|
|
104
111
|
|
|
105
112
|
def close(self):
|
|
106
113
|
"""Shutdown the browser."""
|
|
@@ -12,7 +12,7 @@ from typing import List, Dict, Any, Optional
|
|
|
12
12
|
from concurrent.futures import ThreadPoolExecutor
|
|
13
13
|
|
|
14
14
|
from loguru import logger
|
|
15
|
-
from .
|
|
15
|
+
from .manager import SharedBrowserManager, get_shared_browser_manager
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class ContentRenderer:
|
|
@@ -23,6 +23,7 @@ class ContentRenderer:
|
|
|
23
23
|
|
|
24
24
|
if template_path is None:
|
|
25
25
|
current_dir = Path(__file__).parent
|
|
26
|
+
# Use card-dist which has properly inlined JS (viteSingleFile)
|
|
26
27
|
template_path = current_dir / "assets" / "card-dist" / "index.html"
|
|
27
28
|
|
|
28
29
|
self.template_path = Path(template_path)
|
|
@@ -42,7 +43,6 @@ class ContentRenderer:
|
|
|
42
43
|
def _ensure_manager(self):
|
|
43
44
|
"""Ensure shared browser manager exists."""
|
|
44
45
|
if not self._manager:
|
|
45
|
-
from .browser.manager import get_shared_browser_manager
|
|
46
46
|
self._manager = get_shared_browser_manager(headless=self.headless)
|
|
47
47
|
|
|
48
48
|
async def start(self, timeout: int = 6000):
|
|
@@ -55,6 +55,18 @@ class ContentRenderer:
|
|
|
55
55
|
loop = asyncio.get_running_loop()
|
|
56
56
|
return await loop.run_in_executor(self._executor, self._prepare_tab_sync)
|
|
57
57
|
|
|
58
|
+
def _wait_for_render_finished(self, tab, timeout: float = 3.0):
|
|
59
|
+
"""Wait for window.RENDER_FINISHED to be true in the tab."""
|
|
60
|
+
import time as pytime
|
|
61
|
+
start = pytime.time()
|
|
62
|
+
while pytime.time() - start < timeout:
|
|
63
|
+
is_finished = tab.run_js("return window.RENDER_FINISHED")
|
|
64
|
+
if is_finished:
|
|
65
|
+
return True
|
|
66
|
+
pytime.sleep(0.05) # Fast polling
|
|
67
|
+
logger.warning(f"ContentRenderer: Wait for RENDER_FINISHED timed out after {timeout}s")
|
|
68
|
+
return False
|
|
69
|
+
|
|
58
70
|
def _prepare_tab_sync(self) -> str:
|
|
59
71
|
"""Create and warm up a new tab, return its ID."""
|
|
60
72
|
import time as pytimeout
|
|
@@ -64,10 +76,10 @@ class ContentRenderer:
|
|
|
64
76
|
tab = self._manager.new_tab(self.template_path.as_uri())
|
|
65
77
|
tab_id = tab.tab_id
|
|
66
78
|
|
|
67
|
-
#
|
|
68
|
-
tab.
|
|
79
|
+
# Wait for app to mount instead of fixed 1s
|
|
80
|
+
tab.ele('#app', timeout=5)
|
|
69
81
|
|
|
70
|
-
# Pre-warm
|
|
82
|
+
# Pre-warm with data to trigger Vue render
|
|
71
83
|
warmup_data = {
|
|
72
84
|
"markdown": "# Ready",
|
|
73
85
|
"total_time": 0,
|
|
@@ -77,8 +89,11 @@ class ContentRenderer:
|
|
|
77
89
|
"theme_color": "#ef4444",
|
|
78
90
|
}
|
|
79
91
|
|
|
80
|
-
|
|
81
|
-
|
|
92
|
+
tab.run_js(f"window.updateRenderData({json.dumps(warmup_data)})")
|
|
93
|
+
self._wait_for_render_finished(tab, timeout=5.0)
|
|
94
|
+
|
|
95
|
+
# Wait for main-container after warmup (Vue needs to render it)
|
|
96
|
+
tab.ele('#main-container', timeout=3)
|
|
82
97
|
|
|
83
98
|
elapsed = pytimeout.time() - start
|
|
84
99
|
logger.info(f"ContentRenderer: Prepared tab {tab_id} in {elapsed:.2f}s")
|
|
@@ -186,9 +201,9 @@ class ContentRenderer:
|
|
|
186
201
|
"theme_color": theme_color,
|
|
187
202
|
}
|
|
188
203
|
|
|
189
|
-
# 1. Update Data &
|
|
204
|
+
# 1. Update Data & Wait for Finished flag
|
|
190
205
|
tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")
|
|
191
|
-
|
|
206
|
+
self._wait_for_render_finished(tab)
|
|
192
207
|
|
|
193
208
|
# 2. Dynamic Resize
|
|
194
209
|
# Get actual content height to prevent clipping
|
|
@@ -196,7 +211,7 @@ class ContentRenderer:
|
|
|
196
211
|
viewport_height = int(scroll_height) + 200
|
|
197
212
|
|
|
198
213
|
tab.run_cdp('Emulation.setDeviceMetricsOverride',
|
|
199
|
-
width=
|
|
214
|
+
width=1440, height=viewport_height, deviceScaleFactor=1, mobile=False
|
|
200
215
|
)
|
|
201
216
|
|
|
202
217
|
# 3. Hide Scrollbars (Now that viewport is large enough, overflow:hidden won't clip)
|
|
@@ -297,7 +312,7 @@ class ContentRenderer:
|
|
|
297
312
|
if not tab:
|
|
298
313
|
logger.warning("ContentRenderer: Pre-warmed tab not found, creating new.")
|
|
299
314
|
tab = page.new_tab(self.template_path.as_uri())
|
|
300
|
-
tab.
|
|
315
|
+
tab.ele('#app', timeout=5)
|
|
301
316
|
|
|
302
317
|
resolved_output_path = Path(output_path).resolve()
|
|
303
318
|
resolved_output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -317,15 +332,15 @@ class ContentRenderer:
|
|
|
317
332
|
|
|
318
333
|
tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")
|
|
319
334
|
|
|
320
|
-
#
|
|
321
|
-
tab.
|
|
335
|
+
# Wait for event-driven finish
|
|
336
|
+
self._wait_for_render_finished(tab, timeout=5.0)
|
|
322
337
|
|
|
323
338
|
# Dynamic Resize
|
|
324
339
|
scroll_height = tab.run_js('return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);')
|
|
325
340
|
viewport_height = int(scroll_height) + 200
|
|
326
341
|
|
|
327
342
|
tab.run_cdp('Emulation.setDeviceMetricsOverride',
|
|
328
|
-
width=
|
|
343
|
+
width=1440, height=viewport_height, deviceScaleFactor=1, mobile=False
|
|
329
344
|
)
|
|
330
345
|
|
|
331
346
|
# Hide scrollbars
|