cli-web-gai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli_web_gai-0.1.0/PKG-INFO +12 -0
- cli_web_gai-0.1.0/cli_web/gai/README.md +111 -0
- cli_web_gai-0.1.0/cli_web/gai/__init__.py +3 -0
- cli_web_gai-0.1.0/cli_web/gai/__main__.py +6 -0
- cli_web_gai-0.1.0/cli_web/gai/commands/__init__.py +0 -0
- cli_web_gai-0.1.0/cli_web/gai/commands/search.py +78 -0
- cli_web_gai-0.1.0/cli_web/gai/core/__init__.py +0 -0
- cli_web_gai-0.1.0/cli_web/gai/core/client.py +322 -0
- cli_web_gai-0.1.0/cli_web/gai/core/exceptions.py +71 -0
- cli_web_gai-0.1.0/cli_web/gai/core/models.py +39 -0
- cli_web_gai-0.1.0/cli_web/gai/gai_cli.py +152 -0
- cli_web_gai-0.1.0/cli_web/gai/tests/TEST.md +114 -0
- cli_web_gai-0.1.0/cli_web/gai/tests/__init__.py +0 -0
- cli_web_gai-0.1.0/cli_web/gai/tests/conftest.py +9 -0
- cli_web_gai-0.1.0/cli_web/gai/tests/test_core.py +317 -0
- cli_web_gai-0.1.0/cli_web/gai/tests/test_e2e.py +130 -0
- cli_web_gai-0.1.0/cli_web/gai/utils/__init__.py +0 -0
- cli_web_gai-0.1.0/cli_web/gai/utils/doctor.py +188 -0
- cli_web_gai-0.1.0/cli_web/gai/utils/helpers.py +66 -0
- cli_web_gai-0.1.0/cli_web/gai/utils/mcp_server.py +290 -0
- cli_web_gai-0.1.0/cli_web/gai/utils/output.py +44 -0
- cli_web_gai-0.1.0/cli_web/gai/utils/repl_skin.py +486 -0
- cli_web_gai-0.1.0/cli_web_gai.egg-info/PKG-INFO +12 -0
- cli_web_gai-0.1.0/cli_web_gai.egg-info/SOURCES.txt +28 -0
- cli_web_gai-0.1.0/cli_web_gai.egg-info/dependency_links.txt +1 -0
- cli_web_gai-0.1.0/cli_web_gai.egg-info/entry_points.txt +2 -0
- cli_web_gai-0.1.0/cli_web_gai.egg-info/requires.txt +4 -0
- cli_web_gai-0.1.0/cli_web_gai.egg-info/top_level.txt +1 -0
- cli_web_gai-0.1.0/setup.cfg +4 -0
- cli_web_gai-0.1.0/setup.py +23 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cli-web-gai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI for Google AI Mode — AI-powered search with source references
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: click>=8.0
|
|
7
|
+
Requires-Dist: playwright>=1.40
|
|
8
|
+
Requires-Dist: rich>=13.0
|
|
9
|
+
Requires-Dist: prompt_toolkit>=3.0
|
|
10
|
+
Dynamic: requires-dist
|
|
11
|
+
Dynamic: requires-python
|
|
12
|
+
Dynamic: summary
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# cli-web-gai
|
|
2
|
+
|
|
3
|
+
CLI for Google AI Mode — submit questions and get AI-generated answers with source references, powered by a headless Playwright browser.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install cli-web-gai
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
### Search
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Ask a question
|
|
17
|
+
cli-web-gai search ask "What is quantum computing?"
|
|
18
|
+
|
|
19
|
+
# Ask with JSON output
|
|
20
|
+
cli-web-gai search ask "Best Python frameworks" --json
|
|
21
|
+
|
|
22
|
+
# Ask in a specific language
|
|
23
|
+
cli-web-gai search ask "What is machine learning?" --lang he
|
|
24
|
+
|
|
25
|
+
# Show the browser window (for debugging or solving CAPTCHAs)
|
|
26
|
+
cli-web-gai search ask "Explain DNS" --headed
|
|
27
|
+
|
|
28
|
+
# Set a custom timeout (seconds)
|
|
29
|
+
cli-web-gai search ask "History of the internet" --timeout 45
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Follow-up Questions
|
|
33
|
+
|
|
34
|
+
After an initial `ask`, you can ask follow-up questions within the same session:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
cli-web-gai search followup "Tell me more about that"
|
|
38
|
+
cli-web-gai search followup "How is it used in practice?" --json
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Follow-up questions reuse the browser page opened by the previous query, so the conversation context only exists while the same process keeps running (for example, inside REPL mode).
|
|
42
|
+
|
|
43
|
+
## JSON Output
|
|
44
|
+
|
|
45
|
+
Every command supports `--json` for structured output:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
cli-web-gai search ask "capital of France" --json
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
|
|
53
|
+
```json
|
|
54
|
+
{
|
|
55
|
+
"success": true,
|
|
56
|
+
"data": {
|
|
57
|
+
"query": "capital of France",
|
|
58
|
+
"answer": "The capital of France is Paris...",
|
|
59
|
+
"sources": [
|
|
60
|
+
{"title": "Wikipedia", "url": "https://en.wikipedia.org/wiki/Paris", "snippet": "..."}
|
|
61
|
+
],
|
|
62
|
+
"follow_up_prompt": "What are the main attractions in Paris?"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Errors also return structured JSON:
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{
|
|
71
|
+
"error": true,
|
|
72
|
+
"code": "CAPTCHA_REQUIRED",
|
|
73
|
+
"message": "Google presented a CAPTCHA. Please solve it in a browser and try again."
|
|
74
|
+
}
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## REPL Mode
|
|
78
|
+
|
|
79
|
+
Run without arguments to enter interactive mode:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
cli-web-gai
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
REPL shortcuts:
|
|
86
|
+
- `ask <query>` — same as `search ask <query>`
|
|
87
|
+
- `followup <query>` — same as `search followup <query>`
|
|
88
|
+
- `help` — show available commands
|
|
89
|
+
- `exit` / `quit` — exit the REPL
|
|
90
|
+
|
|
91
|
+
## Authentication
|
|
92
|
+
|
|
93
|
+
No authentication is required. Google AI Mode is publicly accessible.
|
|
94
|
+
|
|
95
|
+
Note: Google rate-limits headless browsers. If you encounter a CAPTCHA, use `--headed` to open a visible browser window and solve it manually.
|
|
96
|
+
|
|
97
|
+
## Testing
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
cd gai/agent-harness
|
|
101
|
+
pip install -e .
|
|
102
|
+
python -m pytest cli_web/gai/tests/ -v -s
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Set `CLI_WEB_FORCE_INSTALLED=1` for subprocess tests to find the installed CLI binary.
|
|
106
|
+
|
|
107
|
+
## Protocol
|
|
108
|
+
|
|
109
|
+
- **Website:** google.com/search (AI Mode)
|
|
110
|
+
- **Protocol:** Browser-rendered (Playwright headless Chromium)
|
|
111
|
+
- **Auth:** None
|
|
File without changes
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Search commands for cli-web-gai."""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from ..core.client import GAIClient
|
|
6
|
+
from ..utils.helpers import handle_errors, print_json
|
|
7
|
+
from ..utils.output import print_search_result
|
|
8
|
+
|
|
9
|
+
# Process-wide client shared by the commands in this module. It is created
# lazily by _get_client() and reset to None by close_client().
_client: GAIClient | None = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _get_client(headless: bool = True, lang: str = "en") -> GAIClient:
    """Return the module-wide GAIClient, building it on first use.

    ``headless`` and ``lang`` are only consulted when no client exists yet;
    an already-created client is handed back unchanged so that the same
    browser page can be reused for follow-up questions.
    """
    global _client

    if _client:
        return _client

    _client = GAIClient(headless=headless, lang=lang)
    return _client
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def close_client():
    """Shut down the shared client, if one exists, and forget it."""
    global _client

    if not _client:
        return

    _client.close()
    _client = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@click.group("search", invoke_without_command=True)
@click.pass_context
def search_group(ctx):
    """Search Google AI Mode."""
    # The docstring above doubles as the group's --help text.
    if ctx.invoked_subcommand:
        return

    # Bare `search` with no subcommand: print the group's help instead.
    click.echo(ctx.get_help())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@search_group.command("ask")
@click.argument("query", nargs=-1, required=True)
@click.option("--lang", default="en", help="Response language (e.g., en, he, de).")
@click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
@click.option("--headed", is_flag=True, help="Show browser window (for debugging).")
@click.option("--timeout", type=int, default=30, help="Response timeout in seconds.")
def ask(query, lang, use_json, headed, timeout):
    """Submit a query to Google AI Mode.

    Example: cli-web-gai search ask "What is quantum computing?"
    """
    # `query` arrives as a tuple of words (nargs=-1), so an unquoted
    # multi-word question is stitched back together with single spaces.
    question = " ".join(query)

    with handle_errors(json_mode=use_json):
        client = _get_client(headless=not headed, lang=lang)
        # The CLI option is in seconds; the client keeps milliseconds.
        client._timeout = timeout * 1000
        result = client.search(question)

        if not use_json:
            print_search_result(result)
        else:
            print_json(result.to_dict())
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@search_group.command("followup")
@click.argument("query", nargs=-1, required=True)
@click.option("--json", "use_json", is_flag=True, help="Output as JSON.")
def followup(query, use_json):
    """Ask a follow-up question in the current conversation.

    Requires a previous 'ask' command in this session.

    Example: cli-web-gai search followup "Tell me more about that"
    """
    question = " ".join(query)

    with handle_errors(json_mode=use_json):
        # Picks up the shared client (a default one is created if none
        # exists yet); the client itself rejects the call when it has no
        # conversation page to continue.
        result = _get_client().followup(question)

        if not use_json:
            print_search_result(result)
        else:
            print_json(result.to_dict())
|
|
File without changes
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"""Playwright-based browser client for Google AI Mode."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import urllib.parse
|
|
5
|
+
|
|
6
|
+
from playwright.sync_api import Page, sync_playwright
|
|
7
|
+
|
|
8
|
+
from .exceptions import BrowserError, CaptchaError, NetworkError, ParseError, TimeoutError
|
|
9
|
+
from .models import SearchResult, Source
|
|
10
|
+
|
|
11
|
+
# Windows event loop fix for Playwright
# NOTE(review): this re-installs asyncio's *default* policy rather than a
# specific one, so it presumably only matters if something changed the policy
# before this module was imported — confirm it is still needed. The asyncio
# policy API is deprecated as of Python 3.14.
if sys.platform == "win32":
    import asyncio

    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
|
|
16
|
+
|
|
17
|
+
# Base URL for Google Search; search() appends the encoded query string.
_SEARCH_URL = "https://www.google.com/search"
# Default wait budget in milliseconds (handed to Playwright's timeout= args).
_DEFAULT_TIMEOUT = 30000
# CSS selectors for the AI Mode DOM.
# NOTE(review): in the code visible here only _TURN_SELECTOR is referenced by
# name; the embedded JavaScript snippets hard-code the same selector strings,
# so keep both in sync if the page markup changes.
_ANSWER_SELECTOR = ".Y3BBE"
_TURN_SELECTOR = "[data-subtree=aimc]"
_COMPLETE_SELECTOR = "[data-complete=true]"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class GAIClient:
|
|
25
|
+
"""Headless browser client for Google AI Mode queries."""
|
|
26
|
+
|
|
27
|
+
def __init__(self, headless: bool = True, lang: str = "en", timeout: int = _DEFAULT_TIMEOUT):
    """Record the settings; the browser is only started on first use.

    Args:
        headless: Forwarded to Chromium's launch() call.
        lang: Used as the browser-context locale and as the ``hl`` URL
            parameter of each search.
        timeout: Wait budget in milliseconds for navigation and page waits.
    """
    # Playwright handles; populated later by _ensure_browser().
    self._pw = self._browser = self._context = self._page = None
    self._headless, self._lang, self._timeout = headless, lang, timeout
|
|
35
|
+
|
|
36
|
+
def _ensure_browser(self) -> Page:
    """Launch the browser if needed and return the active page.

    Returns:
        The current page when it is still open, otherwise a page from a
        freshly launched Chromium instance.

    Raises:
        BrowserError: If Playwright or Chromium cannot be started. Handles
            created before the failure are released before raising.
    """
    if self._page and not self._page.is_closed():
        return self._page

    # Reaching this point with handles still set means the page went away
    # while the driver/browser stayed up. Release them before relaunching
    # instead of overwriting (and thereby orphaning) the old handles.
    if any(h is not None for h in (self._pw, self._browser, self._context)):
        self.close()

    try:
        self._pw = sync_playwright().start()
        self._browser = self._pw.chromium.launch(
            headless=self._headless,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--no-first-run",
                "--no-default-browser-check",
            ],
        )

        self._context = self._browser.new_context(
            user_agent=(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
            locale=self._lang,
            viewport={"width": 1280, "height": 720},
        )

        self._page = self._context.new_page()

        return self._page
    except Exception as e:
        # Do not leave a half-started driver or browser process behind.
        self.close()
        raise BrowserError(f"Failed to launch browser: {e}") from e
|
|
67
|
+
|
|
68
|
+
def search(self, query: str) -> SearchResult:
    """Run a fresh AI Mode query and return the parsed answer.

    Navigates the shared page to the search URL carrying the query,
    ``udm=50`` and the client's language, then hands over to
    _wait_and_extract() to wait for and parse the response.

    Raises:
        NetworkError: If the navigation itself fails.
    """
    page = self._ensure_browser()

    query_string = urllib.parse.urlencode({"q": query, "udm": "50", "hl": self._lang})
    target = _SEARCH_URL + "?" + query_string

    try:
        page.goto(target, wait_until="domcontentloaded", timeout=self._timeout)
    except Exception as e:
        raise NetworkError(f"Failed to load Google AI Mode: {e}") from e

    return self._wait_and_extract(page, query)
|
|
85
|
+
|
|
86
|
+
def followup(self, query: str) -> SearchResult:
    """Ask a follow-up question in the current conversation.

    Requires a previous search() call in this session: the page must still
    be on the Google search URL. The question is typed into the page's
    textarea and submitted with Enter; the method then waits for an
    additional conversation turn before extracting the answer.

    Raises:
        BrowserError: If the page is not on a search URL, or if counting
            the existing turns, finding the input, or typing/submitting
            fails.
        ParseError: If no textarea element can be found.
        TimeoutError: If no additional turn appears within the timeout.
    """
    page = self._ensure_browser()

    if _SEARCH_URL not in (page.url or ""):
        raise BrowserError("No active conversation. Run 'search' first.")

    try:
        # Snapshot the turn count so the new answer can be told apart from
        # earlier ones. This runs inside the try so that a page failure is
        # reported as BrowserError like every other interaction below,
        # rather than escaping as a raw driver exception.
        turn_count_before = page.evaluate(
            "() => document.querySelectorAll('[data-subtree=aimc]').length"
        )
        page.wait_for_selector("textarea", timeout=5000)
        textarea = page.query_selector("textarea")
        if not textarea:
            raise ParseError("Follow-up input not found on page.")
        textarea.focus()
        textarea.fill(query)
        page.keyboard.press("Enter")
    except ParseError:
        raise
    except Exception as e:
        raise BrowserError(f"Failed to submit follow-up: {e}") from e

    try:
        # A new turn element appearing is the signal that the follow-up was
        # accepted; completion of that turn is awaited in _wait_and_extract.
        page.wait_for_function(
            f"() => document.querySelectorAll('[data-subtree=aimc]').length > {turn_count_before}",
            timeout=self._timeout,
        )
    except Exception as exc:
        raise TimeoutError(
            f"Follow-up response did not appear within {self._timeout // 1000}s.",
            timeout_seconds=self._timeout / 1000,
        ) from exc

    return self._wait_and_extract(page, query)
|
|
125
|
+
|
|
126
|
+
def _wait_and_extract(self, page: Page, query: str) -> SearchResult:
    """Block until the newest AI Mode turn looks finished, then parse it.

    Sequence: fail immediately if a CAPTCHA element is present; require a
    turn element to show up within the timeout; wait, on a best-effort
    basis, for the completion marker and for a source link in the last
    turn; pause briefly; then delegate to _extract_result().

    Raises:
        CaptchaError: If a CAPTCHA element is found on the page.
        TimeoutError: If no turn element appears within the timeout.
    """
    if page.query_selector("#captcha-form, .g-recaptcha, #recaptcha"):
        raise CaptchaError(
            "Google presented a CAPTCHA. Please solve it in a browser and try again."
        )

    # A turn element must exist before anything can be extracted.
    try:
        page.wait_for_selector(_TURN_SELECTOR, timeout=self._timeout)
    except Exception as exc:
        raise TimeoutError(
            f"AI Mode response did not appear within {self._timeout // 1000}s. "
            "Google may not have returned an AI response for this query.",
            timeout_seconds=self._timeout / 1000,
        ) from exc

    completion_probe = """() => {
        const turns = document.querySelectorAll('[data-subtree=aimc]');
        const last = turns[turns.length - 1];
        return last && last.querySelector('[data-complete=true]');
    }"""
    sources_probe = """() => {
        const turns = document.querySelectorAll('[data-subtree=aimc]');
        const last = turns[turns.length - 1];
        return last && last.querySelector('a[data-ved]');
    }"""

    # Both waits are optional: if either one fails or times out, extraction
    # still goes ahead with whatever has been rendered so far.
    for probe, budget in ((completion_probe, self._timeout), (sources_probe, 5000)):
        try:
            page.wait_for_function(probe, timeout=budget)
        except Exception:
            pass  # proceed anyway

    # Short pause so final rendering can settle before reading the DOM.
    page.wait_for_timeout(500)

    return self._extract_result(page, query)
|
|
178
|
+
|
|
179
|
+
def _extract_result(self, page: Page, query: str) -> SearchResult:
    """Extract the AI-generated answer and sources from the page.

    Runs one JavaScript snippet in the page that inspects the last
    ``[data-subtree=aimc]`` element and returns a plain object with:

    * ``answer`` - text of its ``.Y3BBE`` sections joined by blank lines,
      after removing citation-style buttons from a clone of each section;
    * ``sources`` - up to 20 external links (``a[href][data-ved]``), with
      Google redirect URLs unwrapped, fragments stripped, duplicates and
      Google/gstatic links dropped, each with a title and optional snippet;
    * ``followUp`` - the final section, moved out of the answer, when it
      contains a ``?`` and is shorter than 200 characters.

    Args:
        page: Page holding the rendered AI Mode conversation.
        query: The question that was asked; copied into the result.

    Returns:
        A SearchResult built from the extracted object.

    Raises:
        ParseError: If the snippet returns nothing (no turn element found).
    """
    # NOTE: the JavaScript below is a runtime string; its inline comments
    # are part of that string and are intentionally left untouched.
    result = page.evaluate(
        """() => {
            const turns = document.querySelectorAll('[data-subtree=aimc]');
            const lastTurn = turns[turns.length - 1];
            if (!lastTurn) return null;

            // Extract answer text from .Y3BBE sections
            const sections = lastTurn.querySelectorAll('.Y3BBE');
            const parts = [];
            for (const sec of sections) {
                // Clone to remove inline source buttons before extracting text
                const clone = sec.cloneNode(true);
                // Remove ALL buttons (source citations like "Teradata +2", "Show links")
                const buttons = clone.querySelectorAll('[role=button], button, [jsaction]');
                for (const b of buttons) {
                    const text = (b.innerText || '').trim();
                    // Remove citation buttons: "SiteName +N" or just "+N"
                    if (text.match(/\\+\\d+$/) || text.match(/^\\d+ sites?$/i)) {
                        b.remove();
                    }
                }
                const text = clone.innerText.trim();
                if (text) parts.push(text);
            }

            // Extract source links \u2014 handle Google redirect URLs and empty-text links
            const sources = [];
            const seen = new Set();
            const links = lastTurn.querySelectorAll('a[href][data-ved]');
            for (const a of links) {
                let href = a.href;
                if (!href) continue;

                // Resolve Google redirect URLs: /url?q=https://example.com/...
                try {
                    const u = new URL(href);
                    if (u.hostname.includes('google.com') && u.pathname === '/url') {
                        const target = u.searchParams.get('q') || u.searchParams.get('url');
                        if (target) href = target;
                    }
                } catch(e) {}

                // Skip Google internal links
                if (href.includes('google.com') || href.includes('gstatic.com')) continue;
                if (!href.startsWith('http')) continue;

                // Strip URL fragment (#:~:text=...)
                const cleanHref = href.split('#')[0];
                if (seen.has(cleanHref)) continue;
                seen.add(cleanHref);

                // Get title: first try link text, then parent container text
                let title = a.innerText.trim().split('\\n')[0];
                if (!title || title.length < 4) {
                    // Links may have empty text \u2014 extract domain as fallback title
                    try {
                        const domain = new URL(cleanHref).hostname.replace('www.', '');
                        title = domain;
                    } catch(e) { title = cleanHref.substring(0, 60); }
                }

                // Skip citation numbers
                if (title.match(/^\\+?\\d+$/)) continue;

                // Look for snippet near the link in parent container
                let snippet = '';
                const card = a.closest('li, [class]');
                if (card) {
                    const allText = card.innerText.trim();
                    const lines = allText.split('\\n').filter(l => l.length > 30);
                    for (const line of lines) {
                        if (line !== title && line.length > 30) {
                            snippet = line.substring(0, 200);
                            break;
                        }
                    }
                }

                sources.push({title: title.substring(0, 200), url: cleanHref, snippet: snippet});
            }

            // Check for follow-up prompt (last section often suggests next questions)
            let followUp = '';
            if (parts.length > 0) {
                const last = parts[parts.length - 1];
                if (last.includes('?') && last.length < 200) {
                    followUp = last;
                    parts.pop();
                }
            }

            return {
                answer: parts.join('\\n\\n'),
                sources: sources.slice(0, 20),
                followUp: followUp
            };
        }"""
    )

    # The snippet returns null when the page has no turn element at all.
    if not result:
        raise ParseError("Could not extract AI response from page.")

    # Convert the plain JS objects into Source models, tolerating missing keys.
    sources = [
        Source(
            title=s.get("title", ""),
            url=s.get("url", ""),
            snippet=s.get("snippet", ""),
        )
        for s in result.get("sources", [])
    ]

    return SearchResult(
        query=query,
        answer=result.get("answer", ""),
        sources=sources,
        follow_up_prompt=result.get("followUp", ""),
    )
|
|
298
|
+
|
|
299
|
+
def close(self):
|
|
300
|
+
"""Close all browser resources."""
|
|
301
|
+
try:
|
|
302
|
+
if self._page and not self._page.is_closed():
|
|
303
|
+
self._page.close()
|
|
304
|
+
if self._context:
|
|
305
|
+
self._context.close()
|
|
306
|
+
if self._browser:
|
|
307
|
+
self._browser.close()
|
|
308
|
+
if self._pw:
|
|
309
|
+
self._pw.stop()
|
|
310
|
+
except Exception:
|
|
311
|
+
pass
|
|
312
|
+
finally:
|
|
313
|
+
self._page = None
|
|
314
|
+
self._context = None
|
|
315
|
+
self._browser = None
|
|
316
|
+
self._pw = None
|
|
317
|
+
|
|
318
|
+
def __enter__(self):
|
|
319
|
+
return self
|
|
320
|
+
|
|
321
|
+
def __exit__(self, *args):
|
|
322
|
+
self.close()
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Domain-specific exception hierarchy for cli-web-gai."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class GAIError(Exception):
    """Root of the cli-web-gai exception hierarchy."""

    def to_dict(self) -> dict:
        """Render this error as a JSON-ready dict (error / code / message)."""
        payload = {"error": True}
        payload["code"] = error_code_for(self)
        payload["message"] = str(self)
        return payload
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BrowserError(GAIError):
    """The browser could not be launched or driven as required."""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TimeoutError(GAIError):
    """Raised when a response fails to arrive within the timeout window.

    NOTE: this name shadows the builtin ``TimeoutError`` in any module that
    imports it.

    Attributes:
        timeout_seconds: The wait budget, in seconds, that was exceeded.
    """

    def __init__(self, message: str, timeout_seconds: float = 0):
        self.timeout_seconds = timeout_seconds
        super().__init__(message)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RateLimitError(GAIError):
    """Google appears to be rate-limiting requests."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class NetworkError(GAIError):
    """A network or connection problem prevented the request."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ServerError(GAIError):
    """Google answered with a server-side error page."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class NotFoundError(GAIError):
    """The requested resource or result does not exist."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class AuthError(GAIError):
    """Authentication failed (unused: cli-web-gai requires no auth)."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ParseError(GAIError):
    """The AI Mode response could not be parsed out of the page."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CaptchaError(GAIError):
    """Google responded with a CAPTCHA challenge."""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# --- JSON error code mapping (matches utils/helpers.py conventions) ---
# Maps each concrete exception class to the string reported as "code" in JSON
# error output. error_code_for() scans this dict in insertion order and
# returns the first isinstance() match.

EXCEPTION_CODE_MAP = {
    BrowserError: "BROWSER_ERROR",
    CaptchaError: "CAPTCHA_REQUIRED",
    AuthError: "AUTH_ERROR",
    NetworkError: "NETWORK_ERROR",
    NotFoundError: "NOT_FOUND",
    ServerError: "SERVER_ERROR",
    ParseError: "PARSE_ERROR",
    RateLimitError: "RATE_LIMITED",
    TimeoutError: "TIMEOUT",
}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def error_code_for(exc: Exception) -> str:
    """Map an exception instance to its JSON error code.

    Returns the code of the first EXCEPTION_CODE_MAP entry (in insertion
    order) whose class ``exc`` is an instance of, or ``"UNKNOWN"`` when no
    entry matches.
    """
    matches = (
        code
        for exc_type, code in EXCEPTION_CODE_MAP.items()
        if isinstance(exc, exc_type)
    )
    return next(matches, "UNKNOWN")
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Response models for cli-web-gai."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import asdict, dataclass, field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class Source:
    """One cited reference attached to an AI answer."""

    title: str
    url: str
    snippet: str = ""

    def to_dict(self) -> dict:
        """Return the fields as a dict, leaving out ``snippet`` when empty."""
        return {
            key: value
            for key, value in asdict(self).items()
            if key != "snippet" or value
        }
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class SearchResult:
    """The answer AI Mode gave for one query, plus its cited sources."""

    query: str
    answer: str
    sources: list[Source] = field(default_factory=list)
    follow_up_prompt: str = ""

    def to_dict(self) -> dict:
        """Build the success envelope used for JSON output.

        ``follow_up_prompt`` is only included when it is non-empty.
        """
        data = {
            "query": self.query,
            "answer": self.answer,
            "sources": [src.to_dict() for src in self.sources],
        }
        if self.follow_up_prompt:
            data["follow_up_prompt"] = self.follow_up_prompt
        return {"success": True, "data": data}
|