agentgate-mcp 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +18 -34
- package/MCP_TOOLS.md +50 -26
- package/README.md +54 -75
- package/package.json +1 -3
- package/src/browser-session.js +230 -0
- package/src/cli.js +14 -45
- package/src/config.js +1 -9
- package/src/mcp-server.js +136 -67
- package/src/orchestrator.js +54 -67
- package/services/_template.service.json +0 -34
- package/src/browser-runtime.js +0 -411
- package/src/integrations/captcha-solver.js +0 -128
- package/src/integrations/gmail-watcher.js +0 -129
- package/src/playwright-engine.js +0 -391
- package/src/registry.js +0 -47
- package/src/scaffold.js +0 -103
- package/src/setup.js +0 -109
- package/src/signup-engine.js +0 -24
- package/src/vault.js +0 -105
package/ARCHITECTURE.md
CHANGED
|
@@ -4,45 +4,29 @@
|
|
|
4
4
|
|
|
5
5
|
1. User runs `agentgate login` → opens Chromium → signs into Google → browser profile saved
|
|
6
6
|
2. `agentgate serve` starts MCP server over stdio
|
|
7
|
-
3. AI agent calls `get_or_create_key(
|
|
8
|
-
4.
|
|
9
|
-
5.
|
|
10
|
-
6.
|
|
11
|
-
7.
|
|
12
|
-
8.
|
|
7
|
+
3. AI agent calls `get_or_create_key("openai")` → no key cached
|
|
8
|
+
4. Agent calls `open_browser("https://platform.openai.com")` → sees screenshot
|
|
9
|
+
5. Agent decides what to click/fill based on the screenshot
|
|
10
|
+
6. Agent calls `browser_action({ action: "click", selector: "text=Sign in" })` → sees result
|
|
11
|
+
7. Agent navigates to API keys page, creates a key, extracts it
|
|
12
|
+
8. Agent calls `save_key("openai", "sk-...")` → key cached for next time
|
|
13
|
+
9. Agent calls `close_browser()` → done
|
|
14
|
+
|
|
15
|
+
The AI agent is the brain. AgentGate is the hands.
|
|
13
16
|
|
|
14
17
|
## Core modules
|
|
15
18
|
|
|
16
|
-
- `src/cli.js` — CLI entry point (`login`, `serve`, `doctor
|
|
17
|
-
- `src/mcp-server.js` — MCP JSON-RPC over stdio
|
|
18
|
-
- `src/orchestrator.js` —
|
|
19
|
-
- `src/
|
|
20
|
-
- `src/
|
|
21
|
-
- `src/
|
|
22
|
-
- `src/
|
|
23
|
-
- `src/db.js` — SQLite key + alias storage
|
|
24
|
-
- `src/registry.js` — Optional service recipe loader
|
|
25
|
-
- `src/scaffold.js` — Recipe template generator
|
|
26
|
-
- `src/logger.js` — Structured JSON logging with rotation
|
|
27
|
-
|
|
28
|
-
## Two modes of key creation
|
|
29
|
-
|
|
30
|
-
### Smart mode (default)
|
|
31
|
-
No recipe needed. Engine uses heuristics to:
|
|
32
|
-
1. Find and click Google sign-in buttons
|
|
33
|
-
2. Handle OAuth popup
|
|
34
|
-
3. Navigate to API keys page
|
|
35
|
-
4. Click "Create API Key" buttons
|
|
36
|
-
5. Extract the key from the page
|
|
37
|
-
|
|
38
|
-
### Recipe mode (optional)
|
|
39
|
-
For services with non-standard flows, a JSON recipe in `services/` provides
|
|
40
|
-
an explicit workflow with DSL actions (goto, click, fill, extract, etc.).
|
|
19
|
+
- `src/cli.js` — CLI entry point (`login`, `serve`, `doctor`)
|
|
20
|
+
- `src/mcp-server.js` — MCP JSON-RPC over stdio, 7 tools
|
|
21
|
+
- `src/orchestrator.js` — Thin coordinator between DB and browser session
|
|
22
|
+
- `src/browser-session.js` — Persistent Chromium session with Google profile
|
|
23
|
+
- `src/db.js` — SQLite key storage
|
|
24
|
+
- `src/config.js` — Path resolution and directory setup
|
|
25
|
+
- `src/logger.js` — Structured JSON logging with secret masking
|
|
41
26
|
|
|
42
27
|
## Security
|
|
43
28
|
|
|
44
29
|
- Browser profile stored locally in `~/.agentgate/browser-profile/`
|
|
45
|
-
- Vault encrypted with AES-256-GCM (keyring + vault file)
|
|
46
30
|
- SQLite database stored locally
|
|
47
31
|
- API keys masked in logs (last 4 chars visible)
|
|
48
32
|
- File permissions 0o600 on sensitive files
|
|
@@ -52,5 +36,5 @@ an explicit workflow with DSL actions (goto, click, fill, extract, etc.).
|
|
|
52
36
|
|
|
53
37
|
- Version: 2024-11-05
|
|
54
38
|
- Methods: `initialize`, `ping`, `tools/list`, `tools/call`
|
|
55
|
-
- Notifications: `notifications/initialized
|
|
56
|
-
- Tools: `get_or_create_key`, `
|
|
39
|
+
- Notifications: `notifications/initialized`
|
|
40
|
+
- Tools: `get_or_create_key`, `open_browser`, `browser_action`, `save_key`, `close_browser`, `list_my_keys`, `revoke_key`
|
package/MCP_TOOLS.md
CHANGED
|
@@ -2,44 +2,68 @@
|
|
|
2
2
|
|
|
3
3
|
## `get_or_create_key`
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Check if a cached API key exists for a service.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
```json
|
|
8
|
+
{ "service": "openai" }
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Returns the cached key or `{ "exists": false }`.
|
|
12
|
+
|
|
13
|
+
## `open_browser`
|
|
14
|
+
|
|
15
|
+
Open a browser with the saved Google session and navigate to a URL. Returns a screenshot.
|
|
8
16
|
|
|
9
17
|
```json
|
|
10
|
-
{
|
|
11
|
-
"service": "twelvelabs",
|
|
12
|
-
"signup_url": "https://api.twelvelabs.io/signup",
|
|
13
|
-
"api_key_url": "https://api.twelvelabs.io/dashboard/api-keys"
|
|
14
|
-
}
|
|
18
|
+
{ "url": "https://platform.openai.com/signup" }
|
|
15
19
|
```
|
|
16
20
|
|
|
17
|
-
|
|
18
|
-
- `service` (required) — Service name, used as cache key
|
|
19
|
-
- `signup_url` (required) — Where to start the sign-up/login flow
|
|
20
|
-
- `api_key_url` (optional) — Direct link to the API keys dashboard
|
|
21
|
+
## `browser_action`
|
|
21
22
|
|
|
22
|
-
|
|
23
|
-
1. Returns cached key from SQLite if one exists
|
|
24
|
-
2. Otherwise opens browser with saved Google session
|
|
25
|
-
3. Navigates to signup_url, finds Google sign-in, authenticates
|
|
26
|
-
4. Navigates to api_key_url (if provided), creates and extracts key
|
|
27
|
-
5. Caches key in SQLite and returns it
|
|
23
|
+
Perform an action in the open browser. Returns a screenshot after each action.
|
|
28
24
|
|
|
29
|
-
|
|
25
|
+
```json
|
|
26
|
+
{ "action": "click", "selector": "text=Sign in with Google" }
|
|
27
|
+
{ "action": "fill", "selector": "input[name=email]", "value": "test@example.com" }
|
|
28
|
+
{ "action": "goto", "url": "https://platform.openai.com/api-keys" }
|
|
29
|
+
{ "action": "extract_text", "selector": ".api-key" }
|
|
30
|
+
{ "action": "extract_all_text" }
|
|
31
|
+
{ "action": "scroll", "value": "500" }
|
|
32
|
+
{ "action": "press", "key": "Enter" }
|
|
33
|
+
{ "action": "wait", "selector": ".loaded", "ms": 5000 }
|
|
34
|
+
{ "action": "screenshot" }
|
|
35
|
+
```
|
|
30
36
|
|
|
31
|
-
|
|
37
|
+
Actions: `click`, `fill`, `select`, `press`, `scroll`, `goto`, `wait`, `screenshot`, `extract_text`, `extract_all_text`
|
|
32
38
|
|
|
33
|
-
|
|
39
|
+
## `save_key`
|
|
34
40
|
|
|
35
|
-
|
|
41
|
+
Store an API key the agent found on the page.
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{ "service": "openai", "api_key": "sk-..." }
|
|
45
|
+
```
|
|
36
46
|
|
|
37
|
-
|
|
47
|
+
## `close_browser`
|
|
38
48
|
|
|
39
|
-
|
|
49
|
+
Close the browser session.
|
|
40
50
|
|
|
41
|
-
|
|
51
|
+
```json
|
|
52
|
+
{}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## `list_my_keys`
|
|
56
|
+
|
|
57
|
+
List all stored API keys.
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## `revoke_key`
|
|
42
64
|
|
|
43
|
-
|
|
65
|
+
Delete a stored API key.
|
|
44
66
|
|
|
45
|
-
|
|
67
|
+
```json
|
|
68
|
+
{ "service": "openai" }
|
|
69
|
+
```
|
package/README.md
CHANGED
|
@@ -1,15 +1,23 @@
|
|
|
1
1
|
# AgentGate
|
|
2
2
|
|
|
3
|
-
MCP server that
|
|
3
|
+
MCP server that gives AI agents a browser with your Google session. The agent sees screenshots, decides what to click, and grabs API keys from any service.
|
|
4
4
|
|
|
5
5
|
## How it works
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
```
|
|
8
|
+
You: "Get me a Twelve Labs API key"
|
|
9
|
+
|
|
10
|
+
Agent: get_or_create_key("twelvelabs") → no key cached
|
|
11
|
+
Agent: open_browser("https://twelvelabs.io") → sees screenshot
|
|
12
|
+
Agent: click("Sign in with Google") → sees dashboard
|
|
13
|
+
Agent: goto("https://twelvelabs.io/api-keys") → sees API keys page
|
|
14
|
+
Agent: click("Create API Key") → sees new key
|
|
15
|
+
Agent: extract_text(".api-key") → reads the key
|
|
16
|
+
Agent: save_key("twelvelabs", "tl_key_abc...") → cached for next time
|
|
17
|
+
Agent: close_browser() → done
|
|
18
|
+
```
|
|
11
19
|
|
|
12
|
-
|
|
20
|
+
The AI agent is the brain. AgentGate is the hands.
|
|
13
21
|
|
|
14
22
|
## Install
|
|
15
23
|
|
|
@@ -17,7 +25,7 @@ No hardcoded services. No config files per provider. Works for anything with "Si
|
|
|
17
25
|
npm install -g agentgate-mcp
|
|
18
26
|
```
|
|
19
27
|
|
|
20
|
-
Requires Node.js 23
|
|
28
|
+
Requires Node.js 23+.
|
|
21
29
|
|
|
22
30
|
## Setup (one time)
|
|
23
31
|
|
|
@@ -25,7 +33,7 @@ Requires Node.js 23+ (for built-in SQLite support).
|
|
|
25
33
|
agentgate login
|
|
26
34
|
```
|
|
27
35
|
|
|
28
|
-
|
|
36
|
+
Opens Chromium — sign into Google, close the browser. Done.
|
|
29
37
|
|
|
30
38
|
## Add to Claude Code
|
|
31
39
|
|
|
@@ -33,70 +41,55 @@ This opens Chromium — sign into your Google account, then close the browser. D
|
|
|
33
41
|
claude mcp add agentgate -- agentgate serve
|
|
34
42
|
```
|
|
35
43
|
|
|
36
|
-
Or manually add to your MCP config:
|
|
37
|
-
|
|
38
|
-
```json
|
|
39
|
-
{
|
|
40
|
-
"mcpServers": {
|
|
41
|
-
"agentgate": {
|
|
42
|
-
"command": "agentgate",
|
|
43
|
-
"args": ["serve"]
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
## Usage
|
|
50
|
-
|
|
51
|
-
Just ask your AI agent naturally:
|
|
52
|
-
|
|
53
|
-
- *"Get me an API key for Twelve Labs"*
|
|
54
|
-
- *"I need an OpenAI key"*
|
|
55
|
-
- *"Set me up with a Replicate key"*
|
|
56
|
-
- *"Show all my keys"*
|
|
57
|
-
- *"Revoke my openai key"*
|
|
58
|
-
|
|
59
44
|
## MCP Tools
|
|
60
45
|
|
|
61
|
-
|
|
46
|
+
| Tool | What it does |
|
|
47
|
+
|------|-------------|
|
|
48
|
+
| `get_or_create_key` | Check if a key is cached for a service |
|
|
49
|
+
| `open_browser` | Open browser with Google session, go to URL, return screenshot |
|
|
50
|
+
| `browser_action` | Click, fill, scroll, extract text — returns screenshot after each action |
|
|
51
|
+
| `save_key` | Store an API key the agent found |
|
|
52
|
+
| `close_browser` | Close the browser |
|
|
53
|
+
| `list_my_keys` | List all stored keys |
|
|
54
|
+
| `revoke_key` | Delete a stored key |
|
|
62
55
|
|
|
63
|
-
|
|
56
|
+
### `open_browser`
|
|
64
57
|
|
|
65
58
|
```json
|
|
66
|
-
{
|
|
67
|
-
"service": "twelvelabs",
|
|
68
|
-
"signup_url": "https://api.twelvelabs.io/signup",
|
|
69
|
-
"api_key_url": "https://api.twelvelabs.io/dashboard/api-keys"
|
|
70
|
-
}
|
|
59
|
+
{ "url": "https://platform.openai.com/signup" }
|
|
71
60
|
```
|
|
72
61
|
|
|
73
|
-
|
|
74
|
-
|-----------|----------|-------------|
|
|
75
|
-
| `service` | Yes | Service name (used as cache key) |
|
|
76
|
-
| `signup_url` | Yes | Signup or login page URL |
|
|
77
|
-
| `api_key_url` | No | Direct link to API keys dashboard |
|
|
62
|
+
Returns a **screenshot** of the page so the agent can see it.
|
|
78
63
|
|
|
79
|
-
### `
|
|
64
|
+
### `browser_action`
|
|
80
65
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
66
|
+
```json
|
|
67
|
+
{ "action": "click", "selector": "text=Sign in with Google" }
|
|
68
|
+
{ "action": "fill", "selector": "input[name=email]", "value": "test@example.com" }
|
|
69
|
+
{ "action": "goto", "url": "https://platform.openai.com/api-keys" }
|
|
70
|
+
{ "action": "extract_text", "selector": ".api-key" }
|
|
71
|
+
{ "action": "scroll", "value": "500" }
|
|
72
|
+
{ "action": "press", "key": "Enter" }
|
|
73
|
+
{ "action": "wait", "selector": ".loaded", "ms": 5000 }
|
|
74
|
+
{ "action": "screenshot" }
|
|
75
|
+
{ "action": "extract_all_text" }
|
|
76
|
+
```
|
|
86
77
|
|
|
87
|
-
|
|
78
|
+
Every action returns a screenshot so the agent always sees what happened.
|
|
88
79
|
|
|
89
|
-
|
|
80
|
+
### `save_key`
|
|
90
81
|
|
|
91
|
-
|
|
82
|
+
```json
|
|
83
|
+
{ "service": "openai", "api_key": "sk-..." }
|
|
84
|
+
```
|
|
92
85
|
|
|
93
|
-
|
|
86
|
+
### `get_or_create_key`
|
|
94
87
|
|
|
95
|
-
```
|
|
96
|
-
|
|
88
|
+
```json
|
|
89
|
+
{ "service": "openai" }
|
|
97
90
|
```
|
|
98
91
|
|
|
99
|
-
|
|
92
|
+
Returns cached key or `{ "exists": false }`.
|
|
100
93
|
|
|
101
94
|
## Commands
|
|
102
95
|
|
|
@@ -105,27 +98,13 @@ Most services work without a recipe.
|
|
|
105
98
|
| `agentgate login` | Sign in with Google (opens browser) |
|
|
106
99
|
| `agentgate serve` | Start MCP server |
|
|
107
100
|
| `agentgate doctor` | Health check |
|
|
108
|
-
| `agentgate scaffold <name> <url>` | Generate a service recipe |
|
|
109
|
-
|
|
110
|
-
## How it stays secure
|
|
111
101
|
|
|
112
|
-
|
|
113
|
-
- API keys stored in local SQLite, encrypted vault uses AES-256-GCM
|
|
114
|
-
- Nothing is sent to any cloud — fully local
|
|
115
|
-
- No telemetry
|
|
102
|
+
## Security
|
|
116
103
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
agentgate doctor
|
|
122
|
-
|
|
123
|
-
# Check logs
|
|
124
|
-
cat ~/.agentgate/logs/agentgate.log
|
|
125
|
-
|
|
126
|
-
# Re-login if session expired
|
|
127
|
-
agentgate login
|
|
128
|
-
```
|
|
104
|
+
- Google session stays on **your machine** (`~/.agentgate/browser-profile/`)
|
|
105
|
+
- API keys stored in local SQLite database
|
|
106
|
+
- No cloud, no telemetry
|
|
107
|
+
- The agent can only use YOUR authenticated session
|
|
129
108
|
|
|
130
109
|
## Development
|
|
131
110
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentgate-mcp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "MCP server that lets AI agents get API keys for any service via Google sign-in",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
},
|
|
9
9
|
"files": [
|
|
10
10
|
"src/",
|
|
11
|
-
"services/",
|
|
12
11
|
"README.md",
|
|
13
12
|
"ARCHITECTURE.md",
|
|
14
13
|
"MCP_TOOLS.md"
|
|
@@ -17,7 +16,6 @@
|
|
|
17
16
|
"start": "node --disable-warning=ExperimentalWarning src/cli.js serve",
|
|
18
17
|
"login": "node --disable-warning=ExperimentalWarning src/cli.js login",
|
|
19
18
|
"doctor": "node --disable-warning=ExperimentalWarning src/cli.js doctor",
|
|
20
|
-
"scaffold": "node --disable-warning=ExperimentalWarning src/cli.js scaffold",
|
|
21
19
|
"test": "node --disable-warning=ExperimentalWarning --test test/run.js",
|
|
22
20
|
"postinstall": "npx playwright install chromium 2>/dev/null || echo 'Run: npx playwright install chromium'"
|
|
23
21
|
},
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { createLogger } from './logger.js';
|
|
3
|
+
|
|
4
|
+
const log = createLogger('browser-session');
|
|
5
|
+
|
|
6
|
+
/**
 * Lazily load the `playwright` package.
 *
 * The dynamic import keeps Playwright out of module-load time, so the module
 * can be imported even when the browser tooling is missing; the failure only
 * surfaces when a browser is actually requested.
 *
 * @returns {Promise<object>} The imported `playwright` module namespace.
 * @throws {Error} With install instructions when the import fails. The
 *   underlying import error is preserved on `error.cause` so that failures
 *   other than "package not found" remain diagnosable.
 */
async function importPlaywright() {
  try {
    return await import('playwright');
  } catch (error) {
    throw new Error(
      'Playwright is not installed. Run: npm i playwright && npx playwright install chromium',
      { cause: error }
    );
  }
}
|
|
13
|
+
|
|
14
|
+
/**
 * One Chromium session backed by a persistent on-disk browser profile.
 *
 * `login()` opens a visible browser so the user can sign in; `open()` then
 * launches a headless browser on the same profile directory, and `action()`
 * drives the page. Every successful `open()`/`action()` call resolves to an
 * object containing the current URL, the page title and a base64 PNG
 * screenshot.
 */
export class BrowserSession {
  /**
   * @param {object} options
   * @param {string} options.browserProfileDir - Directory of the persistent
   *   Chromium profile shared by `login()` and `open()`.
   */
  constructor({ browserProfileDir }) {
    this.browserProfileDir = browserProfileDir;
    this.context = null;
    this.page = null;
  }

  /**
   * @returns {boolean} True when both a browser context and a page are held.
   */
  isOpen() {
    return this.page !== null && this.context !== null;
  }

  /**
   * Launch persistent browser, navigate to URL, return screenshot.
   *
   * @param {string} url - Address to load in the first page of the session.
   * @returns {Promise<{url: string, title: string, screenshot: string}>}
   *   State of the page after navigation; `screenshot` is base64 PNG.
   * @throws {Error} When the profile directory does not exist, `url` is
   *   missing, Playwright cannot be loaded, or launch/navigation fails.
   */
  async open(url) {
    if (!fs.existsSync(this.browserProfileDir)) {
      throw new Error('No browser profile found. Run `agentgate login` first to sign in with Google.');
    }
    // Fail before a browser is launched rather than after.
    if (!url) {
      throw new Error('open requires "url"');
    }

    // close() is a no-op without a context, so call it unconditionally: this
    // also releases a context left behind without a page, which isOpen()
    // does not report and which would keep the profile directory in use.
    await this.close();

    const playwright = await importPlaywright();

    log.info(`Opening browser: ${url}`);

    try {
      this.context = await playwright.chromium.launchPersistentContext(
        this.browserProfileDir,
        {
          headless: true,
          viewport: { width: 1366, height: 900 },
          args: ['--disable-blink-features=AutomationControlled']
        }
      );
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      if (msg.includes('Permission denied') || msg.includes('Operation not permitted')) {
        throw new Error('Playwright could not launch browser (permission denied).', { cause: error });
      }
      throw error;
    }

    try {
      this.page = this.context.pages()[0] || await this.context.newPage();
    } catch (error) {
      // Without a page the session is unusable; do not leave the freshly
      // launched context running.
      await this.close();
      throw error;
    }

    // If navigation fails the session stays open (isOpen() is true), so it is
    // released by the next open() or close() call.
    await this.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 });
    // Fixed pause after the DOM is ready, before the screenshot is taken.
    await this.page.waitForTimeout(1_500);

    return this.describePage();
  }

  /**
   * Perform a browser action and return screenshot.
   *
   * Supported actions: `screenshot`, `goto`, `click`, `fill`, `select`,
   * `press`, `scroll`, `wait`, `extract_text`, `extract_all_text`.
   *
   * @param {object} request
   * @param {string} request.action - Name of the action to perform.
   * @param {string} [request.selector] - Target element; required by `click`,
   *   `fill`, `select` and `extract_text`, optional for `press` and `wait`.
   * @param {string|number} [request.value] - Text for `fill`, option for
   *   `select`, vertical wheel delta for `scroll` (500 when not a non-zero
   *   number).
   * @param {string} [request.key] - Key for `press` (defaults to `Enter`).
   * @param {string} [request.url] - Destination for `goto`.
   * @param {number|string} [request.ms] - Timeout for `wait` (5000 when not a
   *   non-zero number).
   * @returns {Promise<{url: string, title: string, screenshot: string, extracted_text?: string}>}
   *   Page state after the action; the extract actions add `extracted_text`.
   * @throws {Error} When no session is open, a required field is missing, the
   *   action is unknown, or the underlying page call fails.
   */
  async action({ action, selector, value, key, url, ms }) {
    this.ensureOpen();

    switch (action) {
      case 'screenshot':
        break; // nothing to do; the page state is returned below

      case 'goto':
        if (!url) throw new Error('goto requires "url"');
        await this.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 });
        await this.page.waitForTimeout(1_500);
        break;

      case 'click':
        if (!selector) throw new Error('click requires "selector"');
        await this.page.click(selector, { timeout: 10_000 });
        await this.page.waitForTimeout(1_500);
        break;

      case 'fill':
        if (!selector) throw new Error('fill requires "selector"');
        // Only a missing value becomes '', so a numeric 0 is typed as "0"
        // instead of silently clearing the field.
        await this.page.fill(selector, value == null ? '' : String(value), { timeout: 10_000 });
        break;

      case 'select':
        if (!selector) throw new Error('select requires "selector"');
        await this.page.selectOption(selector, value || '', { timeout: 10_000 });
        break;

      case 'press':
        if (selector) {
          await this.page.press(selector, key || 'Enter', { timeout: 10_000 });
        } else {
          await this.page.keyboard.press(key || 'Enter');
        }
        break;

      case 'scroll':
        await this.page.mouse.wheel(0, Number(value) || 500);
        await this.page.waitForTimeout(500);
        break;

      case 'wait': {
        const timeout = Number(ms) || 5_000;
        if (selector) {
          await this.page.waitForSelector(selector, { timeout, state: 'visible' });
        } else {
          await this.page.waitForTimeout(timeout);
        }
        break;
      }

      case 'extract_text': {
        if (!selector) throw new Error('extract_text requires "selector"');
        const text = await this.page.textContent(selector, { timeout: 10_000 });
        return this.describePage({ extracted_text: (text || '').trim() });
      }

      case 'extract_all_text': {
        const body = await this.page.textContent('body');
        // Capped at 10,000 characters.
        return this.describePage({ extracted_text: (body || '').trim().slice(0, 10_000) });
      }

      default:
        throw new Error(`Unknown browser action: "${action}". Available: screenshot, goto, click, fill, select, press, scroll, wait, extract_text, extract_all_text`);
    }

    // Default: return the page state after the action
    return this.describePage();
  }

  /**
   * Close the browser session.
   *
   * Does nothing when no context is held. Errors from closing the context are
   * ignored on purpose (it may already be closed), and the session state is
   * always reset.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (!this.context) {
      return;
    }

    log.info('Closing browser session');
    try {
      await this.context.close();
    } catch {
      // already closed
    } finally {
      this.context = null;
      this.page = null;
    }
  }

  /**
   * Open a visible browser for the user to sign into Google.
   *
   * Creates the profile directory if needed, loads the Google accounts page
   * and waits up to five minutes for a URL that indicates a signed-in state.
   * A timeout or a closed window is not treated as an error.
   *
   * @returns {Promise<void>}
   * @throws {Error} When Playwright cannot be loaded, the browser cannot be
   *   launched, or the initial navigation fails.
   */
  async login() {
    const playwright = await importPlaywright();

    log.info('Opening browser for Google login');
    fs.mkdirSync(this.browserProfileDir, { recursive: true });

    const context = await playwright.chromium.launchPersistentContext(
      this.browserProfileDir,
      {
        headless: false,
        viewport: { width: 1280, height: 900 },
        args: ['--disable-blink-features=AutomationControlled']
      }
    );

    try {
      const page = context.pages()[0] || await context.newPage();
      await page.goto('https://accounts.google.com');

      log.info('Waiting for user to complete Google sign-in...');

      try {
        await page.waitForURL(
          (u) => u.href.includes('myaccount.google.com') || u.href.includes('google.com/search'),
          { timeout: 300_000 }
        );
        log.info('Google sign-in detected');
      } catch {
        log.info('Login window closed or timed out — profile saved if login completed');
      }
    } finally {
      // Always release the browser, including when navigation throws. The
      // close itself is best-effort because the user may already have closed
      // the window.
      try {
        await context.close();
      } catch {
        // already closed
      }
    }

    log.info('Browser profile saved');
  }

  // ── internal ──

  /**
   * Guard for methods that need a live page.
   *
   * @throws {Error} When no session is open.
   */
  ensureOpen() {
    if (!this.isOpen()) {
      throw new Error('No browser session open. Call open_browser first.');
    }
  }

  /**
   * @returns {Promise<string>} Base64-encoded PNG screenshot of the page.
   */
  async takeScreenshot() {
    const buffer = await this.page.screenshot({ type: 'png' });
    return buffer.toString('base64');
  }

  /**
   * Build the result object shared by `open()` and `action()`.
   *
   * @param {object} [extra] - Additional fields placed between `title` and
   *   `screenshot` (used for `extracted_text`).
   * @returns {Promise<object>} `{ url, title, ...extra, screenshot }`.
   */
  async describePage(extra = {}) {
    const screenshot = await this.takeScreenshot();
    return {
      url: this.page.url(),
      title: await this.page.title(),
      ...extra,
      screenshot
    };
  }
}
|