llm-in-chrome-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +164 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.js +754 -0
- package/package.json +34 -0
package/README.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# LLM in Chrome MCP Server
|
|
2
|
+
|
|
3
|
+
Give Claude Code a browser agent that handles web tasks autonomously.
|
|
4
|
+
|
|
5
|
+
## Why This Exists
|
|
6
|
+
|
|
7
|
+
When you need to interact with the web, you have two options:
|
|
8
|
+
|
|
9
|
+
### 1. Low-Level Browser Tools (Playwright MCP, browser-use, etc.)
|
|
10
|
+
```
|
|
11
|
+
Claude: I'll click the login button
|
|
12
|
+
Claude: Now I'll type the username
|
|
13
|
+
Claude: Now I'll click submit
|
|
14
|
+
Claude: The page changed, let me screenshot
|
|
15
|
+
Claude: I see an error, let me try again
|
|
16
|
+
... 50 more tool calls ...
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Every click, every keystroke, every navigation is a separate tool call. You're doing the browsing yourself.
|
|
20
|
+
|
|
21
|
+
### 2. This MCP Server (High-Level Agent)
|
|
22
|
+
```
|
|
23
|
+
Claude: browser_start("Log into my account and download the invoice")
|
|
24
|
+
Claude: browser_status → { status: "running", current_activity: "Filling login form..." }
|
|
25
|
+
Claude: browser_status → { status: "complete", answer: "Downloaded invoice.pdf" }
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
You delegate the entire task. The agent handles all browser interaction.
|
|
29
|
+
|
|
30
|
+
## Tools
|
|
31
|
+
|
|
32
|
+
| Tool | Description |
|
|
33
|
+
|------|-------------|
|
|
34
|
+
| `browser_start` | Start a new task. Returns session_id for tracking. |
|
|
35
|
+
| `browser_message` | Send follow-up instructions to a running task. |
|
|
36
|
+
| `browser_status` | Check progress. Works for single task or all tasks. |
|
|
37
|
+
| `browser_stop` | Stop (pause) a task and get partial results. Pass `remove: true` to delete the session instead. |
|
|
38
|
+
| `browser_screenshot` | Capture current browser state. |
|
|
39
|
+
|
|
40
|
+
## Parallel Execution
|
|
41
|
+
|
|
42
|
+
Run multiple tasks simultaneously:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
session1 = browser_start("Search for flights to Tokyo")
|
|
46
|
+
session2 = browser_start("Check hotel prices in Shibuya")
|
|
47
|
+
session3 = browser_start("Look up JR Pass costs")
|
|
48
|
+
|
|
49
|
+
# All three run in parallel
|
|
50
|
+
browser_status() → Shows all 3 active sessions
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Multi-Turn Interaction
|
|
54
|
+
|
|
55
|
+
Send follow-up messages to guide the agent:
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
session = browser_start("Fill out the job application")
|
|
59
|
+
|
|
60
|
+
# Agent might need clarification
|
|
61
|
+
browser_status(session) → { status: "waiting", current_activity: "What's your desired salary?" }
|
|
62
|
+
|
|
63
|
+
browser_message(session, "Put $150k")
|
|
64
|
+
|
|
65
|
+
browser_status(session) → { status: "running", current_activity: "Completing remaining fields..." }
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Session Continuation
|
|
69
|
+
|
|
70
|
+
Continue working with completed tasks - the agent retains full memory:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
session = browser_start("Go to LinkedIn and find an AI Engineer job in Montreal")
|
|
74
|
+
|
|
75
|
+
# Wait for completion...
|
|
76
|
+
browser_status(session) → { status: "complete", answer: "Found: Applied AI Engineer at Cohere" }
|
|
77
|
+
|
|
78
|
+
# Continue with the same session - agent remembers everything
|
|
79
|
+
browser_message(session, "Click into that job and tell me the requirements")
|
|
80
|
+
|
|
81
|
+
browser_status(session) → { status: "complete", answer: "Requirements: 3+ years Python..." }
|
|
82
|
+
|
|
83
|
+
# Keep going
|
|
84
|
+
browser_message(session, "Now apply to this job using my profile")
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Each session has isolated memory, so parallel tasks don't interfere with each other.
|
|
88
|
+
|
|
89
|
+
## Installation
|
|
90
|
+
|
|
91
|
+
### 1. Install the Chrome Extension
|
|
92
|
+
The MCP server requires the LLM in Chrome extension to be installed and configured.
|
|
93
|
+
|
|
94
|
+
### 2. Install the MCP Server
|
|
95
|
+
```bash
|
|
96
|
+
cd mcp-server
|
|
97
|
+
npm install
|
|
98
|
+
npm run build
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### 3. Configure Claude Code
|
|
102
|
+
Add to your Claude Code MCP config (`~/.claude/claude_desktop_config.json`):
|
|
103
|
+
|
|
104
|
+
```json
|
|
105
|
+
{
|
|
106
|
+
"mcpServers": {
|
|
107
|
+
"browser": {
|
|
108
|
+
"command": "node",
|
|
109
|
+
"args": ["/path/to/llm-in-chrome/mcp-server/dist/index.js"]
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Use Cases
|
|
116
|
+
|
|
117
|
+
**Form Filling**
|
|
118
|
+
```
|
|
119
|
+
browser_start("Apply for the senior engineer position on careers.example.com using my resume info")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Research**
|
|
123
|
+
```
|
|
124
|
+
browser_start("Find the top 3 competitors for Acme Corp and summarize their pricing")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
**Data Extraction**
|
|
128
|
+
```
|
|
129
|
+
browser_start("Go to my bank account and list all transactions from last month")
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**Multi-Step Workflows**
|
|
133
|
+
```
|
|
134
|
+
browser_start("Log into Jira, find my open tickets, and summarize what needs attention this week")
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## How It Works
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
Claude Code → MCP Server → Native Host → Chrome Extension → Browser
|
|
141
|
+
↑ ↓
|
|
142
|
+
└──────── Status Updates ──────┘
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
1. Claude Code calls `browser_start` with a task
|
|
146
|
+
2. MCP server creates a session and sends to native host
|
|
147
|
+
3. Native host relays to Chrome extension
|
|
148
|
+
4. Extension's agent handles all browser interaction
|
|
149
|
+
5. Status updates flow back through the chain
|
|
150
|
+
6. Claude Code monitors via `browser_status`
|
|
151
|
+
|
|
152
|
+
## Comparison with Other Tools
|
|
153
|
+
|
|
154
|
+
| Feature | This Server | Playwright MCP | browser-use |
|
|
155
|
+
|---------|------------|----------------|-------------|
|
|
156
|
+
| Abstraction | Task-level | Action-level | Action-level |
|
|
157
|
+
| Tool calls per task | ~3 | ~50+ | ~50+ |
|
|
158
|
+
| Parallel tasks | ✅ | Manual | Manual |
|
|
159
|
+
| Multi-turn | ✅ | N/A | N/A |
|
|
160
|
+
| Setup | Extension | Playwright | Python deps |
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* LLM in Chrome MCP Server
|
|
4
|
+
*
|
|
5
|
+
* A browser automation agent as a service. Instead of exposing low-level
|
|
6
|
+
* browser primitives (click, type, navigate), this exposes high-level
|
|
7
|
+
* task-based tools. The agent handles all browser interaction autonomously.
|
|
8
|
+
*
|
|
9
|
+
* Key features:
|
|
10
|
+
* - Parallel task execution (each task has a session ID)
|
|
11
|
+
* - Multi-turn interaction (send follow-up messages)
|
|
12
|
+
* - Real-time status monitoring
|
|
13
|
+
* - No need to understand browser internals
|
|
14
|
+
*
|
|
15
|
+
* Use this when you need to:
|
|
16
|
+
* - Fill out web forms
|
|
17
|
+
* - Navigate complex websites
|
|
18
|
+
* - Extract data from web pages
|
|
19
|
+
* - Perform multi-step web workflows
|
|
20
|
+
*/
|
|
21
|
+
export {};
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,754 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* LLM in Chrome MCP Server
|
|
4
|
+
*
|
|
5
|
+
* A browser automation agent as a service. Instead of exposing low-level
|
|
6
|
+
* browser primitives (click, type, navigate), this exposes high-level
|
|
7
|
+
* task-based tools. The agent handles all browser interaction autonomously.
|
|
8
|
+
*
|
|
9
|
+
* Key features:
|
|
10
|
+
* - Parallel task execution (each task has a session ID)
|
|
11
|
+
* - Multi-turn interaction (send follow-up messages)
|
|
12
|
+
* - Real-time status monitoring
|
|
13
|
+
* - No need to understand browser internals
|
|
14
|
+
*
|
|
15
|
+
* Use this when you need to:
|
|
16
|
+
* - Fill out web forms
|
|
17
|
+
* - Navigate complex websites
|
|
18
|
+
* - Extract data from web pages
|
|
19
|
+
* - Perform multi-step web workflows
|
|
20
|
+
*/
|
|
21
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextprotocol/sdk/types.js";
import { spawn } from "child_process";
import * as fs from "fs";
import * as os from "os";
import * as path from "path";
import { fileURLToPath } from "url";
|
|
28
|
+
// Screenshot requests still waiting for a reply, keyed by request/session ID.
// Each entry is { resolve, timeout }.
const pendingScreenshots = new Map();
// Task sessions, keyed by session ID.
const sessions = new Map();
// Incremented for every generated session ID.
let sessionCounter = 0;
// Native host connection: the spawned child process, or null while disconnected.
let nativeHost = null;
// Bytes received from the native host's stdout that have not been parsed yet.
let messageBuffer = Buffer.alloc(0);
|
|
34
|
+
// Catalogue of tools advertised to MCP clients.
// Built inside an IIFE so the small schema builders stay private to this definition.
const TOOLS = (() => {
    // One schema property: { type, description }.
    const prop = (type, description) => ({ type, description });
    // Object schema: { type, properties } plus `required` only when one is given.
    const objectSchema = (properties, required) => {
        const schema = { type: "object", properties };
        if (required !== undefined) {
            schema.required = required;
        }
        return schema;
    };
    // One tool entry: { name, description, inputSchema }.
    const tool = (name, description, inputSchema) => ({ name, description, inputSchema });
    return [
        tool("browser_start", `Start a new browser automation task. The agent will autonomously navigate, click, type, and interact with web pages to complete your task.

Returns a session_id for tracking. Use browser_status to monitor progress.

Examples:
- "Fill out the contact form on example.com with my info"
- "Search for 'MCP protocol' on Google and summarize the first 3 results"
- "Log into my account and download the latest invoice"

WHEN TO USE THIS:
Use this when you need to interact with websites through a real browser - especially for:
- Sites requiring login/authentication (the user's browser is already logged in)
- Dynamic web apps that don't have APIs
- Tasks where no CLI tool or other MCP server can help

TASK GUIDELINES:
- Break down complex multi-step tasks. Instead of "research my profile AND find jobs AND apply", do each as a separate task.
- But don't over-specify. If you're unsure whether a detail helps, leave it out and let the agent figure it out.
- For exploration tasks ("find a good job for me"), give the goal and let the agent cook.
- For precise tasks ("fill this form with X"), be specific about what you need.`, objectSchema({
            task: prop("string", "Natural language description of what you want done"),
            url: prop("string", "Optional starting URL. If not provided, agent uses current tab or navigates as needed")
        }, ["task"])),
        tool("browser_message", `Send a follow-up message to a browser task. Works on running OR completed sessions.

Use this to:
- Continue a completed task with additional instructions ("now apply to this job")
- Provide context the agent needs mid-task
- Correct the agent if it's going wrong

The agent retains full memory of the session, so you can build on previous work.`, objectSchema({
            session_id: prop("string", "The session ID from browser_start"),
            message: prop("string", "Your follow-up message or instructions")
        }, ["session_id", "message"])),
        tool("browser_status", `Get the status of browser task(s). Returns current state, steps, reasoning, and answer.

Call without session_id to get status of all active tasks.

Response includes:
- steps: Action summary (what the agent did)
- reasoning: Full agent thinking process
- current_activity: What's happening now
- answer: Final result when complete

Options:
- wait: Block until task completes (with timeout). Great for fire-and-forget tasks.
- timeout_ms: Max wait time when wait=true (default: 2 min)`, objectSchema({
            session_id: prop("string", "Optional session ID. If omitted, returns all active tasks"),
            wait: prop("boolean", "If true, block until task completes or timeout (default: false)"),
            timeout_ms: prop("number", "Max time to wait in ms when wait=true (default: 120000 = 2 min)")
        })),
        tool("browser_stop", `Stop a browser task.

By default, the task is PAUSED - session is preserved and you can resume later with browser_message.

Set remove=true to DELETE the session completely (frees resources, can't resume).

Use cases:
- Pause (remove=false): Task went off track, want to correct it later
- Remove (remove=true): Task completed or failed, done with this session`, objectSchema({
            session_id: prop("string", "The session ID to stop"),
            remove: prop("boolean", "If true, delete the session completely. If false (default), just pause - can resume with browser_message")
        }, ["session_id"])),
        tool("browser_screenshot", "Capture a screenshot of the current browser state for a task.", objectSchema({
            session_id: prop("string", "Optional session ID. If omitted, captures the active tab")
        })),
        tool("browser_debug", "Debug tool - dumps internal MCP server state including all sessions and their step history.", objectSchema({}))
    ];
})();
|
|
178
|
+
/**
 * Build a new session identifier of the form `browser-<epoch ms>-<counter>`.
 *
 * The module-level `sessionCounter` is incremented on every call, so two IDs
 * generated within the same millisecond still differ.
 *
 * @returns {string} The generated session ID.
 */
function generateSessionId() {
    sessionCounter += 1;
    const timestamp = Date.now();
    return ["browser", timestamp, sessionCounter].join("-");
}
|
|
185
|
+
/**
 * Find the native host executable.
 *
 * Lookup order:
 *  1. The `path` field of an installed Chrome native-messaging manifest
 *     (`com.llm_in_chrome.oauth_host.json`) in the user-level macOS location,
 *     then the user-level Linux location. The manifest is used only if the
 *     file it points to exists.
 *  2. A development fallback, `native-host/native-host-wrapper.sh`, two
 *     directories above this module.
 *
 * @returns {string} Path to the native host executable.
 * @throws {Error} If no candidate exists on disk.
 */
function findNativeHostPath() {
    const manifestName = 'com.llm_in_chrome.oauth_host.json';
    const home = os.homedir();
    const manifestCandidates = [
        // macOS (user-level)
        path.join(home, 'Library', 'Application Support', 'Google', 'Chrome', 'NativeMessagingHosts', manifestName),
        // Linux (user-level)
        path.join(home, '.config', 'google-chrome', 'NativeMessagingHosts', manifestName),
    ];
    for (const manifestPath of manifestCandidates) {
        if (!fs.existsSync(manifestPath)) {
            continue;
        }
        try {
            const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
            if (manifest.path && fs.existsSync(manifest.path)) {
                return manifest.path;
            }
        }
        catch (err) {
            // Best effort: a broken manifest must not block the other candidates,
            // but say why it was skipped instead of failing silently.
            console.error(`[MCP] Ignoring unreadable native host manifest ${manifestPath}: ${err?.message ?? err}`);
        }
    }
    // Development fallback. This file is an ES module, so `__dirname` does not
    // exist; derive the module directory from import.meta.url instead.
    const moduleDir = path.dirname(fileURLToPath(import.meta.url));
    const devPath = path.join(moduleDir, '..', '..', 'native-host', 'native-host-wrapper.sh');
    if (fs.existsSync(devPath)) {
        return devPath;
    }
    throw new Error("LLM in Chrome native host not found. Please install the extension first.");
}
|
|
206
|
+
/**
 * Spawn the native host process and make it the current connection.
 *
 * The child's stdout is appended to the shared `messageBuffer` and parsed by
 * `processNativeMessages`; its stderr is forwarded to this process's stderr.
 *
 * @returns {Promise<import("child_process").ChildProcess>} Resolves with the
 *   child process 100 ms after spawning; rejects if the host path cannot be
 *   found, `spawn` throws, or the child emits `error` before the promise settles.
 */
function connectToNativeHost() {
    return new Promise((resolve, reject) => {
        try {
            const hostPath = findNativeHostPath();
            console.error(`[MCP] Connecting to: ${hostPath}`);
            const host = spawn(hostPath, [], {
                stdio: ['pipe', 'pipe', 'pipe'],
            });
            // A new process starts a new message stream: drop any partial frame
            // left over from a previous host so it cannot corrupt the framing.
            messageBuffer = Buffer.alloc(0);
            // Clear the module-level handle only if it still refers to this
            // child, so a late event from an old host cannot disconnect a newer one.
            const releaseIfCurrent = () => {
                if (nativeHost === host) {
                    nativeHost = null;
                }
            };
            host.stdout?.on('data', (chunk) => {
                messageBuffer = Buffer.concat([messageBuffer, chunk]);
                processNativeMessages();
            });
            host.stderr?.on('data', (data) => {
                console.error(`[Native] ${data.toString().trim()}`);
            });
            // Without a listener, a write to a dead pipe (EPIPE) is raised as an
            // unhandled 'error' event and takes the whole server down.
            host.stdin?.on('error', (err) => {
                console.error(`[MCP] Native host stdin error:`, err);
                releaseIfCurrent();
            });
            host.on('error', (err) => {
                releaseIfCurrent();
                reject(err);
            });
            host.on('close', (code) => {
                console.error(`[MCP] Native host exited: ${code}`);
                releaseIfCurrent();
            });
            nativeHost = host;
            setTimeout(() => resolve(host), 100);
        }
        catch (err) {
            reject(err);
        }
    });
}
|
|
237
|
+
/**
 * Drain complete frames from `messageBuffer`.
 *
 * Each frame is a 4-byte little-endian unsigned length followed by that many
 * bytes of JSON text. Every complete frame is removed from the buffer, parsed,
 * and passed to `handleNativeMessage`; an incomplete trailing frame is left in
 * place for the next chunk. Errors thrown while parsing or handling one frame
 * are logged and do not stop later frames from being processed.
 */
function processNativeMessages() {
    const HEADER_BYTES = 4;
    for (;;) {
        if (messageBuffer.length < HEADER_BYTES) {
            return;
        }
        const bodyLength = messageBuffer.readUInt32LE(0);
        const frameEnd = HEADER_BYTES + bodyLength;
        if (messageBuffer.length < frameEnd) {
            return;
        }
        const payload = messageBuffer.subarray(HEADER_BYTES, frameEnd).toString();
        messageBuffer = messageBuffer.subarray(frameEnd);
        try {
            handleNativeMessage(JSON.parse(payload));
        }
        catch (e) {
            console.error('[MCP] Parse error:', e);
        }
    }
}
|
|
256
|
+
/**
 * Route one decoded message from the native host.
 *
 * - A message of type `mcp_results` carrying a `results` array is unpacked and
 *   each entry is passed to `processResult`.
 * - Any other message is passed to `processResult` when its `sessionId` names
 *   a known session or a pending screenshot request.
 *
 * @param {object} message Decoded JSON message from the native host.
 */
function handleNativeMessage(message) {
    const { type, sessionId, results, ...data } = message;
    // LOG EVERYTHING for debugging
    console.error(`[MCP DEBUG] handleNativeMessage called: type=${type}, hasResults=${!!results}, resultsLen=${results?.length || 0}`);
    // Handle batch results from polling
    if (type === 'mcp_results' && Array.isArray(results)) {
        console.error(`[MCP DEBUG] Processing ${results.length} results from poll`);
        for (const result of results) {
            console.error(`[MCP DEBUG] Result: ${JSON.stringify(result).substring(0, 200)}`);
            processResult(result);
        }
        return;
    }
    // Log other message types
    if (type !== 'no_commands' && type !== 'mcp_results') {
        console.error(`[MCP] Native message: ${type}`, sessionId || '');
    }
    // Handle single result. Screenshot requests made without a session are
    // tracked only in pendingScreenshots (under a generated request ID), so
    // those replies must be let through too or the request can only time out.
    if (sessionId && (sessions.has(sessionId) || pendingScreenshots.has(sessionId))) {
        processResult({ type, sessionId, ...data });
    }
}
|
|
281
|
+
/**
 * Apply one result/event from the extension to local state.
 *
 * A `screenshot` result whose `sessionId` matches a pending screenshot request
 * resolves that request and stops there. Otherwise the result must belong to a
 * known session, which is updated by type:
 *  - `task_update`:   status -> running; the step is recorded in the histories
 *  - `task_waiting`:  status -> waiting; the message is recorded with a [WAITING] prefix
 *  - `task_complete`: status -> complete; stores the result and takes the answer
 *                     from the most recent step
 *  - `task_error`:    status -> error; stores the error
 *  - `screenshot`:    appends the image data to the session's screenshots
 * Results for unknown sessions are logged and ignored.
 *
 * @param {object} result Result object carrying `type`, `sessionId` and a type-specific payload.
 */
function processResult(result) {
    const { type, sessionId, ...data } = result;
    console.error(`[MCP] processResult: type=${type}, sessionId=${sessionId}, activeSessions=${Array.from(sessions.keys()).join(',')}`);
    // Handle screenshots that might be for pending requests (not real sessions)
    if (type === 'screenshot' && data.data && sessionId) {
        const pending = pendingScreenshots.get(sessionId);
        if (pending) {
            console.error(`[MCP] Screenshot received for pending request: ${sessionId}`);
            clearTimeout(pending.timeout);
            pending.resolve(data.data);
            pendingScreenshots.delete(sessionId);
            return;
        }
    }
    if (!sessionId || !sessions.has(sessionId)) {
        // Log why we're skipping
        console.error(`[MCP] Skipping result: sessionId=${sessionId}, exists=${sessions.has(sessionId)}`);
        return;
    }
    const session = sessions.get(sessionId);
    switch (type) {
        case 'task_update': {
            session.status = 'running';
            session.currentStep = data.step || data.status;
            if (session.currentStep) {
                // Always add to reasoning history (full trace)
                session.reasoningHistory.push(session.currentStep);
                // The prefix checks need a string; coerce so an unexpected
                // non-string step cannot throw and abort this update.
                const stepText = String(session.currentStep);
                // Only add non-thinking steps to stepHistory (action summary)
                if (stepText !== 'thinking' && !stepText.startsWith('[thinking]')) {
                    session.stepHistory.push(session.currentStep);
                }
                // Clear pending messages when we see confirmation they were injected
                if (stepText.startsWith('[User follow-up]:') && session.pendingMessages.length > 0) {
                    session.pendingMessages.shift(); // Remove the oldest pending message
                }
            }
            break;
        }
        case 'task_waiting':
            session.status = 'waiting';
            session.currentStep = data.message;
            if (session.currentStep) {
                session.reasoningHistory.push(`[WAITING] ${session.currentStep}`);
                session.stepHistory.push(`[WAITING] ${session.currentStep}`);
            }
            break;
        case 'task_complete':
            session.status = 'complete';
            session.completedAt = Date.now();
            session.result = data.result;
            // Extract answer from currentStep (where extension puts the final answer)
            session.answer = session.currentStep;
            console.error(`[MCP] Session ${sessionId} marked COMPLETE`);
            break;
        case 'task_error':
            session.status = 'error';
            session.completedAt = Date.now();
            session.error = data.error;
            break;
        case 'screenshot':
            // A pending request for this ID was already resolved by the early
            // return above, so only the session's own history is updated here.
            if (data.data) {
                session.screenshots.push(data.data);
            }
            break;
    }
}
|
|
356
|
+
/**
 * Send one message to the native host, reconnecting first if there is no
 * writable connection.
 *
 * The message is serialised as JSON and framed with a 4-byte little-endian
 * length prefix, the same framing `processNativeMessages` reads.
 *
 * @param {object} message Message to send; `message.type` is used for logging.
 * @returns {Promise<void>}
 * @throws {Error} If reconnecting fails, the host is unavailable after
 *   reconnecting, or the write throws. The connection is cleared on write failure.
 */
async function sendToNative(message) {
    if (!nativeHost?.stdin || !nativeHost.stdin.writable) {
        console.error(`[MCP] Reconnecting to native host (stdin=${!!nativeHost?.stdin}, writable=${nativeHost?.stdin?.writable})`);
        nativeHost = null;
        await connectToNativeHost();
    }
    const json = JSON.stringify(message);
    const payload = Buffer.from(json);
    // Header and payload go out as one buffer so the frame is written in one call.
    const frame = Buffer.alloc(4 + payload.length);
    frame.writeUInt32LE(payload.length, 0);
    payload.copy(frame, 4);
    // Report the encoded byte length; json.length counts UTF-16 units and is
    // wrong for non-ASCII text.
    console.error(`[MCP] Sending: ${message.type} (${payload.length} bytes)`);
    try {
        if (!nativeHost?.stdin) {
            // The host can exit between connecting and writing.
            throw new Error("Native host is not connected");
        }
        nativeHost.stdin.write(frame);
    }
    catch (err) {
        console.error(`[MCP] Write error:`, err);
        nativeHost = null;
        throw err;
    }
}
|
|
380
|
+
/**
 * Build the client-facing view of a session.
 *
 * Always includes `session_id` and `status`. The remaining fields appear only
 * when they apply:
 *  - `current_activity`: while the session is running or waiting
 *  - `steps`:            when the action summary is non-empty
 *  - `reasoning`:        when the full reasoning trace is non-empty
 *  - `pending_messages`: count of follow-up messages not yet confirmed
 *  - `answer`:           when complete (falls back to the last step)
 *  - `error`:            when the session ended in an error
 *
 * @param {object} session Session record as stored in `sessions`.
 * @returns {object} Plain object suitable for JSON serialisation.
 */
function formatSession(session) {
    const { id, status, currentStep, stepHistory, reasoningHistory, pendingMessages } = session;
    const isActive = status === 'running' || status === 'waiting';
    const queuedCount = pendingMessages.length;
    return {
        session_id: id,
        status,
        // What's happening right now
        ...(isActive ? { current_activity: currentStep } : {}),
        // Action summary (no thinking markers)
        ...(stepHistory.length > 0 ? { steps: stepHistory } : {}),
        // Full trace, thinking included
        ...(reasoningHistory.length > 0 ? { reasoning: reasoningHistory } : {}),
        // Follow-ups still waiting to be picked up
        ...(queuedCount > 0 ? { pending_messages: queuedCount } : {}),
        // Final answer when complete
        ...(status === 'complete' ? { answer: session.answer || currentStep } : {}),
        // Error details
        ...(status === 'error' ? { error: session.error } : {}),
    };
}
|
|
420
|
+
// MCP server instance: identifies itself as "llm-in-chrome" 1.0.0 and declares
// the tools capability with a fixed (non-changing) tool list.
const server = new Server(
    { name: "llm-in-chrome", version: "1.0.0" },
    { capabilities: { tools: { listChanged: false } } }
);
// Tool discovery: always answer with the static TOOLS catalogue.
server.setRequestHandler(ListToolsRequestSchema, async () => {
    return { tools: TOOLS };
});
|
|
434
|
+
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
435
|
+
const { name, arguments: args } = request.params;
|
|
436
|
+
try {
|
|
437
|
+
switch (name) {
|
|
438
|
+
case "browser_start": {
|
|
439
|
+
const task = args?.task;
|
|
440
|
+
const url = args?.url;
|
|
441
|
+
if (!task?.trim()) {
|
|
442
|
+
return {
|
|
443
|
+
content: [{ type: "text", text: "Error: task cannot be empty" }],
|
|
444
|
+
isError: true
|
|
445
|
+
};
|
|
446
|
+
}
|
|
447
|
+
const sessionId = generateSessionId();
|
|
448
|
+
const session = {
|
|
449
|
+
id: sessionId,
|
|
450
|
+
status: 'starting',
|
|
451
|
+
task,
|
|
452
|
+
url,
|
|
453
|
+
startedAt: Date.now(),
|
|
454
|
+
stepHistory: [],
|
|
455
|
+
reasoningHistory: [],
|
|
456
|
+
screenshots: [],
|
|
457
|
+
pendingMessages: []
|
|
458
|
+
};
|
|
459
|
+
sessions.set(sessionId, session);
|
|
460
|
+
// Send to extension via native host
|
|
461
|
+
await sendToNative({
|
|
462
|
+
type: 'mcp_start_task',
|
|
463
|
+
sessionId,
|
|
464
|
+
task,
|
|
465
|
+
url
|
|
466
|
+
});
|
|
467
|
+
session.status = 'running';
|
|
468
|
+
return {
|
|
469
|
+
content: [{
|
|
470
|
+
type: "text",
|
|
471
|
+
text: JSON.stringify({
|
|
472
|
+
session_id: sessionId,
|
|
473
|
+
status: "running"
|
|
474
|
+
}, null, 2)
|
|
475
|
+
}]
|
|
476
|
+
};
|
|
477
|
+
}
|
|
478
|
+
case "browser_message": {
|
|
479
|
+
const sessionId = args?.session_id;
|
|
480
|
+
const message = args?.message;
|
|
481
|
+
if (!sessionId || !sessions.has(sessionId)) {
|
|
482
|
+
return {
|
|
483
|
+
content: [{ type: "text", text: `Error: Session not found: ${sessionId}` }],
|
|
484
|
+
isError: true
|
|
485
|
+
};
|
|
486
|
+
}
|
|
487
|
+
if (!message?.trim()) {
|
|
488
|
+
return {
|
|
489
|
+
content: [{ type: "text", text: "Error: message cannot be empty" }],
|
|
490
|
+
isError: true
|
|
491
|
+
};
|
|
492
|
+
}
|
|
493
|
+
const session = sessions.get(sessionId);
|
|
494
|
+
// Track this message as pending (for visibility in status)
|
|
495
|
+
session.pendingMessages.push(message);
|
|
496
|
+
await sendToNative({
|
|
497
|
+
type: 'mcp_send_message',
|
|
498
|
+
sessionId,
|
|
499
|
+
message
|
|
500
|
+
});
|
|
501
|
+
// If session was running, it stays running
|
|
502
|
+
// If session was complete/stopped, it will be re-activated by the extension
|
|
503
|
+
if (session.status !== 'running') {
|
|
504
|
+
session.status = 'running';
|
|
505
|
+
}
|
|
506
|
+
return {
|
|
507
|
+
content: [{
|
|
508
|
+
type: "text",
|
|
509
|
+
text: JSON.stringify({
|
|
510
|
+
session_id: sessionId,
|
|
511
|
+
status: "message_sent",
|
|
512
|
+
message: "Follow-up message sent to the agent",
|
|
513
|
+
pending_messages: session.pendingMessages.length
|
|
514
|
+
}, null, 2)
|
|
515
|
+
}]
|
|
516
|
+
};
|
|
517
|
+
}
|
|
518
|
+
case "browser_status": {
|
|
519
|
+
const sessionId = args?.session_id;
|
|
520
|
+
const shouldWait = args?.wait === true;
|
|
521
|
+
const timeoutMs = args?.timeout_ms || 120000; // 2 min default
|
|
522
|
+
if (sessionId) {
|
|
523
|
+
if (!sessions.has(sessionId)) {
|
|
524
|
+
return {
|
|
525
|
+
content: [{ type: "text", text: `Error: Session not found: ${sessionId}` }],
|
|
526
|
+
isError: true
|
|
527
|
+
};
|
|
528
|
+
}
|
|
529
|
+
// If wait=true, poll until task completes or timeout
|
|
530
|
+
if (shouldWait) {
|
|
531
|
+
const startTime = Date.now();
|
|
532
|
+
while (Date.now() - startTime < timeoutMs) {
|
|
533
|
+
const session = sessions.get(sessionId);
|
|
534
|
+
if (session.status === 'complete' || session.status === 'error' || session.status === 'stopped') {
|
|
535
|
+
return {
|
|
536
|
+
content: [{
|
|
537
|
+
type: "text",
|
|
538
|
+
text: JSON.stringify(formatSession(session), null, 2)
|
|
539
|
+
}]
|
|
540
|
+
};
|
|
541
|
+
}
|
|
542
|
+
// Wait 500ms before checking again
|
|
543
|
+
await new Promise(resolve => setTimeout(resolve, 500));
|
|
544
|
+
}
|
|
545
|
+
// Timeout - return current status
|
|
546
|
+
const session = sessions.get(sessionId);
|
|
547
|
+
return {
|
|
548
|
+
content: [{
|
|
549
|
+
type: "text",
|
|
550
|
+
text: JSON.stringify({
|
|
551
|
+
...formatSession(session),
|
|
552
|
+
timeout: true,
|
|
553
|
+
message: `Task still running after ${timeoutMs}ms timeout`
|
|
554
|
+
}, null, 2)
|
|
555
|
+
}]
|
|
556
|
+
};
|
|
557
|
+
}
|
|
558
|
+
return {
|
|
559
|
+
content: [{
|
|
560
|
+
type: "text",
|
|
561
|
+
text: JSON.stringify(formatSession(sessions.get(sessionId)), null, 2)
|
|
562
|
+
}]
|
|
563
|
+
};
|
|
564
|
+
}
|
|
565
|
+
// Return all active sessions
|
|
566
|
+
const activeSessions = Array.from(sessions.values())
|
|
567
|
+
.filter(s => s.status !== 'complete' && s.status !== 'error' && s.status !== 'stopped')
|
|
568
|
+
.map(s => formatSession(s));
|
|
569
|
+
return {
|
|
570
|
+
content: [{
|
|
571
|
+
type: "text",
|
|
572
|
+
text: JSON.stringify(activeSessions, null, 2)
|
|
573
|
+
}]
|
|
574
|
+
};
|
|
575
|
+
}
|
|
576
|
+
case "browser_stop": {
|
|
577
|
+
const sessionId = args?.session_id;
|
|
578
|
+
const shouldRemove = args?.remove === true;
|
|
579
|
+
if (!sessionId || !sessions.has(sessionId)) {
|
|
580
|
+
return {
|
|
581
|
+
content: [{ type: "text", text: `Error: Session not found: ${sessionId}` }],
|
|
582
|
+
isError: true
|
|
583
|
+
};
|
|
584
|
+
}
|
|
585
|
+
await sendToNative({
|
|
586
|
+
type: 'mcp_stop_task',
|
|
587
|
+
sessionId,
|
|
588
|
+
remove: shouldRemove // Tell extension whether to delete or just pause
|
|
589
|
+
});
|
|
590
|
+
const session = sessions.get(sessionId);
|
|
591
|
+
const partialResult = session.result || session.currentStep;
|
|
592
|
+
if (shouldRemove) {
|
|
593
|
+
// Delete the session completely
|
|
594
|
+
sessions.delete(sessionId);
|
|
595
|
+
return {
|
|
596
|
+
content: [{
|
|
597
|
+
type: "text",
|
|
598
|
+
text: JSON.stringify({
|
|
599
|
+
session_id: sessionId,
|
|
600
|
+
status: "removed",
|
|
601
|
+
message: "Session deleted. Cannot resume.",
|
|
602
|
+
partial_result: partialResult
|
|
603
|
+
}, null, 2)
|
|
604
|
+
}]
|
|
605
|
+
};
|
|
606
|
+
}
|
|
607
|
+
else {
|
|
608
|
+
// Just pause - can resume later
|
|
609
|
+
session.status = 'stopped';
|
|
610
|
+
return {
|
|
611
|
+
content: [{
|
|
612
|
+
type: "text",
|
|
613
|
+
text: JSON.stringify({
|
|
614
|
+
session_id: sessionId,
|
|
615
|
+
status: "paused",
|
|
616
|
+
message: "Session paused. Use browser_message to resume.",
|
|
617
|
+
partial_result: partialResult
|
|
618
|
+
}, null, 2)
|
|
619
|
+
}]
|
|
620
|
+
};
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
case "browser_screenshot": {
|
|
624
|
+
const sessionId = args?.session_id;
|
|
625
|
+
const requestId = sessionId || `screenshot-${Date.now()}`;
|
|
626
|
+
// Create a promise that resolves when screenshot arrives
|
|
627
|
+
const screenshotPromise = new Promise((resolve) => {
|
|
628
|
+
const timeout = setTimeout(() => {
|
|
629
|
+
pendingScreenshots.delete(requestId);
|
|
630
|
+
resolve(null);
|
|
631
|
+
}, 5000); // 5 second timeout
|
|
632
|
+
pendingScreenshots.set(requestId, { resolve, timeout });
|
|
633
|
+
});
|
|
634
|
+
await sendToNative({
|
|
635
|
+
type: 'mcp_screenshot',
|
|
636
|
+
sessionId: requestId
|
|
637
|
+
});
|
|
638
|
+
const screenshotData = await screenshotPromise;
|
|
639
|
+
if (screenshotData) {
|
|
640
|
+
return {
|
|
641
|
+
content: [
|
|
642
|
+
{
|
|
643
|
+
type: "image",
|
|
644
|
+
data: screenshotData,
|
|
645
|
+
mimeType: "image/png"
|
|
646
|
+
},
|
|
647
|
+
{
|
|
648
|
+
type: "text",
|
|
649
|
+
text: "Screenshot of current browser state"
|
|
650
|
+
}
|
|
651
|
+
]
|
|
652
|
+
};
|
|
653
|
+
}
|
|
654
|
+
// Fallback: check session screenshots if we have a session
|
|
655
|
+
if (sessionId && sessions.has(sessionId)) {
|
|
656
|
+
const session = sessions.get(sessionId);
|
|
657
|
+
if (session.screenshots.length > 0) {
|
|
658
|
+
const latest = session.screenshots[session.screenshots.length - 1];
|
|
659
|
+
return {
|
|
660
|
+
content: [
|
|
661
|
+
{
|
|
662
|
+
type: "image",
|
|
663
|
+
data: latest,
|
|
664
|
+
mimeType: "image/png"
|
|
665
|
+
},
|
|
666
|
+
{
|
|
667
|
+
type: "text",
|
|
668
|
+
text: "Screenshot from session cache"
|
|
669
|
+
}
|
|
670
|
+
]
|
|
671
|
+
};
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
return {
|
|
675
|
+
content: [{ type: "text", text: "Screenshot request timed out. The browser may not be responding." }],
|
|
676
|
+
isError: true
|
|
677
|
+
};
|
|
678
|
+
}
|
|
679
|
+
case "browser_debug": {
|
|
680
|
+
// Debug tool to dump internal state
|
|
681
|
+
const allSessions = Array.from(sessions.entries()).map(([id, s]) => ({
|
|
682
|
+
id,
|
|
683
|
+
status: s.status,
|
|
684
|
+
task: s.task?.substring(0, 50),
|
|
685
|
+
stepHistoryLength: s.stepHistory.length,
|
|
686
|
+
reasoningHistoryLength: s.reasoningHistory?.length || 0,
|
|
687
|
+
stepHistory: s.stepHistory.slice(-10), // Last 10 steps
|
|
688
|
+
reasoningHistory: s.reasoningHistory?.slice(-10), // Last 10 reasoning entries
|
|
689
|
+
pendingMessages: s.pendingMessages?.length || 0,
|
|
690
|
+
currentStep: s.currentStep,
|
|
691
|
+
answer: s.answer,
|
|
692
|
+
error: s.error,
|
|
693
|
+
startedAt: s.startedAt,
|
|
694
|
+
completedAt: s.completedAt,
|
|
695
|
+
}));
|
|
696
|
+
return {
|
|
697
|
+
content: [{
|
|
698
|
+
type: "text",
|
|
699
|
+
text: JSON.stringify({
|
|
700
|
+
totalSessions: sessions.size,
|
|
701
|
+
nativeHostConnected: !!nativeHost?.stdin,
|
|
702
|
+
sessions: allSessions
|
|
703
|
+
}, null, 2)
|
|
704
|
+
}]
|
|
705
|
+
};
|
|
706
|
+
}
|
|
707
|
+
default:
|
|
708
|
+
return {
|
|
709
|
+
content: [{ type: "text", text: `Unknown tool: ${name}` }],
|
|
710
|
+
isError: true
|
|
711
|
+
};
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
catch (error) {
|
|
715
|
+
return {
|
|
716
|
+
content: [{ type: "text", text: `Error: ${error.message}` }],
|
|
717
|
+
isError: true
|
|
718
|
+
};
|
|
719
|
+
}
|
|
720
|
+
});
|
|
721
|
+
/**
 * Poll for results from the extension via the native host.
 *
 * Sends a single `mcp_poll_results` message through `sendToNative`. Nothing
 * is sent while there is no native host with a stdin stream. A failure while
 * sending is logged to stderr and never propagated to the caller.
 *
 * @returns {Promise<void>} Settles after the poll message was sent, skipped,
 *   or its failure was logged.
 */
async function pollForResults() {
    if (!nativeHost?.stdin) {
        return;
    }
    try {
        // sendToNative is awaited at its other call sites, so it is treated as
        // asynchronous here. Awaiting keeps a rejected send inside this
        // try/catch instead of leaving it as an unhandled promise rejection.
        await sendToNative({ type: 'mcp_poll_results' });
    }
    catch (err) {
        console.error('[MCP] Poll error:', err);
    }
}
|
|
734
|
+
// Start server
/**
 * Boot sequence for the MCP server process.
 *
 * First tries to connect to the native host; on success it schedules
 * `pollForResults` on a fixed interval. A failed connection is only logged
 * as a warning, and the MCP server is still attached to a stdio transport
 * afterwards. Progress messages go to stderr via `console.error`.
 *
 * @returns {Promise<void>} Resolves once `server.connect` has completed.
 */
async function main() {
    // Delay between two consecutive result polls, in milliseconds.
    const pollIntervalMs = 500;

    console.error("[MCP] LLM in Chrome MCP Server starting...");

    try {
        // Pre-connect to the native host before serving requests.
        await connectToNativeHost();
        console.error("[MCP] Connected to native host");
        // NOTE(review): polling is scheduled only when this initial connection
        // succeeds - confirm that a later connection does not need it as well.
        setInterval(pollForResults, pollIntervalMs);
    }
    catch (connectError) {
        console.error("[MCP] Warning: Could not connect to native host:", connectError);
    }

    await server.connect(new StdioServerTransport());
    console.error("[MCP] Server running");
}
|
|
751
|
+
// Entry point: run the server; any rejection from main() is reported on
// stderr and the process exits with status 1.
main().catch(function handleFatalStartupError(fatalError) {
    console.error("[MCP] Fatal:", fatalError);
    process.exit(1);
});
|
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "llm-in-chrome-mcp",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "MCP Server for LLM in Chrome - browser automation without exposing individual tools",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "hanzili",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/hanzili/llm-in-chrome.git"
|
|
10
|
+
},
|
|
11
|
+
"type": "module",
|
|
12
|
+
"bin": {
|
|
13
|
+
"llm-in-chrome-mcp": "./dist/index.js"
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"dist",
|
|
17
|
+
"README.md"
|
|
18
|
+
],
|
|
19
|
+
"scripts": {
|
|
20
|
+
"build": "tsc",
|
|
21
|
+
"dev": "tsc --watch",
|
|
22
|
+
"start": "node dist/index.js"
|
|
23
|
+
},
|
|
24
|
+
"dependencies": {
|
|
25
|
+
"@modelcontextprotocol/sdk": "^1.25.3"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@types/node": "^20.11.0",
|
|
29
|
+
"typescript": "^5.3.0"
|
|
30
|
+
},
|
|
31
|
+
"engines": {
|
|
32
|
+
"node": ">=18.0.0"
|
|
33
|
+
}
|
|
34
|
+
}
|