@akshayram1/omnibrowser-agent 0.2.6 → 0.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +219 -110
- package/dist/background.js +24 -5
- package/dist/background.js.map +2 -2
- package/dist/content.js +120 -4
- package/dist/content.js.map +3 -3
- package/dist/lib.js +264 -58
- package/dist/lib.js.map +3 -3
- package/dist/popup.html +7 -1
- package/dist/popup.js +19 -1
- package/dist/popup.js.map +2 -2
- package/dist/types/core/prompt.d.ts +3 -0
- package/dist/types/core/webllm-bridge.d.ts +33 -0
- package/dist/types/lib/index.d.ts +2 -0
- package/dist/types/shared/contracts.d.ts +4 -0
- package/dist/types/shared/parse-action.d.ts +2 -1
- package/docs/EMBEDDING.md +3 -14
- package/docs/ROADMAP.md +8 -13
- package/docs/arch.md +220 -0
- package/index.html +1204 -198
- package/package.json +1 -1
- package/plan.md +114 -0
- package/styles.css +654 -293
- package/vercel.json +7 -2
package/README.md
CHANGED
|
@@ -1,183 +1,292 @@
|
|
|
1
1
|
# omnibrowser-agent
|
|
2
2
|
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@akshayram1/omnibrowser-agent)
|
|
3
4
|
[License](LICENSE)
|
|
4
|
-
[](package.json)
|
|
5
5
|
|
|
6
|
-
Local-first
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
6
|
+
Local-first browser AI operator. Plans and executes DOM actions entirely in the browser — no API keys, no cloud costs, no data leaving your machine.
|
|
7
|
+
|
|
8
|
+
[Live Demo](https://omnibrowser-agent.vercel.app/examples/chatbot/) · [Embedding Guide](docs/EMBEDDING.md) · [Architecture](docs/arch.md) · [Deployment](docs/DEPLOYMENT.md) · [Roadmap](docs/ROADMAP.md)
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Architecture
|
|
13
|
+
|
|
14
|
+
```mermaid
|
|
15
|
+
flowchart TB
|
|
16
|
+
subgraph DELIVERY["Delivery Layer"]
|
|
17
|
+
EXT["🧩 Chrome Extension\npopup + background worker"]
|
|
18
|
+
LIB["📦 npm Library\ncreateBrowserAgent()"]
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
subgraph ORCHESTRATION["Orchestration"]
|
|
22
|
+
BG["background/index.ts\nSession & tick loop"]
|
|
23
|
+
BA["BrowserAgent class\nrunLoop() / resume() / stop()"]
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
subgraph CORE["Core (src/core/)"]
|
|
27
|
+
PL["planner.ts\nheuristicPlan() / webllm bridge\nplanNextAction()"]
|
|
28
|
+
OB["observer.ts\ncollectSnapshot()\nDOM candidates + visibility filter"]
|
|
29
|
+
EX["executor.ts\nexecuteAction()\nclick / type / navigate\nscroll / focus / wait"]
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
subgraph SHARED["Shared (src/shared/)"]
|
|
33
|
+
CT["contracts.ts\nAgentAction · PageSnapshot\nAgentSession · PlannerResult"]
|
|
34
|
+
SF["safety.ts\nassessRisk()\nsafe / review / blocked"]
|
|
35
|
+
PA["parse-action.ts\nparseAction()\nparsePlannerResult()"]
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
subgraph OUTCOMES["Action Outcomes"]
|
|
39
|
+
direction LR
|
|
40
|
+
OK["✅ safe → execute"]
|
|
41
|
+
RV["⚠️ review → needs approval"]
|
|
42
|
+
BL["🚫 blocked → stop"]
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
subgraph PLANNERS["Planner Modes"]
|
|
46
|
+
direction LR
|
|
47
|
+
HP["Heuristic\nzero deps · offline\nregex patterns"]
|
|
48
|
+
WL["WebLLM\non-device · WebGPU\nwindow.__browserAgentWebLLM"]
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
EXT --> BG
|
|
52
|
+
LIB --> BA
|
|
53
|
+
BG -. "chrome.tabs.sendMessage" .-> CORE
|
|
54
|
+
BA --> CORE
|
|
55
|
+
|
|
56
|
+
PL --> OB
|
|
57
|
+
PL --> SHARED
|
|
58
|
+
OB --> SHARED
|
|
59
|
+
EX --> SHARED
|
|
60
|
+
|
|
61
|
+
SF --> OUTCOMES
|
|
62
|
+
PL --> PLANNERS
|
|
63
|
+
```
|
|
24
64
|
|
|
25
|
-
|
|
65
|
+
---
|
|
26
66
|
|
|
27
|
-
|
|
28
|
-
- `src/content` page observer/planner/executor
|
|
29
|
-
- `src/popup` control panel
|
|
30
|
-
- `src/lib` embeddable runtime API
|
|
31
|
-
- `src/shared` contracts and safety
|
|
67
|
+
## How it works — one tick
|
|
32
68
|
|
|
33
|
-
|
|
69
|
+
```
|
|
70
|
+
goal + history + memory
|
|
71
|
+
│
|
|
72
|
+
▼
|
|
73
|
+
observer.collectSnapshot() ──→ PageSnapshot (url, title, candidates[])
|
|
74
|
+
│
|
|
75
|
+
▼
|
|
76
|
+
planner.planNextAction() ──→ PlannerResult { action, evaluation?, memory?, nextGoal? }
|
|
77
|
+
│
|
|
78
|
+
▼
|
|
79
|
+
safety.assessRisk(action) ──→ safe | review | blocked
|
|
80
|
+
│
|
|
81
|
+
┌────┴─────────────────────┐
|
|
82
|
+
blocked review (human-approved mode)
|
|
83
|
+
│ │
|
|
84
|
+
stop pause → user approves → resume
|
|
85
|
+
│
|
|
86
|
+
safe / approved
|
|
87
|
+
│
|
|
88
|
+
▼
|
|
89
|
+
executor.executeAction(action) ──→ result string
|
|
90
|
+
│
|
|
91
|
+
▼
|
|
92
|
+
session.history.push(result)
|
|
93
|
+
→ next tick
|
|
94
|
+
```
|
|
34
95
|
|
|
35
|
-
|
|
96
|
+
The planner uses a **reflection loop** before each action: it evaluates what happened last step, maintains working memory across steps, and states its next goal — giving the agent much better multi-step reasoning.
|
|
36
97
|
|
|
37
|
-
|
|
38
|
-
npm install
|
|
39
|
-
```
|
|
98
|
+
---
|
|
40
99
|
|
|
41
|
-
|
|
100
|
+
## Install
|
|
42
101
|
|
|
43
102
|
```bash
|
|
44
|
-
npm
|
|
103
|
+
npm install @akshayram1/omnibrowser-agent
|
|
45
104
|
```
|
|
46
105
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
- Open `chrome://extensions`
|
|
50
|
-
- Enable Developer Mode
|
|
51
|
-
- Click **Load unpacked**
|
|
52
|
-
- Select `dist`
|
|
106
|
+
---
|
|
53
107
|
|
|
54
|
-
##
|
|
55
|
-
|
|
56
|
-
1. Open a target website tab
|
|
57
|
-
2. Open extension popup
|
|
58
|
-
3. Enter goal (for example: `search contact John Doe in CRM and open profile`)
|
|
59
|
-
4. Select mode/planner
|
|
60
|
-
5. Click Start
|
|
61
|
-
6. If mode is `human-approved`, click **Approve pending action** on review steps
|
|
62
|
-
|
|
63
|
-
## Use as a web library
|
|
108
|
+
## Quick start
|
|
64
109
|
|
|
65
110
|
```ts
|
|
66
111
|
import { createBrowserAgent } from "@akshayram1/omnibrowser-agent";
|
|
67
112
|
|
|
68
113
|
const agent = createBrowserAgent({
|
|
69
|
-
goal: "
|
|
70
|
-
mode: "human-approved",
|
|
71
|
-
planner: { kind: "heuristic" }
|
|
114
|
+
goal: "Search for contact Jane Doe and open her profile",
|
|
115
|
+
mode: "human-approved", // or "autonomous"
|
|
116
|
+
planner: { kind: "heuristic" } // or "webllm"
|
|
72
117
|
}, {
|
|
73
|
-
onStep:
|
|
74
|
-
onApprovalRequired: (action) => console.log("
|
|
75
|
-
onDone:
|
|
76
|
-
|
|
118
|
+
onStep: (result, session) => console.log(result.message),
|
|
119
|
+
onApprovalRequired: (action, session) => console.log("Review:", action),
|
|
120
|
+
onDone: (result, session) => console.log("Done:", result.message),
|
|
121
|
+
onError: (err, session) => console.error(err),
|
|
122
|
+
onMaxStepsReached: (session) => console.log("Max steps hit"),
|
|
77
123
|
});
|
|
78
124
|
|
|
79
125
|
await agent.start();
|
|
80
126
|
|
|
81
|
-
//
|
|
127
|
+
// After onApprovalRequired fires:
|
|
82
128
|
await agent.resume();
|
|
83
129
|
|
|
84
|
-
//
|
|
85
|
-
console.log(agent.isRunning, agent.hasPendingAction);
|
|
86
|
-
|
|
87
|
-
// Stop at any time:
|
|
130
|
+
// Cancel at any time:
|
|
88
131
|
agent.stop();
|
|
89
132
|
```
|
|
90
133
|
|
|
91
|
-
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Planner modes
|
|
137
|
+
|
|
138
|
+
| Mode | Description | When to use |
|
|
139
|
+
|---|---|---|
|
|
140
|
+
| `heuristic` | Zero-dependency regex planner. Works fully offline. | Simple, predictable goals — navigate, fill, click |
|
|
141
|
+
| `webllm` | On-device LLM via WebGPU. Fully private, no API calls. | Open-ended, multi-step, language-heavy goals |
|
|
142
|
+
|
|
143
|
+
### WebLLM with a custom system prompt
|
|
144
|
+
|
|
145
|
+
```ts
|
|
146
|
+
const agent = createBrowserAgent({
|
|
147
|
+
goal: "Fill the checkout form",
|
|
148
|
+
planner: {
|
|
149
|
+
kind: "webllm",
|
|
150
|
+
systemPrompt: "You are a careful checkout assistant. Never submit before all required fields are filled."
|
|
151
|
+
}
|
|
152
|
+
});
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
See [docs/EMBEDDING.md](docs/EMBEDDING.md) for the full WebLLM bridge wiring guide.
|
|
156
|
+
|
|
157
|
+
### Recommended WebLLM models
|
|
158
|
+
|
|
159
|
+
- `Llama-3.2-1B-Instruct-q4f16_1-MLC` — fast, ~600 MB
|
|
160
|
+
- `Llama-3.2-3B-Instruct-q4f16_1-MLC` — better quality, ~1.5 GB
|
|
161
|
+
- `Phi-3.5-mini-instruct-q4f16_1-MLC` — strong quality, ~2 GB
|
|
162
|
+
- `Mistral-7B-Instruct-v0.3-q4f16_1-MLC` — balanced quality, ~4.1 GB
|
|
163
|
+
- `Qwen2.5-7B-Instruct-q4f16_1-MLC` — strongest quality, ~4.3 GB
|
|
164
|
+
- `Llama-3.1-8B-Instruct-q4f16_1-MLC` — strong reasoning, ~4.8 GB
|
|
165
|
+
|
|
166
|
+
Model availability can vary by WebLLM release/build; if one fails to load, use a smaller fallback like `Llama-3.2-1B-Instruct-q4f16_1-MLC`.
|
|
92
167
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
|
98
|
-
|
|
99
|
-
| `
|
|
100
|
-
| `
|
|
101
|
-
| `wait` | Pause for a given number of milliseconds |
|
|
102
|
-
| `done` | Signal task completion |
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Agent modes
|
|
171
|
+
|
|
172
|
+
| Mode | Behaviour |
|
|
173
|
+
|---|---|
|
|
174
|
+
| `autonomous` | All `safe` and `review` actions execute without pause |
|
|
175
|
+
| `human-approved` | `review`-rated actions pause and emit `onApprovalRequired` — call `resume()` to continue |
|
|
103
176
|
|
|
104
|
-
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## Supported actions
|
|
180
|
+
|
|
181
|
+
| Action | Description | Risk |
|
|
182
|
+
|---|---|---|
|
|
183
|
+
| `navigate` | Navigate to a URL (http/https only) | safe |
|
|
184
|
+
| `click` | Click an element by CSS selector | safe / review |
|
|
185
|
+
| `type` | Type text into an input or textarea | safe / review |
|
|
186
|
+
| `scroll` | Scroll a container or the page | safe |
|
|
187
|
+
| `focus` | Focus an element | safe |
|
|
188
|
+
| `wait` | Pause for N milliseconds | safe |
|
|
189
|
+
| `extract` | Extract text from an element | review |
|
|
190
|
+
| `done` | Signal task completion | safe |
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## AbortSignal support
|
|
105
195
|
|
|
106
196
|
```ts
|
|
107
197
|
const controller = new AbortController();
|
|
108
198
|
const agent = createBrowserAgent({ goal: "...", signal: controller.signal });
|
|
109
199
|
agent.start();
|
|
110
200
|
|
|
111
|
-
//
|
|
112
|
-
controller.abort();
|
|
201
|
+
controller.abort(); // cancel from outside
|
|
113
202
|
```
|
|
114
203
|
|
|
115
|
-
|
|
204
|
+
---
|
|
116
205
|
|
|
117
|
-
##
|
|
206
|
+
## Chrome Extension
|
|
118
207
|
|
|
119
|
-
1. Build
|
|
208
|
+
1. Build:
|
|
120
209
|
|
|
121
210
|
```bash
|
|
122
211
|
npm run build
|
|
123
212
|
```
|
|
124
213
|
|
|
125
|
-
2.
|
|
214
|
+
2. Open `chrome://extensions`, enable **Developer Mode**, click **Load unpacked**, select `dist/`.
|
|
126
215
|
|
|
127
|
-
|
|
128
|
-
python3 -m http.server 4173
|
|
129
|
-
```
|
|
216
|
+
3. Open any tab, enter a goal in the popup, pick a mode, and click **Start**.
|
|
130
217
|
|
|
131
|
-
|
|
218
|
+
See [docs/DEPLOYMENT.md](docs/DEPLOYMENT.md) for publishing and CI pipeline details.
|
|
132
219
|
|
|
133
|
-
|
|
220
|
+
---
|
|
134
221
|
|
|
135
|
-
|
|
136
|
-
|
|
222
|
+
## Project structure
|
|
223
|
+
|
|
224
|
+
```
|
|
225
|
+
src/
|
|
226
|
+
├── background/ Extension service worker — session management
|
|
227
|
+
├── content/ Extension content script — runs in page context
|
|
228
|
+
├── core/ Shared engine (planner, observer, executor)
|
|
229
|
+
│ ├── planner.ts
|
|
230
|
+
│ ├── observer.ts
|
|
231
|
+
│ └── executor.ts
|
|
232
|
+
├── lib/ npm library entry — BrowserAgent class
|
|
233
|
+
│ └── index.ts
|
|
234
|
+
├── popup/ Extension popup UI
|
|
235
|
+
└── shared/ Types, safety, and parse utilities
|
|
236
|
+
├── contracts.ts
|
|
237
|
+
├── safety.ts
|
|
238
|
+
└── parse-action.ts
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
---
|
|
137
242
|
|
|
138
243
|
## Changelog
|
|
139
244
|
|
|
245
|
+
### v0.2.6
|
|
246
|
+
|
|
247
|
+
- Reflection-before-action pattern (`evaluation → memory → next_goal → action`) — agent reasons about each step before acting
|
|
248
|
+
- Working memory carried across ticks for better multi-step goals
|
|
249
|
+
- `parsePlannerResult()` exported from the library
|
|
250
|
+
- `systemPrompt` option in `PlannerConfig` — pass your own prompt without rewriting the bridge
|
|
251
|
+
- Thought bubble (💭) messages in the live demo chat showing the agent's next intent
|
|
252
|
+
|
|
253
|
+
### v0.2.4 — v0.2.5
|
|
254
|
+
|
|
255
|
+
- CI pipeline: auto version bump on push to main
|
|
256
|
+
- Removed page-agent dependency — reflection pattern implemented natively
|
|
257
|
+
- Chatbot demo redesign: right-aligned user messages, typing indicator, tab navigation (CRM + Task Manager)
|
|
258
|
+
- `parsePlannerResult()` and `PlannerResult` type exported from library
|
|
259
|
+
|
|
140
260
|
### v0.2.2
|
|
141
261
|
|
|
142
|
-
- SDK/extension separation: core logic
|
|
262
|
+
- SDK/extension separation: core logic in `src/core/` shared between extension and npm library
|
|
143
263
|
- 22 unit tests across planner and safety modules
|
|
144
264
|
- Action verification in executor (disabled-check, value-verify, empty-check)
|
|
145
265
|
- `CandidateElement.label` from associated `<label>` elements
|
|
146
266
|
- Retry loop with `lastError` fed back to planner on failure
|
|
147
|
-
- `parseAction` utility exported from the library
|
|
148
267
|
|
|
149
268
|
### v0.2.0
|
|
150
269
|
|
|
151
|
-
-
|
|
152
|
-
-
|
|
153
|
-
-
|
|
154
|
-
-
|
|
155
|
-
-
|
|
156
|
-
- **Executor**: uses `InputEvent` for proper framework compatibility, added keyboard event dispatch
|
|
157
|
-
- **License**: added author name
|
|
270
|
+
- New actions: `scroll` and `focus`
|
|
271
|
+
- Smarter safety: risk assessment checks element label/text
|
|
272
|
+
- Improved heuristic planner with regex pattern matching
|
|
273
|
+
- Better page observation: filters invisible elements, up to 60 candidates
|
|
274
|
+
- Library API: `resume()`, `isRunning`, `hasPendingAction`, `onMaxStepsReached`, `AbortSignal`
|
|
158
275
|
|
|
159
276
|
### v0.1.0
|
|
160
277
|
|
|
161
|
-
- Extension runtime loop
|
|
162
|
-
- Shared action contracts
|
|
163
|
-
- Heuristic + WebLLM planner switch
|
|
164
|
-
- Human-approved mode
|
|
165
|
-
|
|
166
|
-
## Planner modes
|
|
167
|
-
|
|
168
|
-
| Mode | Description |
|
|
169
|
-
|---|---|
|
|
170
|
-
| `heuristic` | Zero-dependency regex-based planner. Works offline. Good for simple, predictable goals. |
|
|
171
|
-
| `webllm` | Delegates to a local WebLLM bridge on `window.__browserAgentWebLLM`. Fully private, no API calls, runs on-device via WebGPU. |
|
|
278
|
+
- Extension runtime loop, shared action contracts, heuristic + WebLLM planner, human-approved mode
|
|
172
279
|
|
|
173
|
-
|
|
280
|
+
---
|
|
174
281
|
|
|
175
|
-
|
|
176
|
-
- `webllm` mode expects a bridge implementation attached to `window.__browserAgentWebLLM`. See `docs/EMBEDDING.md` for a complete example.
|
|
282
|
+
## Docs
|
|
177
283
|
|
|
178
|
-
|
|
284
|
+
- [Embedding Guide](docs/EMBEDDING.md) — integrate into any web app
|
|
285
|
+
- [Architecture](docs/arch.md) — layer-by-layer breakdown
|
|
286
|
+
- [Deployment](docs/DEPLOYMENT.md) — npm publish, Vercel, Chrome extension, CI
|
|
287
|
+
- [Roadmap](docs/ROADMAP.md) — planned features
|
|
179
288
|
|
|
180
|
-
|
|
289
|
+
---
|
|
181
290
|
|
|
182
291
|
## License
|
|
183
292
|
|
package/dist/background.js
CHANGED
|
@@ -1,14 +1,29 @@
|
|
|
1
1
|
// src/background/index.ts
|
|
2
2
|
var sessions = /* @__PURE__ */ new Map();
|
|
3
|
-
function
|
|
3
|
+
/**
 * Coerce the `planner` value of an incoming message into a planner config.
 *
 * Accepted shapes:
 *   - the string "heuristic" or "webllm"            -> `{ kind }`
 *   - an object whose `kind` is one of those values -> `{ kind, modelId, systemPrompt }`
 * Anything else falls back to `{ kind: "heuristic" }`.
 *
 * `modelId` is returned trimmed: it was already validated with `trim()`, and
 * an identifier padded with whitespace can never match a real model name.
 * `systemPrompt` is passed through unchanged when it contains non-whitespace
 * text. Non-string or blank values become `undefined`.
 *
 * @param {unknown} rawPlanner - Planner value exactly as received.
 * @returns {{kind: "heuristic" | "webllm", modelId?: string, systemPrompt?: string}}
 */
function normalizePlannerConfig(rawPlanner) {
  const isKnownKind = (value) => value === "heuristic" || value === "webllm";
  const isNonBlankString = (value) => typeof value === "string" && value.trim() !== "";

  // Shorthand form: the planner kind passed as a bare string.
  if (isKnownKind(rawPlanner)) {
    return { kind: rawPlanner };
  }

  if (typeof rawPlanner === "object" && rawPlanner !== null && isKnownKind(rawPlanner.kind)) {
    const { kind, modelId, systemPrompt } = rawPlanner;
    return {
      kind,
      modelId: isNonBlankString(modelId) ? modelId.trim() : void 0,
      systemPrompt: isNonBlankString(systemPrompt) ? systemPrompt : void 0
    };
  }

  // Unknown or malformed input: use the planner that needs no model.
  return { kind: "heuristic" };
}
|
|
20
|
+
function makeSession(tabId, goal, mode, planner) {
|
|
4
21
|
return {
|
|
5
22
|
id: crypto.randomUUID(),
|
|
6
23
|
tabId,
|
|
7
24
|
goal,
|
|
8
25
|
mode,
|
|
9
|
-
planner
|
|
10
|
-
kind: plannerKind
|
|
11
|
-
},
|
|
26
|
+
planner,
|
|
12
27
|
history: [],
|
|
13
28
|
isRunning: true
|
|
14
29
|
};
|
|
@@ -23,6 +38,10 @@ async function tick(tabId) {
|
|
|
23
38
|
session
|
|
24
39
|
});
|
|
25
40
|
session.history.push(result.message);
|
|
41
|
+
if (result.reflection?.memory !== void 0) {
|
|
42
|
+
session.memory = result.reflection.memory;
|
|
43
|
+
}
|
|
44
|
+
session.lastError = result.status === "error" ? result.message : void 0;
|
|
26
45
|
if (result.status === "needs_approval") {
|
|
27
46
|
session.pendingAction = result.action;
|
|
28
47
|
session.isRunning = false;
|
|
@@ -37,7 +56,7 @@ async function tick(tabId) {
|
|
|
37
56
|
}
|
|
38
57
|
chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {
|
|
39
58
|
if (message.type === "START_AGENT") {
|
|
40
|
-
const session = makeSession(message.tabId, message.goal, message.mode, message.planner);
|
|
59
|
+
const session = makeSession(message.tabId, message.goal, message.mode, normalizePlannerConfig(message.planner));
|
|
41
60
|
sessions.set(message.tabId, session);
|
|
42
61
|
tick(message.tabId).catch((error) => {
|
|
43
62
|
const failed = sessions.get(message.tabId);
|
package/dist/background.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../src/background/index.ts"],
|
|
4
|
-
"sourcesContent": ["import type { AgentMode, AgentSession,
|
|
5
|
-
"mappings": ";AAEA,IAAM,WAAW,oBAAI,IAA0B;AAE/C,SAAS,YAAY,OAAe,MAAc,MAAiB,
|
|
4
|
+
"sourcesContent": ["import type { AgentMode, AgentSession, PlannerConfig } from \"../shared/contracts\";\n\nconst sessions = new Map<number, AgentSession>();\n\nfunction normalizePlannerConfig(rawPlanner: unknown): PlannerConfig {\n if (typeof rawPlanner === \"string\" && (rawPlanner === \"heuristic\" || rawPlanner === \"webllm\")) {\n return { kind: rawPlanner };\n }\n\n if (typeof rawPlanner === \"object\" && rawPlanner !== null) {\n const record = rawPlanner as Record<string, unknown>;\n const kind = record.kind;\n if (kind === \"heuristic\" || kind === \"webllm\") {\n return {\n kind,\n modelId: typeof record.modelId === \"string\" && record.modelId.trim() ? record.modelId : undefined,\n systemPrompt: typeof record.systemPrompt === \"string\" && record.systemPrompt.trim() ? record.systemPrompt : undefined\n };\n }\n }\n\n return { kind: \"heuristic\" };\n}\n\nfunction makeSession(tabId: number, goal: string, mode: AgentMode, planner: PlannerConfig): AgentSession {\n return {\n id: crypto.randomUUID(),\n tabId: tabId,\n goal,\n mode,\n planner,\n history: [],\n isRunning: true\n };\n}\n\nasync function tick(tabId: number) {\n const session = sessions.get(tabId);\n if (!session || !session.isRunning) {\n return;\n }\n\n const result = await chrome.tabs.sendMessage(tabId, {\n type: \"AGENT_TICK\",\n session\n });\n\n session.history.push(result.message);\n if (result.reflection?.memory !== undefined) {\n session.memory = result.reflection.memory;\n }\n session.lastError = result.status === \"error\" ? 
result.message : undefined;\n\n if (result.status === \"needs_approval\") {\n session.pendingAction = result.action;\n session.isRunning = false;\n return;\n }\n\n session.pendingAction = undefined;\n\n if ([\"done\", \"blocked\", \"error\"].includes(result.status)) {\n session.isRunning = false;\n return;\n }\n\n setTimeout(() => tick(tabId), 600);\n}\n\nchrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {\n if (message.type === \"START_AGENT\") {\n const session = makeSession(message.tabId, message.goal, message.mode, normalizePlannerConfig(message.planner));\n sessions.set(message.tabId, session);\n tick(message.tabId).catch((error) => {\n const failed = sessions.get(message.tabId);\n if (failed) {\n failed.history.push(`Error: ${String(error)}`);\n failed.isRunning = false;\n }\n });\n sendResponse({ ok: true });\n return true;\n }\n\n if (message.type === \"APPROVE_ACTION\") {\n const session = sessions.get(message.tabId);\n if (!session) {\n sendResponse({ ok: false, error: \"No active session\" });\n return true;\n }\n\n session.isRunning = true;\n tick(message.tabId).catch((error) => {\n session.history.push(`Error: ${String(error)}`);\n session.isRunning = false;\n });\n sendResponse({ ok: true });\n return true;\n }\n\n if (message.type === \"STOP_AGENT\") {\n const session = sessions.get(message.tabId);\n if (session) {\n session.isRunning = false;\n }\n chrome.tabs.sendMessage(message.tabId, { type: \"AGENT_STOP\" }).catch(() => undefined);\n sendResponse({ ok: true });\n return true;\n }\n\n if (message.type === \"GET_STATUS\") {\n const lines = Array.from(sessions.values()).map(\n (session) =>\n `${session.isRunning ? \"RUNNING\" : \"IDLE\"} ${session.tabId}: ${session.goal.slice(0, 45)}${session.goal.length > 45 ? \"...\" : \"\"}`\n );\n\n sendResponse({ status: lines.length > 0 ? lines.join(\"\\n\") : \"Idle\" });\n return true;\n }\n\n return false;\n});\n"],
|
|
5
|
+
"mappings": ";AAEA,IAAM,WAAW,oBAAI,IAA0B;AAE/C,SAAS,uBAAuB,YAAoC;AAClE,MAAI,OAAO,eAAe,aAAa,eAAe,eAAe,eAAe,WAAW;AAC7F,WAAO,EAAE,MAAM,WAAW;AAAA,EAC5B;AAEA,MAAI,OAAO,eAAe,YAAY,eAAe,MAAM;AACzD,UAAM,SAAS;AACf,UAAM,OAAO,OAAO;AACpB,QAAI,SAAS,eAAe,SAAS,UAAU;AAC7C,aAAO;AAAA,QACL;AAAA,QACA,SAAS,OAAO,OAAO,YAAY,YAAY,OAAO,QAAQ,KAAK,IAAI,OAAO,UAAU;AAAA,QACxF,cAAc,OAAO,OAAO,iBAAiB,YAAY,OAAO,aAAa,KAAK,IAAI,OAAO,eAAe;AAAA,MAC9G;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,MAAM,YAAY;AAC7B;AAEA,SAAS,YAAY,OAAe,MAAc,MAAiB,SAAsC;AACvG,SAAO;AAAA,IACL,IAAI,OAAO,WAAW;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS,CAAC;AAAA,IACV,WAAW;AAAA,EACb;AACF;AAEA,eAAe,KAAK,OAAe;AACjC,QAAM,UAAU,SAAS,IAAI,KAAK;AAClC,MAAI,CAAC,WAAW,CAAC,QAAQ,WAAW;AAClC;AAAA,EACF;AAEA,QAAM,SAAS,MAAM,OAAO,KAAK,YAAY,OAAO;AAAA,IAClD,MAAM;AAAA,IACN;AAAA,EACF,CAAC;AAED,UAAQ,QAAQ,KAAK,OAAO,OAAO;AACnC,MAAI,OAAO,YAAY,WAAW,QAAW;AAC3C,YAAQ,SAAS,OAAO,WAAW;AAAA,EACrC;AACA,UAAQ,YAAY,OAAO,WAAW,UAAU,OAAO,UAAU;AAEjE,MAAI,OAAO,WAAW,kBAAkB;AACtC,YAAQ,gBAAgB,OAAO;AAC/B,YAAQ,YAAY;AACpB;AAAA,EACF;AAEA,UAAQ,gBAAgB;AAExB,MAAI,CAAC,QAAQ,WAAW,OAAO,EAAE,SAAS,OAAO,MAAM,GAAG;AACxD,YAAQ,YAAY;AACpB;AAAA,EACF;AAEA,aAAW,MAAM,KAAK,KAAK,GAAG,GAAG;AACnC;AAEA,OAAO,QAAQ,UAAU,YAAY,CAAC,SAAS,SAAS,iBAAiB;AACvE,MAAI,QAAQ,SAAS,eAAe;AAClC,UAAM,UAAU,YAAY,QAAQ,OAAO,QAAQ,MAAM,QAAQ,MAAM,uBAAuB,QAAQ,OAAO,CAAC;AAC9G,aAAS,IAAI,QAAQ,OAAO,OAAO;AACnC,SAAK,QAAQ,KAAK,EAAE,MAAM,CAAC,UAAU;AACnC,YAAM,SAAS,SAAS,IAAI,QAAQ,KAAK;AACzC,UAAI,QAAQ;AACV,eAAO,QAAQ,KAAK,UAAU,OAAO,KAAK,CAAC,EAAE;AAC7C,eAAO,YAAY;AAAA,MACrB;AAAA,IACF,CAAC;AACD,iBAAa,EAAE,IAAI,KAAK,CAAC;AACzB,WAAO;AAAA,EACT;AAEA,MAAI,QAAQ,SAAS,kBAAkB;AACrC,UAAM,UAAU,SAAS,IAAI,QAAQ,KAAK;AAC1C,QAAI,CAAC,SAAS;AACZ,mBAAa,EAAE,IAAI,OAAO,OAAO,oBAAoB,CAAC;AACtD,aAAO;AAAA,IACT;AAEA,YAAQ,YAAY;AACpB,SAAK,QAAQ,KAAK,EAAE,MAAM,CAAC,UAAU;AACnC,cAAQ,QAAQ,KAAK,UAAU,OAAO,KAAK,CAAC,EAAE;AAC9C,cAAQ,YAAY;AAAA,IACtB,CAAC;AACD,iBAAa,EAAE,IAAI,KAAK,CAAC;AACzB,WAAO;AAAA,EACT;AAEA,MAAI,QAAQ,SAAS,cAAc;AACjC,UAAM,UAAU,SAAS,IAAI,QAAQ,KAAK;AAC1C,QAAI,SAAS
;AACX,cAAQ,YAAY;AAAA,IACtB;AACA,WAAO,KAAK,YAAY,QAAQ,OAAO,EAAE,MAAM,aAAa,CAAC,EAAE,MAAM,MAAM,MAAS;AACpF,iBAAa,EAAE,IAAI,KAAK,CAAC;AACzB,WAAO;AAAA,EACT;AAEA,MAAI,QAAQ,SAAS,cAAc;AACjC,UAAM,QAAQ,MAAM,KAAK,SAAS,OAAO,CAAC,EAAE;AAAA,MAC1C,CAAC,YACC,GAAG,QAAQ,YAAY,YAAY,MAAM,IAAI,QAAQ,KAAK,KAAK,QAAQ,KAAK,MAAM,GAAG,EAAE,CAAC,GAAG,QAAQ,KAAK,SAAS,KAAK,QAAQ,EAAE;AAAA,IACpI;AAEA,iBAAa,EAAE,QAAQ,MAAM,SAAS,IAAI,MAAM,KAAK,IAAI,IAAI,OAAO,CAAC;AACrE,WAAO;AAAA,EACT;AAEA,SAAO;AACT,CAAC;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/content.js
CHANGED
|
@@ -1,3 +1,86 @@
|
|
|
1
|
+
// Bundler-generated helpers (kept as `var` so the names stay visible to the
// rest of the bundle exactly as before).
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
// Wraps a single-key object `{ "path"() { ... } }` into an initializer that
// runs the module body on the first call only (`fn` is zeroed after use) and
// returns the cached result on later calls.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Defines every entry of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};

// src/shared/parse-action.ts
var parse_action_exports = {};
__export(parse_action_exports, {
  parseAction: () => parseAction,
  parsePlannerResult: () => parsePlannerResult
});

/**
 * Return the contents of the first Markdown code fence (with or without a
 * `json` tag) in `raw`, or `raw` itself when no fence is present. Trimmed.
 */
function stripCodeFence(raw) {
  const fenceMatch = raw.match(/```(?:json)?\s*([\s\S]*?)```/);
  return fenceMatch ? fenceMatch[1].trim() : raw.trim();
}

/**
 * Collect every top-level `{ ... }` span in `text` whose braces balance.
 * Braces inside double-quoted strings (within an object) are ignored so a
 * value such as "a } b" does not end the span early.
 */
function balancedObjectSpans(text) {
  const spans = [];
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;
  for (let i = 0; i < text.length; i += 1) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"' && depth > 0) {
      inString = true;
    } else if (ch === "{") {
      if (depth === 0) {
        start = i;
      }
      depth += 1;
    } else if (ch === "}" && depth > 0) {
      depth -= 1;
      if (depth === 0) {
        spans.push(text.slice(start, i + 1));
      }
    }
  }
  return spans;
}

/**
 * Locate and parse a JSON object inside free-form model output.
 *
 * The widest `{ ... }` span is tried first, so every input that parsed before
 * still yields the same value. If that span is not valid JSON (for example
 * because trailing prose contains another brace), each balanced top-level
 * span is tried in order.
 *
 * @returns {{found: false} | {found: true, text: string, value: unknown} | {found: true, text: string, failed: true}}
 */
function extractJsonObject(raw) {
  const candidate = stripCodeFence(raw);
  const widest = candidate.match(/\{[\s\S]*\}/);
  if (!widest) {
    return { found: false };
  }
  for (const text of [widest[0], ...balancedObjectSpans(candidate)]) {
    try {
      return { found: true, text, value: JSON.parse(text) };
    } catch {
      // Not valid JSON; fall through to the next candidate span.
    }
  }
  return { found: true, text: widest[0], failed: true };
}

/** True when `value` is a non-null, non-array object. */
function isPlainRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse raw planner text into a single action object.
 *
 * Never throws: every failure is reported as a `{ type: "done", reason }`
 * action. The reason prefixes are matched by callers, so they must stay
 * stable.
 *
 * @param {string} raw - Model output, optionally wrapped in a code fence.
 * @returns {object} The parsed action, or a `done` action describing the failure.
 */
function parseAction(raw) {
  const extracted = extractJsonObject(raw);
  if (!extracted.found) {
    return { type: "done", reason: `No JSON object found in: ${raw.slice(0, 120)}` };
  }
  if (extracted.failed) {
    return { type: "done", reason: `JSON parse error for: ${extracted.text.slice(0, 120)}` };
  }
  const obj = extracted.value;
  if (!isPlainRecord(obj)) {
    return { type: "done", reason: "Parsed value is not an object" };
  }
  if (typeof obj.type !== "string" || !VALID_TYPES.has(obj.type)) {
    return { type: "done", reason: `Unknown or missing action type: ${String(obj.type)}` };
  }
  return obj;
}

/**
 * Parse raw planner text into `{ action, evaluation?, memory?, nextGoal? }`.
 *
 * When the JSON has an object-valued `action` field, that field is validated
 * with `parseAction` and the reflection fields are copied when they are
 * strings (`next_goal` is accepted as an alias of `nextGoal`). Otherwise the
 * whole object is treated as the action itself.
 *
 * @param {string} raw - Model output, optionally wrapped in a code fence.
 * @returns {{action: object, evaluation?: string, memory?: string, nextGoal?: string}}
 */
function parsePlannerResult(raw) {
  const extracted = extractJsonObject(raw);
  if (!extracted.found) {
    return { action: { type: "done", reason: `No JSON found in: ${raw.slice(0, 120)}` } };
  }
  if (extracted.failed) {
    return { action: { type: "done", reason: `JSON parse error: ${extracted.text.slice(0, 120)}` } };
  }
  const obj = extracted.value;
  if (!isPlainRecord(obj)) {
    return { action: { type: "done", reason: "Parsed value is not an object" } };
  }
  if (typeof obj.action === "object" && obj.action !== null) {
    const asString = (value) => (typeof value === "string" ? value : void 0);
    return {
      action: parseAction(JSON.stringify(obj.action)),
      evaluation: asString(obj.evaluation),
      memory: asString(obj.memory),
      nextGoal: asString(obj.nextGoal) ?? asString(obj.next_goal)
    };
  }
  return { action: parseAction(extracted.text) };
}

// Assigned by init_parse_action(); parseAction must not run before that.
var VALID_TYPES;
var init_parse_action = __esm({
  "src/shared/parse-action.ts"() {
    "use strict";
    VALID_TYPES = /* @__PURE__ */ new Set([
      "click",
      "type",
      "navigate",
      "extract",
      "scroll",
      "focus",
      "wait",
      "done"
    ]);
  }
});
|
|
83
|
+
|
|
1
84
|
// src/shared/safety.ts
|
|
2
85
|
var RISKY_KEYWORDS = /\b(delete|remove|pay|purchase|submit|confirm|checkout|transfer|withdraw|send)\b/i;
|
|
3
86
|
function elementTextRisky(text) {
|
|
@@ -253,6 +336,18 @@ function toPlannerResult(raw) {
|
|
|
253
336
|
}
|
|
254
337
|
return { action: raw };
|
|
255
338
|
}
|
|
339
|
+
/**
 * Parse raw planner text using the lazily initialised parse-action module.
 *
 * The module is initialised in a promise continuation, then its
 * `parsePlannerResult` export is applied to `raw`.
 *
 * @param {string} raw - Text returned by the planner bridge.
 * @returns {Promise<object>} Whatever `parsePlannerResult` returns for `raw`.
 */
async function parsePlannerText(raw) {
  const loadParserModule = () => {
    init_parse_action();
    return parse_action_exports;
  };
  const parserModule = await Promise.resolve().then(loadParserModule);
  return parserModule.parsePlannerResult(raw);
}
|
|
343
|
+
/**
 * Normalise a planner bridge response into `{ result, parseFailed, rawText? }`.
 *
 * String responses are parsed with `parsePlannerText`. The parser reports
 * failures as a `done` action whose reason begins with a fixed phrase, so the
 * failure check is anchored to the start of the reason. A genuine `done`
 * action whose reason merely mentions one of those phrases later in its text
 * is therefore not mistaken for a parse failure.
 *
 * Non-string responses are passed to `toPlannerResult` and never count as
 * parse failures.
 *
 * @param {unknown} raw - Value returned by the bridge.
 * @returns {Promise<{result: object, parseFailed: boolean, rawText?: string}>}
 */
async function normalizeBridgeResponse(raw) {
  if (typeof raw !== "string") {
    return { result: toPlannerResult(raw), parseFailed: false };
  }

  // Prefixes of every failure reason the parser emits.
  const parseFailurePrefix = /^(No JSON|JSON parse error|Parsed value is not an object|Unknown or missing action type)/;

  const parsed = await parsePlannerText(raw);
  const { type, reason } = parsed.action;
  const parseFailed = type === "done" && typeof reason === "string" && parseFailurePrefix.test(reason);
  return { result: parsed, parseFailed, rawText: raw };
}
|
|
256
351
|
async function planNextAction(config, input) {
|
|
257
352
|
if (config.kind === "heuristic") {
|
|
258
353
|
return { action: heuristicPlan(input) };
|
|
@@ -266,8 +361,25 @@ async function planNextAction(config, input) {
|
|
|
266
361
|
}
|
|
267
362
|
};
|
|
268
363
|
}
|
|
269
|
-
const
|
|
270
|
-
|
|
364
|
+
const plannerInput = { ...input, systemPrompt: config.systemPrompt };
|
|
365
|
+
const firstAttempt = await normalizeBridgeResponse(await bridge.plan(plannerInput, config.modelId));
|
|
366
|
+
if (!firstAttempt.parseFailed) {
|
|
367
|
+
return firstAttempt.result;
|
|
368
|
+
}
|
|
369
|
+
if (bridge.retryInvalidJson && firstAttempt.rawText) {
|
|
370
|
+
const retryAttempt = await normalizeBridgeResponse(
|
|
371
|
+
await bridge.retryInvalidJson(plannerInput, firstAttempt.rawText, config.modelId)
|
|
372
|
+
);
|
|
373
|
+
if (!retryAttempt.parseFailed) {
|
|
374
|
+
return retryAttempt.result;
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
return {
|
|
378
|
+
action: {
|
|
379
|
+
type: "done",
|
|
380
|
+
reason: "WebLLM output could not be parsed after retry."
|
|
381
|
+
}
|
|
382
|
+
};
|
|
271
383
|
}
|
|
272
384
|
|
|
273
385
|
// src/content/index.ts
|
|
@@ -293,8 +405,12 @@ async function runTick(session) {
|
|
|
293
405
|
if (action.type === "done") {
|
|
294
406
|
return { status: "done", action, message: action.reason, reflection };
|
|
295
407
|
}
|
|
296
|
-
|
|
297
|
-
|
|
408
|
+
try {
|
|
409
|
+
const message = await executeAction(action);
|
|
410
|
+
return { status: "executed", action, message, reflection };
|
|
411
|
+
} catch (error) {
|
|
412
|
+
return { status: "error", action, message: String(error), reflection };
|
|
413
|
+
}
|
|
298
414
|
}
|
|
299
415
|
async function executePendingAction(session) {
|
|
300
416
|
if (!session.pendingAction) {
|