@akshayram1/omnibrowser-agent 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +168 -0
- package/dist/background.js +84 -0
- package/dist/background.js.map +7 -0
- package/dist/content.js +278 -0
- package/dist/content.js.map +7 -0
- package/dist/lib.js +388 -0
- package/dist/lib.js.map +7 -0
- package/dist/manifest.json +23 -0
- package/dist/popup.html +45 -0
- package/dist/popup.js +46 -0
- package/dist/popup.js.map +7 -0
- package/dist/types/content/executor.d.ts +2 -0
- package/dist/types/content/pageObserver.d.ts +2 -0
- package/dist/types/content/planner.d.ts +2 -0
- package/dist/types/lib/index.d.ts +23 -0
- package/dist/types/shared/contracts.d.ts +92 -0
- package/dist/types/shared/safety.d.ts +2 -0
- package/docs/ARCHITECTURE.md +56 -0
- package/docs/EMBEDDING.md +72 -0
- package/docs/ROADMAP.md +27 -0
- package/package.json +30 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
export type AgentMode = "autonomous" | "human-approved";
|
|
2
|
+
export type PlannerKind = "heuristic" | "webllm";
|
|
3
|
+
export type RiskLevel = "safe" | "review" | "blocked";
|
|
4
|
+
export type AgentAction = {
|
|
5
|
+
type: "click";
|
|
6
|
+
selector: string;
|
|
7
|
+
label?: string;
|
|
8
|
+
} | {
|
|
9
|
+
type: "type";
|
|
10
|
+
selector: string;
|
|
11
|
+
text: string;
|
|
12
|
+
clearFirst?: boolean;
|
|
13
|
+
label?: string;
|
|
14
|
+
} | {
|
|
15
|
+
type: "navigate";
|
|
16
|
+
url: string;
|
|
17
|
+
} | {
|
|
18
|
+
type: "extract";
|
|
19
|
+
selector: string;
|
|
20
|
+
label: string;
|
|
21
|
+
} | {
|
|
22
|
+
type: "scroll";
|
|
23
|
+
selector?: string;
|
|
24
|
+
deltaY: number;
|
|
25
|
+
} | {
|
|
26
|
+
type: "focus";
|
|
27
|
+
selector: string;
|
|
28
|
+
} | {
|
|
29
|
+
type: "wait";
|
|
30
|
+
ms: number;
|
|
31
|
+
} | {
|
|
32
|
+
type: "done";
|
|
33
|
+
reason: string;
|
|
34
|
+
};
|
|
35
|
+
export interface CandidateElement {
|
|
36
|
+
selector: string;
|
|
37
|
+
role: string;
|
|
38
|
+
text: string;
|
|
39
|
+
placeholder?: string;
|
|
40
|
+
}
|
|
41
|
+
export interface PageSnapshot {
|
|
42
|
+
url: string;
|
|
43
|
+
title: string;
|
|
44
|
+
textPreview: string;
|
|
45
|
+
candidates: CandidateElement[];
|
|
46
|
+
}
|
|
47
|
+
export interface PlannerInput {
|
|
48
|
+
goal: string;
|
|
49
|
+
snapshot: PageSnapshot;
|
|
50
|
+
history: string[];
|
|
51
|
+
}
|
|
52
|
+
export interface PlannerConfig {
|
|
53
|
+
kind: PlannerKind;
|
|
54
|
+
modelId?: string;
|
|
55
|
+
}
|
|
56
|
+
export interface AgentSession {
|
|
57
|
+
id: string;
|
|
58
|
+
tabId: number | null;
|
|
59
|
+
goal: string;
|
|
60
|
+
mode: AgentMode;
|
|
61
|
+
planner: PlannerConfig;
|
|
62
|
+
history: string[];
|
|
63
|
+
isRunning: boolean;
|
|
64
|
+
pendingAction?: AgentAction;
|
|
65
|
+
}
|
|
66
|
+
export interface LibraryAgentConfig {
|
|
67
|
+
goal: string;
|
|
68
|
+
mode?: AgentMode;
|
|
69
|
+
planner?: PlannerConfig;
|
|
70
|
+
maxSteps?: number;
|
|
71
|
+
stepDelayMs?: number;
|
|
72
|
+
signal?: AbortSignal;
|
|
73
|
+
}
|
|
74
|
+
export interface LibraryAgentEvents {
|
|
75
|
+
onStart?: (session: AgentSession) => void;
|
|
76
|
+
onStep?: (result: ContentResult, session: AgentSession) => void;
|
|
77
|
+
onApprovalRequired?: (action: AgentAction, session: AgentSession) => void;
|
|
78
|
+
onDone?: (result: ContentResult, session: AgentSession) => void;
|
|
79
|
+
onError?: (error: unknown, session: AgentSession) => void;
|
|
80
|
+
onMaxStepsReached?: (session: AgentSession) => void;
|
|
81
|
+
}
|
|
82
|
+
export type ContentCommand = {
|
|
83
|
+
type: "AGENT_TICK";
|
|
84
|
+
session: AgentSession;
|
|
85
|
+
} | {
|
|
86
|
+
type: "AGENT_STOP";
|
|
87
|
+
};
|
|
88
|
+
export type ContentResult = {
|
|
89
|
+
status: "executed" | "needs_approval" | "blocked" | "done" | "error";
|
|
90
|
+
message: string;
|
|
91
|
+
action?: AgentAction;
|
|
92
|
+
};
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# OmniBrowser Agent Architecture (v0.1)
|
|
2
|
+
|
|
3
|
+
## Goals
|
|
4
|
+
|
|
5
|
+
- Local-first runtime in browser
|
|
6
|
+
- Privacy-first defaults
|
|
7
|
+
- Open-source composable planner/executor contracts
|
|
8
|
+
- Human-approved mode for risky actions
|
|
9
|
+
|
|
10
|
+
## Runtime Components
|
|
11
|
+
|
|
12
|
+
1. Popup UI (`src/popup`)
|
|
13
|
+
- Starts/stops sessions
|
|
14
|
+
- Picks mode (`autonomous`, `human-approved`)
|
|
15
|
+
- Picks planner (`heuristic`, `webllm`)
|
|
16
|
+
|
|
17
|
+
2. Background Service Worker (`src/background`)
|
|
18
|
+
- Session state machine per tab
|
|
19
|
+
- Tick loop orchestration
|
|
20
|
+
- Approval handling
|
|
21
|
+
|
|
22
|
+
3. Content Agent (`src/content`)
|
|
23
|
+
- `pageObserver`: page snapshot extraction
|
|
24
|
+
- `planner`: next-action decision (heuristic/WebLLM)
|
|
25
|
+
- `safety`: risk gating (`safe`, `review`, `blocked`)
|
|
26
|
+
- `executor`: DOM action execution
|
|
27
|
+
|
|
28
|
+
## Contracts
|
|
29
|
+
|
|
30
|
+
- Shared in `src/shared/contracts.ts`
|
|
31
|
+
- Action protocol:
|
|
32
|
+
- click
|
|
33
|
+
- type
|
|
34
|
+
- navigate
|
|
35
|
+
- extract
|
|
36
|
+
- scroll
- focus
- wait
|
|
37
|
+
- done
|
|
38
|
+
|
|
39
|
+
## Safety Model
|
|
40
|
+
|
|
41
|
+
- Block invalid URL protocols
|
|
42
|
+
- Review risky actions (submit/delete/pay-like selectors)
|
|
43
|
+
- In `human-approved` mode, review-level actions require manual approval
|
|
44
|
+
|
|
45
|
+
## WebLLM Usage
|
|
46
|
+
|
|
47
|
+
- Planner includes a `webllm` mode contract with a local bridge hook
|
|
48
|
+
- v0.1 bridge entrypoint: `window.__browserAgentWebLLM.plan(input, modelId)`
|
|
49
|
+
- Full in-extension worker integration is planned for v0.2
|
|
50
|
+
|
|
51
|
+
## Limitations (v0.1)
|
|
52
|
+
|
|
53
|
+
- No persistent long-term memory yet
|
|
54
|
+
- No task DSL/skills registry yet
|
|
55
|
+
- Risk scoring is a simple keyword heuristic
|
|
56
|
+
- No robust selector healing yet
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Embedding OmniBrowser Agent in Your Website
|
|
2
|
+
|
|
3
|
+
You can keep the extension flow and also embed OmniBrowser Agent as a library in your own web app.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @akshayram1/omnibrowser-agent
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Basic usage
|
|
12
|
+
|
|
13
|
+
```ts
|
|
14
|
+
import { createBrowserAgent } from "@akshayram1/omnibrowser-agent";
|
|
15
|
+
|
|
16
|
+
const agent = createBrowserAgent(
|
|
17
|
+
{
|
|
18
|
+
goal: "Search contact Jane Doe and open profile",
|
|
19
|
+
mode: "human-approved",
|
|
20
|
+
planner: { kind: "heuristic" },
|
|
21
|
+
maxSteps: 15,
|
|
22
|
+
stepDelayMs: 400
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
onStep: (result) => console.log("step", result),
|
|
26
|
+
onApprovalRequired: (action) => {
|
|
27
|
+
console.log("approval required", action);
|
|
28
|
+
// Show your own modal/button, then call agent.approvePendingAction()
|
|
29
|
+
},
|
|
30
|
+
onDone: (result) => console.log("done", result),
|
|
31
|
+
onError: (error) => console.error(error)
|
|
32
|
+
}
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
await agent.start();
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Approve a pending action
|
|
39
|
+
|
|
40
|
+
```ts
|
|
41
|
+
await agent.approvePendingAction();
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Stop a running session
|
|
45
|
+
|
|
46
|
+
```ts
|
|
47
|
+
agent.stop();
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## WebLLM mode in embedded app
|
|
51
|
+
|
|
52
|
+
To use planner mode `webllm`, provide a local bridge in your app:
|
|
53
|
+
|
|
54
|
+
```ts
|
|
55
|
+
window.__browserAgentWebLLM = {
|
|
56
|
+
async plan(input, modelId) {
|
|
57
|
+
// call your local WebLLM engine and return one AgentAction JSON
|
|
58
|
+
return { type: "done", reason: `Implement bridge with model ${modelId ?? "default"}` };
|
|
59
|
+
}
|
|
60
|
+
};
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Then configure:
|
|
64
|
+
|
|
65
|
+
```ts
|
|
66
|
+
planner: { kind: "webllm", modelId: "Llama-3.2-1B-Instruct-q4f16_1-MLC" }
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Notes
|
|
70
|
+
|
|
71
|
+
- For production, mount this inside an authenticated app shell and add your own permission checks.
|
|
72
|
+
- `human-approved` mode is recommended for CRM/finance/admin actions.
|
package/docs/ROADMAP.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Roadmap
|
|
2
|
+
|
|
3
|
+
## v0.1 (current)
|
|
4
|
+
|
|
5
|
+
- Extension runtime loop
|
|
6
|
+
- Shared action contracts
|
|
7
|
+
- Heuristic + WebLLM planner switch
|
|
8
|
+
- Human-approved mode
|
|
9
|
+
|
|
10
|
+
## v0.2
|
|
11
|
+
|
|
12
|
+
- Site profile + policy engine (allowlist, blocked domains)
|
|
13
|
+
- Selector healing and fallback strategy
|
|
14
|
+
- Session memory and action replay log
|
|
15
|
+
- Drupal CRM starter skills
|
|
16
|
+
|
|
17
|
+
## v0.3
|
|
18
|
+
|
|
19
|
+
- Long-term encrypted memory in IndexedDB
|
|
20
|
+
- Goal decomposition planner (multi-step task graphs)
|
|
21
|
+
- Multi-tab workflows
|
|
22
|
+
|
|
23
|
+
## v1.0
|
|
24
|
+
|
|
25
|
+
- Stable plugin API for site skills
|
|
26
|
+
- Validation/eval harness with benchmark tasks
|
|
27
|
+
- Cross-browser packaging (Chromium + Firefox)
|
package/package.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@akshayram1/omnibrowser-agent",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"private": false,
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/lib.js",
|
|
7
|
+
"module": "./dist/lib.js",
|
|
8
|
+
"types": "./dist/types/lib/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/types/lib/index.d.ts",
|
|
12
|
+
"import": "./dist/lib.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"description": "Local-first OmniBrowser Agent (WebLLM + page automation)",
|
|
16
|
+
"author": "Akshay Chame",
|
|
17
|
+
"license": "MIT",
|
|
18
|
+
"scripts": {
|
|
19
|
+
"build": "node scripts/build.mjs && npm run build:types",
|
|
20
|
+
"build:types": "tsc -p tsconfig.lib.json",
|
|
21
|
+
"watch": "node scripts/build.mjs --watch",
|
|
22
|
+
"typecheck": "tsc --noEmit"
|
|
23
|
+
},
|
|
24
|
+
"dependencies": {},
|
|
25
|
+
"devDependencies": {
|
|
26
|
+
"@types/chrome": "^0.0.322",
|
|
27
|
+
"esbuild": "^0.25.2",
|
|
28
|
+
"typescript": "^5.8.2"
|
|
29
|
+
}
|
|
30
|
+
}
|