@akshayram1/omnibrowser-agent 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ export type AgentMode = "autonomous" | "human-approved"; // How a session runs; per the architecture notes in this package, review-level actions need manual approval in "human-approved" mode.
2
+ export type PlannerKind = "heuristic" | "webllm"; // Selects which planner decides the next action: the heuristic one or the WebLLM-backed one.
3
+ export type RiskLevel = "safe" | "review" | "blocked"; // Risk classification of an action; this is the return type of assessRisk().
4
+ export type AgentAction = { // One step of the action protocol; a union discriminated by the `type` field.
5
+ type: "click"; // Variant: click, targeting `selector`.
6
+ selector: string;
7
+ label?: string; // NOTE(review): presumably a human-readable name for UI/logs — confirm against the executor.
8
+ } | {
9
+ type: "type"; // Variant: enter `text` into the element at `selector`.
10
+ selector: string;
11
+ text: string;
12
+ clearFirst?: boolean; // NOTE(review): presumably clears the existing value before typing; default not visible here — confirm.
13
+ label?: string; // NOTE(review): presumably a human-readable name for UI/logs — confirm.
14
+ } | {
15
+ type: "navigate"; // Variant: go to `url`; the architecture notes say invalid URL protocols are blocked by the safety layer.
16
+ url: string;
17
+ } | {
18
+ type: "extract"; // Variant: extract from `selector`; unlike click/type, `label` is required here.
19
+ selector: string;
20
+ label: string;
21
+ } | {
22
+ type: "scroll"; // Variant: scroll by `deltaY`.
23
+ selector?: string; // NOTE(review): presumably scrolls the page/window when omitted — confirm.
24
+ deltaY: number; // NOTE(review): units and sign convention are not visible here — confirm.
25
+ } | {
26
+ type: "focus"; // Variant: focus the element at `selector`.
27
+ selector: string;
28
+ } | {
29
+ type: "wait"; // Variant: pause.
30
+ ms: number; // NOTE(review): presumably a duration in milliseconds, going by the name — confirm.
31
+ } | {
32
+ type: "done"; // Variant: the planner signals completion, with a textual `reason`.
33
+ reason: string;
34
+ };
35
+ export interface CandidateElement { // One entry of PageSnapshot.candidates: an element the planner may target.
36
+ selector: string; // Selector string; same field name the AgentAction variants use to address elements.
37
+ role: string; // NOTE(review): whether this is an ARIA role, tag name or something else is not visible here — confirm.
38
+ text: string;
39
+ placeholder?: string; // Optional; NOTE(review): presumably only set for input-like elements — confirm.
40
+ }
41
+ export interface PageSnapshot { // Snapshot of the current page handed to the planner via PlannerInput.snapshot.
42
+ url: string;
43
+ title: string;
44
+ textPreview: string; // NOTE(review): presumably a truncated excerpt of page text; length limit not visible here — confirm.
45
+ candidates: CandidateElement[]; // Elements available as action targets.
46
+ }
47
+ export interface PlannerInput { // Everything a planner receives when asked for the next action.
48
+ goal: string; // The goal text of the session.
49
+ snapshot: PageSnapshot; // Current page state.
50
+ history: string[]; // NOTE(review): presumably textual records of prior steps; entry format not visible here — confirm.
51
+ }
52
+ export interface PlannerConfig { // Planner selection, e.g. { kind: "heuristic" } or { kind: "webllm", modelId: "..." }.
53
+ kind: PlannerKind;
54
+ modelId?: string; // Optional model identifier; the embedding guide passes it together with kind "webllm".
55
+ }
56
+ export interface AgentSession { // State of one agent run; passed to every LibraryAgentEvents callback and carried by the AGENT_TICK command.
57
+ id: string;
58
+ tabId: number | null; // NOTE(review): presumably null when the session is not bound to a browser tab (e.g. embedded library use) — confirm.
59
+ goal: string;
60
+ mode: AgentMode;
61
+ planner: PlannerConfig;
62
+ history: string[]; // NOTE(review): presumably the same step log that is fed to PlannerInput.history — confirm.
63
+ isRunning: boolean;
64
+ pendingAction?: AgentAction; // NOTE(review): presumably the action awaiting approval in "human-approved" mode — confirm.
65
+ }
66
+ export interface LibraryAgentConfig { // Options object for embedding the agent as a library (first argument of createBrowserAgent in the embedding guide).
67
+ goal: string; // The only required option.
68
+ mode?: AgentMode; // Default not visible in this declaration.
69
+ planner?: PlannerConfig; // Default not visible in this declaration.
70
+ maxSteps?: number; // Step budget; see onMaxStepsReached. Default not visible here.
71
+ stepDelayMs?: number; // NOTE(review): presumably the pause between steps in milliseconds, going by the name — confirm.
72
+ signal?: AbortSignal; // NOTE(review): presumably lets the caller cancel a run — confirm how abort is surfaced.
73
+ }
74
+ export interface LibraryAgentEvents { // Optional callbacks for library embedding (second argument of createBrowserAgent in the embedding guide).
75
+ onStart?: (session: AgentSession) => void;
76
+ onStep?: (result: ContentResult, session: AgentSession) => void; // Receives the result of a step.
77
+ onApprovalRequired?: (action: AgentAction, session: AgentSession) => void; // Per the embedding guide: show your own UI, then call approvePendingAction().
78
+ onDone?: (result: ContentResult, session: AgentSession) => void;
79
+ onError?: (error: unknown, session: AgentSession) => void; // `error` is unknown: narrow it before use.
80
+ onMaxStepsReached?: (session: AgentSession) => void; // NOTE(review): presumably fired when LibraryAgentConfig.maxSteps is exhausted — confirm.
81
+ }
82
+ export type ContentCommand = { // Command message discriminated by `type`; NOTE(review): presumably sent from the background worker to the content agent — confirm.
83
+ type: "AGENT_TICK"; // Carries the full session for one tick.
84
+ session: AgentSession;
85
+ } | {
86
+ type: "AGENT_STOP"; // No payload.
87
+ };
88
+ export type ContentResult = { // Outcome of a step; delivered to the onStep and onDone callbacks.
89
+ status: "executed" | "needs_approval" | "blocked" | "done" | "error"; // Outcome tag for the step.
90
+ message: string;
91
+ action?: AgentAction; // Optional; NOTE(review): presumably the action this result refers to — confirm when it is omitted.
92
+ };
@@ -0,0 +1,2 @@
1
+ import type { AgentAction, RiskLevel } from "./contracts";
2
+ export declare function assessRisk(action: AgentAction): RiskLevel; // Classifies an action as "safe", "review" or "blocked"; declaration only, the scoring logic is not visible here.
@@ -0,0 +1,56 @@
1
+ # OmniBrowser Agent Architecture (v0.1)
2
+
3
+ ## Goals
4
+
5
+ - Local-first runtime in browser
6
+ - Privacy-first defaults
7
+ - Open-source composable planner/executor contracts
8
+ - Human-approved mode for risky actions
9
+
10
+ ## Runtime Components
11
+
12
+ 1. Popup UI (`src/popup`)
13
+ - Starts/stops sessions
14
+ - Picks mode (`autonomous`, `human-approved`)
15
+ - Picks planner (`heuristic`, `webllm`)
16
+
17
+ 2. Background Service Worker (`src/background`)
18
+ - Session state machine per tab
19
+ - Tick loop orchestration
20
+ - Approval handling
21
+
22
+ 3. Content Agent (`src/content`)
23
+ - `pageObserver`: page snapshot extraction
24
+ - `planner`: next-action decision (heuristic/WebLLM)
25
+ - `safety`: risk gating (`safe`, `review`, `blocked`)
26
+ - `executor`: DOM action execution
27
+
28
+ ## Contracts
29
+
30
+ - Shared in `src/shared/contracts.ts`
31
+ - Action protocol:
32
+ - click
33
+ - type
34
+ - navigate
35
+ - extract
36
+ - wait
37
+ - done
38
+
39
+ ## Safety Model
40
+
41
+ - Block invalid URL protocols
42
+ - Review risky actions (submit/delete/pay-like selectors)
43
+ - In `human-approved` mode, review-level actions require manual approval
44
+
45
+ ## WebLLM Usage
46
+
47
+ - Planner includes a `webllm` mode contract with a local bridge hook
48
+ - v0.1 bridge entrypoint: `window.__browserAgentWebLLM.plan(input, modelId)`
49
+ - Full in-extension worker integration is planned for v0.2
50
+
51
+ ## Limitations (v0.1)
52
+
53
+ - No persistent long-term memory yet
54
+ - No task DSL/skills registry yet
55
+ - Risk scoring is simple keyword heuristic
56
+ - No robust selector healing yet
@@ -0,0 +1,72 @@
1
+ # Embedding OmniBrowser Agent in Your Website
2
+
3
+ You can keep the extension flow and also embed OmniBrowser Agent as a library in your own web app.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install @akshayram1/omnibrowser-agent
9
+ ```
10
+
11
+ ## Basic usage
12
+
13
+ ```ts
14
+ import { createBrowserAgent } from "@akshayram1/omnibrowser-agent";
15
+
16
+ const agent = createBrowserAgent(
17
+ {
18
+ goal: "Search contact Jane Doe and open profile",
19
+ mode: "human-approved",
20
+ planner: { kind: "heuristic" },
21
+ maxSteps: 15,
22
+ stepDelayMs: 400
23
+ },
24
+ {
25
+ onStep: (result) => console.log("step", result),
26
+ onApprovalRequired: (action) => {
27
+ console.log("approval required", action);
28
+ // Show your own modal/button then call approvePendingAction()
29
+ },
30
+ onDone: (result) => console.log("done", result),
31
+ onError: (error) => console.error(error)
32
+ }
33
+ );
34
+
35
+ await agent.start();
36
+ ```
37
+
38
+ ## Approve a pending action
39
+
40
+ ```ts
41
+ await agent.approvePendingAction();
42
+ ```
43
+
44
+ ## Stop running session
45
+
46
+ ```ts
47
+ agent.stop();
48
+ ```
49
+
50
+ ## WebLLM mode in embedded app
51
+
52
+ To use planner mode `webllm`, provide a local bridge in your app:
53
+
54
+ ```ts
55
+ window.__browserAgentWebLLM = {
56
+ async plan(input, modelId) {
57
+ // call your local WebLLM engine and return one AgentAction JSON
58
+ return { type: "done", reason: `Implement bridge with model ${modelId ?? "default"}` };
59
+ }
60
+ };
61
+ ```
62
+
63
+ Then configure:
64
+
65
+ ```ts
66
+ planner: { kind: "webllm", modelId: "Llama-3.2-1B-Instruct-q4f16_1-MLC" }
67
+ ```
68
+
69
+ ## Notes
70
+
71
+ - For production, mount this inside an authenticated app shell and add your own permission checks.
72
+ - `human-approved` mode is recommended for CRM/finance/admin actions.
@@ -0,0 +1,27 @@
1
+ # Roadmap
2
+
3
+ ## v0.1 (current)
4
+
5
+ - Extension runtime loop
6
+ - Shared action contracts
7
+ - Heuristic + WebLLM planner switch
8
+ - Human-approved mode
9
+
10
+ ## v0.2
11
+
12
+ - Site profile + policy engine (allowlist, blocked domains)
13
+ - Selector healing and fallback strategy
14
+ - Session memory and action replay log
15
+ - Drupal CRM starter skills
16
+
17
+ ## v0.3
18
+
19
+ - Long-term encrypted memory in IndexedDB
20
+ - Goal decomposition planner (multi-step task graphs)
21
+ - Multi-tab workflows
22
+
23
+ ## v1.0
24
+
25
+ - Stable plugin API for site skills
26
+ - Validation/eval harness with benchmark tasks
27
+ - Cross-browser packaging (Chromium + Firefox)
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "@akshayram1/omnibrowser-agent",
3
+ "version": "0.2.0",
4
+ "private": false,
5
+ "type": "module",
6
+ "main": "./dist/lib.js",
7
+ "module": "./dist/lib.js",
8
+ "types": "./dist/types/lib/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/types/lib/index.d.ts",
12
+ "import": "./dist/lib.js"
13
+ }
14
+ },
15
+ "description": "Local-first OmniBrowser Agent (WebLLM + page automation)",
16
+ "author": "Akshay Chame",
17
+ "license": "MIT",
18
+ "scripts": {
19
+ "build": "node scripts/build.mjs && npm run build:types",
20
+ "build:types": "tsc -p tsconfig.lib.json",
21
+ "watch": "node scripts/build.mjs --watch",
22
+ "typecheck": "tsc --noEmit"
23
+ },
24
+ "dependencies": {},
25
+ "devDependencies": {
26
+ "@types/chrome": "^0.0.322",
27
+ "esbuild": "^0.25.2",
28
+ "typescript": "^5.8.2"
29
+ }
30
+ }