page-agent 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -10
- package/dist/lib/PageAgent.d.ts +68 -9
- package/dist/lib/page-agent.js +41 -14
- package/dist/lib/page-agent.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# PageAgent 🤖🪄
|
|
2
2
|
|
|
3
|
-
> ⚠️ See [**Roadmap**](./ROADMAP.md)
|
|
4
|
-
|
|
5
3
|

|
|
6
4
|
|
|
7
5
|
[](https://badge.fury.io/js/page-agent) [](https://opensource.org/licenses/MIT) [](http://www.typescriptlang.org/) [](https://www.npmjs.com/package/page-agent) [](https://bundlephobia.com/package/page-agent) [](https://github.com/alibaba/page-agent)
|
|
@@ -34,7 +32,11 @@ An in-page UI agent in javascript. Control web interfaces with natural language.
|
|
|
34
32
|
|
|
35
33
|
```html
|
|
36
34
|
<!-- temporary CDN URL. May change in the future -->
|
|
37
|
-
<script
|
|
35
|
+
<script
|
|
36
|
+
src="https://hwcxiuzfylggtcktqgij.supabase.co/storage/v1/object/public/demo-public/v0.0.2/page-agent.js"
|
|
37
|
+
crossorigin="true"
|
|
38
|
+
type="text/javascript"
|
|
39
|
+
></script>
|
|
38
40
|
```
|
|
39
41
|
|
|
40
42
|
### NPM Installation
|
|
@@ -54,13 +56,13 @@ const DEMO_BASE_URL = 'https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm
|
|
|
54
56
|
const DEMO_API_KEY = 'PAGE-AGENT-FREE-TESTING-RANDOM'
|
|
55
57
|
|
|
56
58
|
const agent = new PageAgent({
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
modelName: DEMO_MODEL,
|
|
60
|
+
baseURL: DEMO_BASE_URL,
|
|
61
|
+
apiKey: DEMO_API_KEY,
|
|
62
|
+
language: 'en-US',
|
|
61
63
|
})
|
|
62
64
|
|
|
63
|
-
await agent.execute(
|
|
65
|
+
await agent.execute('Click the login button')
|
|
64
66
|
```
|
|
65
67
|
|
|
66
68
|
## 🏗️ Structure
|
|
@@ -104,9 +106,8 @@ PageAgent is designed for **client-side web enhancement**, not server-side autom
|
|
|
104
106
|
|
|
105
107
|
MIT License - see the [LICENSE](LICENSE) file for details.
|
|
106
108
|
|
|
107
|
-
|
|
108
109
|
```
|
|
109
|
-
DOM processing components and prompt are derived from browser-use:
|
|
110
|
+
DOM processing components and prompt are derived from browser-use:
|
|
110
111
|
|
|
111
112
|
Browser Use
|
|
112
113
|
Copyright (c) 2024 Gregor Zunic
|
package/dist/lib/PageAgent.d.ts
CHANGED
|
@@ -7,6 +7,61 @@ export declare interface AgentBrain {
|
|
|
7
7
|
next_goal: string;
|
|
8
8
|
}
|
|
9
9
|
|
|
10
|
+
declare interface AgentConfig {
|
|
11
|
+
language?: SupportedLanguage;
|
|
12
|
+
/**
|
|
13
|
+
* Custom tools to extend PageAgent capabilities
|
|
14
|
+
* @experimental
|
|
15
|
+
* @note You can also override or remove internal tools by using the same name.
|
|
16
|
+
* @see [tools](../tools/index.ts)
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* // override internal tool
|
|
20
|
+
* import { tool } from 'page-agent'
|
|
21
|
+
* const customTools = {
|
|
22
|
+
* ask_user: tool({
|
|
23
|
+
* description:
|
|
24
|
+
* 'Ask the user or parent model a question and wait for their answer. Use this if you need more information or clarification.',
|
|
25
|
+
* inputSchema: zod.object({
|
|
26
|
+
* question: zod.string(),
|
|
27
|
+
* }),
|
|
28
|
+
* execute: async function (this: PageAgent, input) {
|
|
29
|
+
* const answer = await do_some_thing(input.question)
|
|
30
|
+
* return "✅ Received user answer: " + answer
|
|
31
|
+
* },
|
|
32
|
+
* })
|
|
33
|
+
* }
|
|
34
|
+
*
|
|
35
|
+
* @example
|
|
36
|
+
* // remove internal tool
|
|
37
|
+
* const customTools = {
|
|
38
|
+
* ask_user: null // never ask user questions
|
|
39
|
+
* }
|
|
40
|
+
*/
|
|
41
|
+
customTools?: Record<string, PageAgentTool | null>;
|
|
42
|
+
onBeforeStep?: (this: PageAgent, stepCnt: number) => Promise<void> | void;
|
|
43
|
+
onAfterStep?: (this: PageAgent, stepCnt: number, history: AgentHistory[]) => Promise<void> | void;
|
|
44
|
+
onBeforeTask?: (this: PageAgent) => Promise<void> | void;
|
|
45
|
+
onAfterTask?: (this: PageAgent, result: ExecutionResult) => Promise<void> | void;
|
|
46
|
+
/**
|
|
47
|
+
* @note this hook can block the disposal process
|
|
48
|
+
* @note when dispose is caused by page unload, reason will be 'PAGE_UNLOADING'. This method CANNOT block unloading; async operations may be cut off.
|
|
49
|
+
*/
|
|
50
|
+
onDispose?: (this: PageAgent, reason?: string) => void;
|
|
51
|
+
/**
|
|
52
|
+
* TODO: @unimplemented
|
|
53
|
+
* hook when action causes a new page to be opened
|
|
54
|
+
* @note PageAgent will try to detect new pages and decide whether they were caused by an action, but this detection is not very reliable.
|
|
55
|
+
*/
|
|
56
|
+
onNewPageOpen?: (this: PageAgent, url: string) => Promise<void> | void;
|
|
57
|
+
/**
|
|
58
|
+
* TODO: @unimplemented
|
|
59
|
+
* try to navigate to a new page instead of opening a new tab/window.
|
|
60
|
+
* @note will unload the current page when an action tries to open a new page, so that everything stays in the same tab/window.
|
|
61
|
+
*/
|
|
62
|
+
experimentalPreventNewPage?: boolean;
|
|
63
|
+
}
|
|
64
|
+
|
|
10
65
|
export declare interface AgentHistory {
|
|
11
66
|
brain: AgentBrain;
|
|
12
67
|
action: {
|
|
@@ -303,9 +358,11 @@ export declare class PageAgent extends EventTarget {
|
|
|
303
358
|
bus: EventBus;
|
|
304
359
|
i18n: I18n;
|
|
305
360
|
panel: Panel;
|
|
361
|
+
tools: typeof tools;
|
|
306
362
|
paused: boolean;
|
|
307
363
|
disposed: boolean;
|
|
308
364
|
task: string;
|
|
365
|
+
taskId: string;
|
|
309
366
|
/** Corresponds to eval_page in browser-use */
|
|
310
367
|
flatTree: FlatDomTree | null;
|
|
311
368
|
/**
|
|
@@ -319,8 +376,6 @@ export declare class PageAgent extends EventTarget {
|
|
|
319
376
|
simplifiedHTML: string;
|
|
320
377
|
/** last time the tree was updated */
|
|
321
378
|
lastTimeUpdate: number;
|
|
322
|
-
/** Corresponds to actions in browser-use */
|
|
323
|
-
tools: Map<string, PageAgentTool<any>>;
|
|
324
379
|
/** Fullscreen mask */
|
|
325
380
|
mask: SimulatorMask;
|
|
326
381
|
/** History records */
|
|
@@ -330,10 +385,10 @@ export declare class PageAgent extends EventTarget {
|
|
|
330
385
|
* @todo maybe return something?
|
|
331
386
|
*/
|
|
332
387
|
execute(task: string): Promise<ExecutionResult>;
|
|
333
|
-
dispose(): void;
|
|
388
|
+
dispose(reason?: string): void;
|
|
334
389
|
}
|
|
335
390
|
|
|
336
|
-
export declare type PageAgentConfig = LLMConfig &
|
|
391
|
+
export declare type PageAgentConfig = LLMConfig & AgentConfig & DomConfig;
|
|
337
392
|
|
|
338
393
|
/**
|
|
339
394
|
* Event mapping definitions
|
|
@@ -363,7 +418,7 @@ declare interface PageAgentEventMap {
|
|
|
363
418
|
/**
|
|
364
419
|
* Internal tool definition that has access to PageAgent `this` context
|
|
365
420
|
*/
|
|
366
|
-
declare interface PageAgentTool<TParams = any> {
|
|
421
|
+
export declare interface PageAgentTool<TParams = any> {
|
|
367
422
|
description: string;
|
|
368
423
|
inputSchema: z.ZodType<TParams>;
|
|
369
424
|
execute: (this: PageAgent, args: TParams) => Promise<string>;
|
|
@@ -422,14 +477,18 @@ declare interface TextDomNode {
|
|
|
422
477
|
[key: string]: unknown;
|
|
423
478
|
}
|
|
424
479
|
|
|
480
|
+
export declare function tool<TParams>(options: PageAgentTool<TParams>): PageAgentTool<TParams>;
|
|
481
|
+
|
|
482
|
+
/**
|
|
483
|
+
* Internal tools for PageAgent.
|
|
484
|
+
* Note: Using any to allow different parameter types for each tool
|
|
485
|
+
*/
|
|
486
|
+
declare const tools: Map<string, PageAgentTool<any>>;
|
|
487
|
+
|
|
425
488
|
declare type TranslationKey = NestedKeyOf<TranslationSchema>;
|
|
426
489
|
|
|
427
490
|
declare type TranslationParams = Record<string, string | number>;
|
|
428
491
|
|
|
429
492
|
declare type TranslationSchema = DeepStringify<typeof enUS>;
|
|
430
493
|
|
|
431
|
-
declare interface UIConfig {
|
|
432
|
-
language?: SupportedLanguage;
|
|
433
|
-
}
|
|
434
|
-
|
|
435
494
|
export { }
|
package/dist/lib/page-agent.js
CHANGED
|
@@ -27,9 +27,9 @@ import chalk from "chalk";
|
|
|
27
27
|
import zod, { z } from "zod";
|
|
28
28
|
import { Motion } from "ai-motion";
|
|
29
29
|
const VIEWPORT_EXPANSION = -1;
|
|
30
|
-
const DEFAULT_MODEL_NAME = "
|
|
31
|
-
const DEFAULT_API_KEY = "
|
|
32
|
-
const DEFAULT_BASE_URL = "
|
|
30
|
+
const DEFAULT_MODEL_NAME = "PAGE-AGENT-FREE-TESTING-RANDOM";
|
|
31
|
+
const DEFAULT_API_KEY = "PAGE-AGENT-FREE-TESTING-RANDOM";
|
|
32
|
+
const DEFAULT_BASE_URL = "https://hwcxiuzfylggtcktqgij.supabase.co/functions/v1/llm-testing-proxy";
|
|
33
33
|
const LLM_MAX_RETRIES = 2;
|
|
34
34
|
const MAX_STEPS = 20;
|
|
35
35
|
const DEFAULT_TEMPERATURE = 0.7;
|
|
@@ -3333,9 +3333,11 @@ const _PageAgent = class _PageAgent extends EventTarget {
|
|
|
3333
3333
|
__publicField(this, "bus", getEventBus(this.id));
|
|
3334
3334
|
__publicField(this, "i18n");
|
|
3335
3335
|
__publicField(this, "panel");
|
|
3336
|
+
__publicField(this, "tools");
|
|
3336
3337
|
__publicField(this, "paused", false);
|
|
3337
3338
|
__publicField(this, "disposed", false);
|
|
3338
3339
|
__publicField(this, "task", "");
|
|
3340
|
+
__publicField(this, "taskId", "");
|
|
3339
3341
|
__privateAdd(this, _llm);
|
|
3340
3342
|
__privateAdd(this, _totalWaitTime, 0);
|
|
3341
3343
|
__privateAdd(this, _abortController, new AbortController());
|
|
@@ -3352,8 +3354,6 @@ const _PageAgent = class _PageAgent extends EventTarget {
|
|
|
3352
3354
|
__publicField(this, "simplifiedHTML", "<EMPTY>");
|
|
3353
3355
|
/** last time the tree was updated */
|
|
3354
3356
|
__publicField(this, "lastTimeUpdate", 0);
|
|
3355
|
-
/** Corresponds to actions in browser-use */
|
|
3356
|
-
__publicField(this, "tools", new Map(tools));
|
|
3357
3357
|
/** Fullscreen mask */
|
|
3358
3358
|
__publicField(this, "mask", new SimulatorMask());
|
|
3359
3359
|
/** History records */
|
|
@@ -3362,7 +3362,20 @@ const _PageAgent = class _PageAgent extends EventTarget {
|
|
|
3362
3362
|
__privateSet(this, _llm, new LLM(this.config, this.id));
|
|
3363
3363
|
this.i18n = new I18n(this.config.language);
|
|
3364
3364
|
this.panel = new Panel(this);
|
|
3365
|
+
this.tools = new Map(tools);
|
|
3366
|
+
if (this.config.customTools) {
|
|
3367
|
+
for (const [name, tool2] of Object.entries(this.config.customTools)) {
|
|
3368
|
+
if (tool2 === null) {
|
|
3369
|
+
this.tools.delete(name);
|
|
3370
|
+
continue;
|
|
3371
|
+
}
|
|
3372
|
+
this.tools.set(name, tool2);
|
|
3373
|
+
}
|
|
3374
|
+
}
|
|
3365
3375
|
patchReact();
|
|
3376
|
+
window.addEventListener("beforeunload", (e) => {
|
|
3377
|
+
if (!this.disposed) this.dispose("PAGE_UNLOADING");
|
|
3378
|
+
});
|
|
3366
3379
|
}
|
|
3367
3380
|
/**
|
|
3368
3381
|
* @todo maybe return something?
|
|
@@ -3370,12 +3383,18 @@ const _PageAgent = class _PageAgent extends EventTarget {
|
|
|
3370
3383
|
async execute(task) {
|
|
3371
3384
|
if (!task) throw new Error("Task is required");
|
|
3372
3385
|
this.task = task;
|
|
3386
|
+
this.taskId = uid();
|
|
3387
|
+
const onBeforeStep = this.config.onBeforeStep || (() => void 0);
|
|
3388
|
+
const onAfterStep = this.config.onAfterStep || (() => void 0);
|
|
3389
|
+
const onBeforeTask = this.config.onBeforeTask || (() => void 0);
|
|
3390
|
+
const onAfterTask = this.config.onAfterTask || (() => void 0);
|
|
3391
|
+
await onBeforeTask.call(this);
|
|
3373
3392
|
this.mask.show();
|
|
3374
3393
|
this.bus.emit("panel:show");
|
|
3375
3394
|
this.bus.emit("panel:reset");
|
|
3376
3395
|
this.bus.emit("panel:update", {
|
|
3377
3396
|
type: "input",
|
|
3378
|
-
displayText: task
|
|
3397
|
+
displayText: this.task
|
|
3379
3398
|
});
|
|
3380
3399
|
if (__privateGet(this, _abortController)) {
|
|
3381
3400
|
__privateGet(this, _abortController).abort();
|
|
@@ -3385,6 +3404,7 @@ const _PageAgent = class _PageAgent extends EventTarget {
|
|
|
3385
3404
|
try {
|
|
3386
3405
|
let step = 0;
|
|
3387
3406
|
while (true) {
|
|
3407
|
+
await onBeforeStep.call(this, step);
|
|
3388
3408
|
console.group(`step: ${step + 1}`);
|
|
3389
3409
|
if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
|
|
3390
3410
|
await waitUntil(() => !this.paused);
|
|
@@ -3428,38 +3448,45 @@ const _PageAgent = class _PageAgent extends EventTarget {
|
|
|
3428
3448
|
});
|
|
3429
3449
|
console.log(chalk.green("Step finished:"), actionName);
|
|
3430
3450
|
console.groupEnd();
|
|
3451
|
+
await onAfterStep.call(this, step, this.history);
|
|
3431
3452
|
step++;
|
|
3432
3453
|
if (step > MAX_STEPS) {
|
|
3433
3454
|
__privateMethod(this, _PageAgent_instances, onDone_fn).call(this, "Step count exceeded maximum limit", false);
|
|
3434
|
-
|
|
3455
|
+
const result2 = {
|
|
3435
3456
|
success: false,
|
|
3436
3457
|
data: "Step count exceeded maximum limit",
|
|
3437
3458
|
history: this.history
|
|
3438
3459
|
};
|
|
3460
|
+
await onAfterTask.call(this, result2);
|
|
3461
|
+
return result2;
|
|
3439
3462
|
}
|
|
3440
3463
|
if (actionName === "done") {
|
|
3441
3464
|
const success = action.input?.success ?? false;
|
|
3442
3465
|
const text = action.input?.text || "no text provided";
|
|
3443
3466
|
console.log(chalk.green.bold("Task completed"), success, text);
|
|
3444
3467
|
__privateMethod(this, _PageAgent_instances, onDone_fn).call(this, text, success);
|
|
3445
|
-
|
|
3468
|
+
const result2 = {
|
|
3446
3469
|
success,
|
|
3447
3470
|
data: text,
|
|
3448
3471
|
history: this.history
|
|
3449
3472
|
};
|
|
3473
|
+
await onAfterTask.call(this, result2);
|
|
3474
|
+
return result2;
|
|
3450
3475
|
}
|
|
3451
3476
|
}
|
|
3452
3477
|
} catch (error2) {
|
|
3453
3478
|
console.error("Task failed", error2);
|
|
3454
3479
|
__privateMethod(this, _PageAgent_instances, onDone_fn).call(this, String(error2), false);
|
|
3455
|
-
|
|
3480
|
+
const result = {
|
|
3456
3481
|
success: false,
|
|
3457
3482
|
data: String(error2),
|
|
3458
3483
|
history: this.history
|
|
3459
3484
|
};
|
|
3485
|
+
await onAfterTask.call(this, result);
|
|
3486
|
+
return result;
|
|
3460
3487
|
}
|
|
3461
3488
|
}
|
|
3462
|
-
dispose() {
|
|
3489
|
+
dispose(reason) {
|
|
3463
3490
|
console.log("Disposing PageAgent...");
|
|
3464
3491
|
this.disposed = true;
|
|
3465
3492
|
cleanUpHighlights();
|
|
@@ -3469,7 +3496,8 @@ const _PageAgent = class _PageAgent extends EventTarget {
|
|
|
3469
3496
|
this.panel.dispose();
|
|
3470
3497
|
this.mask.dispose();
|
|
3471
3498
|
this.history = [];
|
|
3472
|
-
__privateGet(this, _abortController).abort("PageAgent disposed");
|
|
3499
|
+
__privateGet(this, _abortController).abort(reason ?? "PageAgent disposed");
|
|
3500
|
+
this.config.onDispose?.call(this, reason);
|
|
3473
3501
|
}
|
|
3474
3502
|
};
|
|
3475
3503
|
_llm = new WeakMap();
|
|
@@ -3503,8 +3531,6 @@ packMacroTool_fn = /* @__PURE__ */ __name(function() {
|
|
|
3503
3531
|
action: actionSchema
|
|
3504
3532
|
});
|
|
3505
3533
|
return {
|
|
3506
|
-
// name: MACRO_TOOL_NAME,
|
|
3507
|
-
// description: 'Execute agent action', // @todo remote
|
|
3508
3534
|
inputSchema: macroToolSchema,
|
|
3509
3535
|
execute: /* @__PURE__ */ __name(async (input2) => {
|
|
3510
3536
|
if (__privateGet(this, _abortController).signal.aborted) throw new Error("AbortError");
|
|
@@ -3668,6 +3694,7 @@ updateTree_fn = /* @__PURE__ */ __name(function() {
|
|
|
3668
3694
|
__name(_PageAgent, "PageAgent");
|
|
3669
3695
|
let PageAgent = _PageAgent;
|
|
3670
3696
|
export {
|
|
3671
|
-
PageAgent
|
|
3697
|
+
PageAgent,
|
|
3698
|
+
tool
|
|
3672
3699
|
};
|
|
3673
3700
|
//# sourceMappingURL=page-agent.js.map
|