browser-use 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +295 -686
- package/dist/actor/element.d.ts +19 -0
- package/dist/actor/element.js +46 -0
- package/dist/actor/index.d.ts +4 -0
- package/dist/actor/index.js +4 -0
- package/dist/actor/mouse.d.ts +19 -0
- package/dist/actor/mouse.js +39 -0
- package/dist/actor/page.d.ts +29 -0
- package/dist/actor/page.js +88 -0
- package/dist/actor/utils.d.ts +4 -0
- package/dist/actor/utils.js +35 -0
- package/dist/agent/cloud-events.d.ts +18 -0
- package/dist/agent/cloud-events.js +65 -2
- package/dist/agent/gif.d.ts +1 -0
- package/dist/agent/gif.js +24 -2
- package/dist/agent/judge.d.ts +17 -0
- package/dist/agent/judge.js +197 -0
- package/dist/agent/message-manager/service.d.ts +12 -4
- package/dist/agent/message-manager/service.js +205 -39
- package/dist/agent/message-manager/utils.js +0 -1
- package/dist/agent/message-manager/views.d.ts +4 -0
- package/dist/agent/message-manager/views.js +11 -7
- package/dist/agent/prompts.d.ts +24 -3
- package/dist/agent/prompts.js +274 -59
- package/dist/agent/service.d.ts +99 -41
- package/dist/agent/service.js +2266 -472
- package/dist/agent/variable-detector.d.ts +12 -0
- package/dist/agent/variable-detector.js +211 -0
- package/dist/agent/views.d.ts +237 -18
- package/dist/agent/views.js +446 -33
- package/dist/browser/cloud/cloud.d.ts +20 -0
- package/dist/browser/cloud/cloud.js +129 -0
- package/dist/browser/cloud/index.d.ts +2 -0
- package/dist/browser/cloud/index.js +2 -0
- package/dist/browser/cloud/views.d.ts +41 -0
- package/dist/browser/cloud/views.js +35 -0
- package/dist/browser/events.d.ts +345 -0
- package/dist/browser/events.js +566 -0
- package/dist/browser/extensions.js +17 -17
- package/dist/browser/index.d.ts +4 -0
- package/dist/browser/index.js +4 -0
- package/dist/browser/profile.d.ts +8 -2
- package/dist/browser/profile.js +79 -12
- package/dist/browser/session-manager.d.ts +85 -0
- package/dist/browser/session-manager.js +208 -0
- package/dist/browser/session.d.ts +100 -8
- package/dist/browser/session.js +1097 -58
- package/dist/browser/types.d.ts +0 -2
- package/dist/browser/views.d.ts +39 -0
- package/dist/browser/views.js +32 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
- package/dist/browser/watchdogs/base.d.ts +21 -0
- package/dist/browser/watchdogs/base.js +81 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
- package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
- package/dist/browser/watchdogs/crash-watchdog.js +296 -0
- package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
- package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
- package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/dom-watchdog.js +31 -0
- package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
- package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
- package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
- package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
- package/dist/browser/watchdogs/index.d.ts +15 -0
- package/dist/browser/watchdogs/index.js +15 -0
- package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
- package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
- package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
- package/dist/browser/watchdogs/popups-watchdog.js +77 -0
- package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
- package/dist/browser/watchdogs/recording-watchdog.js +249 -0
- package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
- package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
- package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/security-watchdog.js +84 -0
- package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
- package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
- package/dist/cli.d.ts +7 -2
- package/dist/cli.js +182 -25
- package/dist/code-use/formatting.d.ts +3 -0
- package/dist/code-use/formatting.js +18 -0
- package/dist/code-use/index.d.ts +6 -0
- package/dist/code-use/index.js +6 -0
- package/dist/code-use/namespace.d.ts +5 -0
- package/dist/code-use/namespace.js +81 -0
- package/dist/code-use/notebook-export.d.ts +3 -0
- package/dist/code-use/notebook-export.js +56 -0
- package/dist/code-use/service.d.ts +24 -0
- package/dist/code-use/service.js +104 -0
- package/dist/code-use/utils.d.ts +4 -0
- package/dist/code-use/utils.js +98 -0
- package/dist/code-use/views.d.ts +108 -0
- package/dist/code-use/views.js +165 -0
- package/dist/config.d.ts +13 -0
- package/dist/config.js +69 -3
- package/dist/controller/registry/service.d.ts +10 -1
- package/dist/controller/registry/service.js +266 -10
- package/dist/controller/registry/views.d.ts +4 -1
- package/dist/controller/registry/views.js +25 -2
- package/dist/controller/service.d.ts +10 -1
- package/dist/controller/service.js +1807 -268
- package/dist/controller/views.d.ts +78 -155
- package/dist/controller/views.js +61 -12
- package/dist/dom/history-tree-processor/service.d.ts +5 -0
- package/dist/dom/history-tree-processor/service.js +169 -14
- package/dist/dom/history-tree-processor/view.d.ts +7 -1
- package/dist/dom/history-tree-processor/view.js +10 -1
- package/dist/dom/markdown-extractor.d.ts +37 -0
- package/dist/dom/markdown-extractor.js +345 -0
- package/dist/dom/service.d.ts +3 -1
- package/dist/dom/service.js +76 -0
- package/dist/dom/views.d.ts +1 -0
- package/dist/dom/views.js +45 -0
- package/dist/event-bus.d.ts +107 -7
- package/dist/event-bus.js +313 -10
- package/dist/exceptions.d.ts +0 -3
- package/dist/exceptions.js +0 -7
- package/dist/filesystem/file-system.d.ts +18 -0
- package/dist/filesystem/file-system.js +503 -42
- package/dist/index.d.ts +7 -0
- package/dist/index.js +6 -0
- package/dist/integrations/gmail/actions.d.ts +3 -3
- package/dist/integrations/gmail/actions.js +4 -4
- package/dist/llm/anthropic/chat.d.ts +18 -1
- package/dist/llm/anthropic/chat.js +123 -55
- package/dist/llm/anthropic/serializer.d.ts +2 -0
- package/dist/llm/anthropic/serializer.js +81 -9
- package/dist/llm/aws/chat-anthropic.d.ts +17 -0
- package/dist/llm/aws/chat-anthropic.js +126 -26
- package/dist/llm/aws/chat-bedrock.d.ts +28 -1
- package/dist/llm/aws/chat-bedrock.js +161 -34
- package/dist/llm/aws/serializer.d.ts +13 -1
- package/dist/llm/aws/serializer.js +56 -17
- package/dist/llm/azure/chat.d.ts +53 -2
- package/dist/llm/azure/chat.js +366 -54
- package/dist/llm/base.d.ts +2 -0
- package/dist/llm/browser-use/chat.d.ts +40 -0
- package/dist/llm/browser-use/chat.js +305 -0
- package/dist/llm/browser-use/index.d.ts +1 -0
- package/dist/llm/browser-use/index.js +1 -0
- package/dist/llm/cerebras/chat.d.ts +39 -0
- package/dist/llm/cerebras/chat.js +178 -0
- package/dist/llm/cerebras/index.d.ts +2 -0
- package/dist/llm/cerebras/index.js +2 -0
- package/dist/llm/cerebras/serializer.d.ts +7 -0
- package/dist/llm/cerebras/serializer.js +82 -0
- package/dist/llm/deepseek/chat.d.ts +19 -2
- package/dist/llm/deepseek/chat.js +138 -25
- package/dist/llm/google/chat.d.ts +46 -2
- package/dist/llm/google/chat.js +267 -64
- package/dist/llm/google/serializer.d.ts +9 -1
- package/dist/llm/google/serializer.js +141 -34
- package/dist/llm/groq/chat.d.ts +21 -2
- package/dist/llm/groq/chat.js +125 -26
- package/dist/llm/groq/parser.js +3 -1
- package/dist/llm/mistral/chat.d.ts +43 -0
- package/dist/llm/mistral/chat.js +154 -0
- package/dist/llm/mistral/index.d.ts +2 -0
- package/dist/llm/mistral/index.js +2 -0
- package/dist/llm/mistral/schema.d.ts +8 -0
- package/dist/llm/mistral/schema.js +27 -0
- package/dist/llm/models.d.ts +2 -0
- package/dist/llm/models.js +317 -0
- package/dist/llm/ollama/chat.d.ts +13 -1
- package/dist/llm/ollama/chat.js +110 -19
- package/dist/llm/ollama/serializer.d.ts +1 -0
- package/dist/llm/ollama/serializer.js +34 -12
- package/dist/llm/openai/chat.d.ts +16 -0
- package/dist/llm/openai/chat.js +94 -44
- package/dist/llm/openai/like.d.ts +5 -3
- package/dist/llm/openai/like.js +7 -3
- package/dist/llm/openai/responses-serializer.d.ts +18 -0
- package/dist/llm/openai/responses-serializer.js +72 -0
- package/dist/llm/openrouter/chat.d.ts +28 -2
- package/dist/llm/openrouter/chat.js +115 -29
- package/dist/llm/schema.d.ts +11 -1
- package/dist/llm/schema.js +81 -1
- package/dist/llm/vercel/chat.d.ts +50 -0
- package/dist/llm/vercel/chat.js +276 -0
- package/dist/llm/vercel/index.d.ts +1 -0
- package/dist/llm/vercel/index.js +1 -0
- package/dist/llm/vercel/serializer.d.ts +5 -0
- package/dist/llm/vercel/serializer.js +7 -0
- package/dist/llm/views.d.ts +2 -1
- package/dist/llm/views.js +3 -1
- package/dist/logging-config.d.ts +2 -0
- package/dist/logging-config.js +82 -29
- package/dist/mcp/client.d.ts +10 -5
- package/dist/mcp/client.js +14 -9
- package/dist/mcp/controller.d.ts +42 -3
- package/dist/mcp/controller.js +56 -31
- package/dist/mcp/server.d.ts +14 -0
- package/dist/mcp/server.js +255 -52
- package/dist/observability.js +10 -4
- package/dist/sandbox/index.d.ts +2 -0
- package/dist/sandbox/index.js +2 -0
- package/dist/sandbox/sandbox.d.ts +19 -0
- package/dist/sandbox/sandbox.js +140 -0
- package/dist/sandbox/views.d.ts +67 -0
- package/dist/sandbox/views.js +121 -0
- package/dist/skill-cli/index.d.ts +3 -0
- package/dist/skill-cli/index.js +3 -0
- package/dist/skill-cli/protocol.d.ts +30 -0
- package/dist/skill-cli/protocol.js +48 -0
- package/dist/skill-cli/server.d.ts +11 -0
- package/dist/skill-cli/server.js +85 -0
- package/dist/skill-cli/sessions.d.ts +24 -0
- package/dist/skill-cli/sessions.js +47 -0
- package/dist/skills/index.d.ts +3 -0
- package/dist/skills/index.js +3 -0
- package/dist/skills/service.d.ts +27 -0
- package/dist/skills/service.js +266 -0
- package/dist/skills/utils.d.ts +6 -0
- package/dist/skills/utils.js +53 -0
- package/dist/skills/views.d.ts +40 -0
- package/dist/skills/views.js +10 -0
- package/dist/sync/auth.js +8 -3
- package/dist/sync/service.d.ts +6 -6
- package/dist/sync/service.js +54 -89
- package/dist/telemetry/views.d.ts +20 -6
- package/dist/telemetry/views.js +23 -5
- package/dist/tokens/custom-pricing.d.ts +2 -0
- package/dist/tokens/custom-pricing.js +22 -0
- package/dist/tokens/index.d.ts +2 -0
- package/dist/tokens/index.js +2 -0
- package/dist/tokens/mappings.d.ts +1 -0
- package/dist/tokens/mappings.js +3 -0
- package/dist/tokens/service.js +27 -8
- package/dist/tools/extraction/index.d.ts +2 -0
- package/dist/tools/extraction/index.js +2 -0
- package/dist/tools/extraction/schema-utils.d.ts +6 -0
- package/dist/tools/extraction/schema-utils.js +237 -0
- package/dist/tools/extraction/views.d.ts +7 -0
- package/dist/tools/index.d.ts +5 -0
- package/dist/tools/index.js +5 -0
- package/dist/tools/registry/index.d.ts +2 -0
- package/dist/tools/registry/index.js +2 -0
- package/dist/tools/registry/service.d.ts +1 -0
- package/dist/tools/registry/service.js +1 -0
- package/dist/tools/registry/views.d.ts +1 -0
- package/dist/tools/registry/views.js +1 -0
- package/dist/tools/service.d.ts +2 -0
- package/dist/tools/service.js +1 -0
- package/dist/tools/utils.d.ts +2 -0
- package/dist/tools/utils.js +57 -0
- package/dist/tools/views.d.ts +1 -0
- package/dist/tools/views.js +1 -0
- package/dist/utils.d.ts +10 -1
- package/dist/utils.js +70 -3
- package/package.json +87 -26
- package/dist/dom/playground/process-dom.js +0 -5
- package/dist/dom/playground/test-accessibility.d.ts +0 -44
- package/dist/dom/playground/test-accessibility.js +0 -111
- /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
|
@@ -27,7 +27,10 @@ export class DOMHistoryElement {
|
|
|
27
27
|
page_coordinates;
|
|
28
28
|
viewport_coordinates;
|
|
29
29
|
viewport_info;
|
|
30
|
-
|
|
30
|
+
element_hash;
|
|
31
|
+
stable_hash;
|
|
32
|
+
ax_name;
|
|
33
|
+
constructor(tag_name, xpath, highlight_index, entire_parent_branch_path, attributes, shadow_root = false, css_selector = null, page_coordinates = null, viewport_coordinates = null, viewport_info = null, element_hash = null, stable_hash = null, ax_name = null) {
|
|
31
34
|
this.tag_name = tag_name;
|
|
32
35
|
this.xpath = xpath;
|
|
33
36
|
this.highlight_index = highlight_index;
|
|
@@ -38,6 +41,9 @@ export class DOMHistoryElement {
|
|
|
38
41
|
this.page_coordinates = page_coordinates;
|
|
39
42
|
this.viewport_coordinates = viewport_coordinates;
|
|
40
43
|
this.viewport_info = viewport_info;
|
|
44
|
+
this.element_hash = element_hash;
|
|
45
|
+
this.stable_hash = stable_hash;
|
|
46
|
+
this.ax_name = ax_name;
|
|
41
47
|
}
|
|
42
48
|
to_dict() {
|
|
43
49
|
return {
|
|
@@ -51,6 +57,9 @@ export class DOMHistoryElement {
|
|
|
51
57
|
page_coordinates: this.page_coordinates,
|
|
52
58
|
viewport_coordinates: this.viewport_coordinates,
|
|
53
59
|
viewport_info: this.viewport_info,
|
|
60
|
+
element_hash: this.element_hash,
|
|
61
|
+
stable_hash: this.stable_hash,
|
|
62
|
+
ax_name: this.ax_name,
|
|
54
63
|
};
|
|
55
64
|
}
|
|
56
65
|
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/** Size statistics reported alongside Markdown extracted from an HTML page. */
export interface MarkdownContentStats {
    /** Label for how the HTML was obtained; the extractor falls back to 'html'. */
    method: string;
    /** Character count of the HTML handed to the Markdown converter. */
    original_html_chars: number;
    /** Character count of the Markdown right after conversion. */
    initial_markdown_chars: number;
    /** Characters removed by the preprocessing/filtering pass. */
    filtered_chars_removed: number;
    /** Character count of the final, filtered Markdown. */
    final_filtered_chars: number;
    /** Source URL, present only when the caller supplied one. */
    url?: string;
    // NOTE(review): the remaining optional fields are not populated by the
    // extractor itself; presumably callers fill them in when paginating or
    // truncating output. Confirm against the call sites.
    started_from_char?: number;
    truncated_at_char?: number;
    next_start_char?: number;
    chunk_index?: number;
    total_chunks?: number;
}

/** One structure-aware slice of a Markdown document. */
export interface MarkdownChunk {
    /** Text of the blocks in this chunk, joined with newlines. */
    content: string;
    /** Zero-based position of this chunk in the full chunk list. */
    chunk_index: number;
    /** Number of chunks the whole document was split into. */
    total_chunks: number;
    /** Offset in the source content where the chunk's first block starts. */
    char_offset_start: number;
    /** Offset in the source content where the chunk's last block ends. */
    char_offset_end: number;
    /** Context carried over from the previous chunk (empty for the first chunk). */
    overlap_prefix: string;
    /** True when at least one more chunk follows this one. */
    has_more: boolean;
}

/** Optional settings accepted by extractCleanMarkdownFromHtml. */
interface ExtractCleanMarkdownOptions {
    /** Keep links and images in the output; they are stripped when false or omitted. */
    extract_links?: boolean;
    /** Value copied into `stats.method`. */
    method?: string;
    /** Value copied into `stats.url` when provided. */
    url?: string;
}

/** Removes JSON-like noise and blank lines from Markdown and reports how many characters were dropped. */
export declare const preprocessMarkdownContent: (input: string, maxNewlines?: number) => {
    content: string;
    chars_filtered: number;
};

/** Converts HTML to cleaned Markdown and returns it together with size statistics. */
export declare const extractCleanMarkdownFromHtml: (html: string, options?: ExtractCleanMarkdownOptions) => {
    content: string;
    stats: MarkdownContentStats;
};

/** Splits Markdown into chunks along block boundaries, optionally starting from a character offset. */
export declare const chunkMarkdownByStructure: (content: string, maxChunkChars?: number, overlapLines?: number, startFromChar?: number) => MarkdownChunk[];
export {};
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
import TurndownService from 'turndown';
|
|
2
|
+
/** Kinds of atomic Markdown blocks recognised by the chunker. */
const BlockType = {
    Header: 'header',
    CodeFence: 'code_fence',
    Table: 'table',
    ListItem: 'list_item',
    Paragraph: 'paragraph',
    Blank: 'blank',
};

/** A line that starts and ends with a pipe (ignoring surrounding whitespace). */
const TABLE_ROW_RE = /^\s*\|.*\|\s*$/;
/** A bullet (-, *, +) or numbered (1. / 1)) list marker followed by a space. */
const LIST_ITEM_RE = /^(\s*)([-*+]|\d+[.)]) /;
/** A line indented by at least two whitespace characters or one tab. */
const LIST_CONTINUATION_RE = /^(\s{2,}|\t)/;

/** Number of source characters a block covers. */
const getBlockSize = ({ char_start, char_end }) => char_end - char_start;

/** The block's lines glued back together with newlines. */
const blockText = ({ lines }) => lines.join('\n');

/**
 * Return the "header row + separator row" text of a table block.
 *
 * @param block - An atomic block with `block_type` and `lines`.
 * @returns The first two lines joined by a newline when the block is a table
 *   whose second line looks like a separator; otherwise null.
 */
const getTableHeader = (block) => {
    const isTable = block.block_type === BlockType.Table;
    if (!isTable || block.lines.length < 2) {
        return null;
    }
    const separator = block.lines[1] ?? '';
    const looksLikeSeparator = ['---', '- -'].some((marker) => separator.includes(marker));
    return looksLikeSeparator ? `${block.lines[0]}\n${separator}` : null;
};
|
|
26
|
+
/**
 * Strip JSON-like noise and blank lines from Markdown text.
 *
 * Steps, in order: remove backtick-wrapped `{...}` blobs, remove long
 * `{"$type":...}` and nested-object blobs, collapse runs of four or more
 * newlines to `maxNewlines` newlines, then drop blank lines and long lines
 * that look like raw JSON.
 *
 * @param {string} input - Markdown text to clean.
 * @param {number} [maxNewlines=3] - Number of newlines that replaces each run
 *   of four or more. Blank lines are removed afterwards, so this mainly
 *   matters when it is 0 (adjacent lines are then joined).
 * @returns {{content: string, chars_filtered: number}} The cleaned text and
 *   the difference between the input length and the cleaned length.
 */
export const preprocessMarkdownContent = (input, maxNewlines = 3) => {
    const originalLength = input.length;
    // Lines longer than this that open with '{' or '[' are treated as JSON.
    const maxStructuredLineChars = 100;
    // Inline link `[text](`, reference link `[text][`, or reference
    // definition `[id]:` — all legitimately start with '[' but are not JSON.
    const markdownBracketLine = /^\[[^\]]*\](?:\(|\[|:)/;

    const withoutBlobs = input
        .replace(/`\{["\w][\s\S]*?\}`/g, '')
        .replace(/\{"\$type":[^}]{100,}\}/g, '')
        .replace(/\{"[^"]{5,}":\{[^}]{100,}\}/g, '')
        .replace(/\n{4,}/g, '\n'.repeat(maxNewlines));

    const keptLines = withoutBlobs.split('\n').filter((line) => {
        const stripped = line.trim();
        if (!stripped) {
            return false;
        }
        const opensLikeJson = stripped.startsWith('{') || stripped.startsWith('[');
        if (!opensLikeJson || stripped.length <= maxStructuredLineChars) {
            return true;
        }
        // Keep long Markdown links; previously they were discarded as JSON.
        return markdownBracketLine.test(stripped);
    });

    const content = keptLines.join('\n').trim();
    return {
        content,
        chars_filtered: originalLength - content.length,
    };
};
|
|
51
|
+
/**
 * Convert an HTML string to cleaned Markdown and report size statistics.
 *
 * Unless `options.extract_links` is true, `<a>` open/close tags and `<img>`
 * tags are removed with regular expressions before conversion (anchor text is
 * kept). The HTML is converted with Turndown (ATX headings, fenced code, '-'
 * bullets, `script`/`style` removed), percent-encoded byte sequences such as
 * `%20` are deleted, and the result is passed through
 * `preprocessMarkdownContent`.
 *
 * @param {string} html - HTML to convert.
 * @param {{extract_links?: boolean, method?: string, url?: string}} [options]
 *   - `method` is copied into the stats (default 'html'); `url` is copied into
 *   the stats when truthy. `null` is accepted and treated like `{}`.
 * @returns {{content: string, stats: object}} Cleaned Markdown plus stats:
 *   `original_html_chars`, `initial_markdown_chars`, `filtered_chars_removed`,
 *   `final_filtered_chars`, `method`, and optionally `url`.
 */
export const extractCleanMarkdownFromHtml = (html, options = {}) => {
    // A default parameter only covers `undefined`; tolerate an explicit null.
    const settings = options ?? {};
    const method = settings.method ?? 'html';
    const extractLinks = settings.extract_links ?? false;

    // Measure the caller's HTML before any tags are stripped so the stat
    // reflects the original input rather than the link-free copy.
    const originalHtmlLength = html.length;

    let pageHtml = html;
    if (!extractLinks) {
        pageHtml = pageHtml
            .replace(/<a\b[^>]*>/gi, '')
            .replace(/<\/a>/gi, '')
            .replace(/<img\b[^>]*>/gi, '');
    }

    const turndown = new TurndownService({
        headingStyle: 'atx',
        codeBlockStyle: 'fenced',
        bulletListMarker: '-',
    });
    turndown.remove(['script', 'style']);

    const initialMarkdown = turndown.turndown(pageHtml);
    // Drop URL percent-escapes (e.g. %20) left in the converted text.
    const unescaped = initialMarkdown.replace(/%[0-9A-Fa-f]{2}/g, '');
    const preprocessed = preprocessMarkdownContent(unescaped);
    const content = preprocessed.content;

    const stats = {
        method,
        original_html_chars: originalHtmlLength,
        initial_markdown_chars: initialMarkdown.length,
        filtered_chars_removed: preprocessed.chars_filtered,
        final_filtered_chars: content.length,
    };
    if (settings.url) {
        stats.url = settings.url;
    }
    return { content, stats };
};
|
|
85
|
+
/**
 * Split Markdown into consecutive "atomic" blocks that the chunker must not
 * cut through.
 *
 * Each block is `{ block_type, lines, char_start, char_end }`. Offsets tile
 * the input: every block starts where the previous one ended, each line is
 * charged its length plus one for the newline after it, and the last block
 * always ends at `content.length`.
 *
 * Line classification, checked in this order:
 *   1. blank line            -> one Blank block
 *   2. line starting ```     -> CodeFence up to and including the next ``` line
 *                               (or to the end of input if never closed)
 *   3. line starting '#'     -> one Header block
 *   4. table row             -> Table block holding the row (plus a following
 *                               separator row containing '---'), then one
 *                               Table block per subsequent row
 *   5. list item             -> ListItem block including following list items
 *                               and indented continuation lines
 *   6. anything else         -> Paragraph up to a blank line or a line that
 *                               starts one of the block kinds above
 *
 * @param {string} content - Markdown text.
 * @returns {Array<{block_type: string, lines: string[], char_start: number, char_end: number}>}
 */
const parseAtomicBlocks = (content) => {
    const lines = content.split('\n');
    const blocks = [];
    let i = 0;
    let offset = 0;

    // Characters a line occupies in the source, counting its trailing '\n'.
    const span = (text) => text.length + 1;
    const isFence = (text) => text.trim().startsWith('```');
    const isHeader = (text) => text.trimStart().startsWith('#');
    const startsNewBlock = (text) => isHeader(text) ||
        isFence(text) ||
        TABLE_ROW_RE.test(text) ||
        LIST_ITEM_RE.test(text);
    // Record a block beginning at the running offset and advance the offset.
    const emit = (block_type, blockLines, char_end) => {
        blocks.push({ block_type, lines: blockLines, char_start: offset, char_end });
        offset = char_end;
    };

    while (i < lines.length) {
        const line = lines[i] ?? '';
        const lineEnd = offset + span(line);
        i += 1;

        if (!line.trim()) {
            emit(BlockType.Blank, [line], lineEnd);
            continue;
        }

        if (isFence(line)) {
            const fenceLines = [line];
            let fenceEnd = lineEnd;
            while (i < lines.length) {
                const fenceLine = lines[i] ?? '';
                fenceLines.push(fenceLine);
                fenceEnd += span(fenceLine);
                i += 1;
                if (isFence(fenceLine)) {
                    break;
                }
            }
            emit(BlockType.CodeFence, fenceLines, fenceEnd);
            continue;
        }

        if (isHeader(line)) {
            emit(BlockType.Header, [line], lineEnd);
            continue;
        }

        if (TABLE_ROW_RE.test(line)) {
            const headerLines = [line];
            let headerEnd = lineEnd;
            const candidate = lines[i] ?? '';
            // Keep the separator row glued to the header row.
            if (i < lines.length && TABLE_ROW_RE.test(candidate) && candidate.includes('---')) {
                headerLines.push(candidate);
                headerEnd += span(candidate);
                i += 1;
            }
            emit(BlockType.Table, headerLines, headerEnd);
            // Body rows are separate blocks so a long table can be split by row.
            while (i < lines.length && TABLE_ROW_RE.test(lines[i] ?? '')) {
                const row = lines[i] ?? '';
                emit(BlockType.Table, [row], offset + span(row));
                i += 1;
            }
            continue;
        }

        if (LIST_ITEM_RE.test(line)) {
            const listLines = [line];
            let listEnd = lineEnd;
            while (i < lines.length) {
                const nextLine = lines[i] ?? '';
                const isItem = LIST_ITEM_RE.test(nextLine);
                const isContinuation = Boolean(nextLine.trim()) && LIST_CONTINUATION_RE.test(nextLine);
                if (!isItem && !isContinuation) {
                    break;
                }
                listLines.push(nextLine);
                listEnd += span(nextLine);
                i += 1;
            }
            emit(BlockType.ListItem, listLines, listEnd);
            continue;
        }

        const paragraphLines = [line];
        let paragraphEnd = lineEnd;
        while (i < lines.length && (lines[i] ?? '').trim()) {
            const nextLine = lines[i] ?? '';
            if (startsNewBlock(nextLine)) {
                break;
            }
            paragraphLines.push(nextLine);
            paragraphEnd += span(nextLine);
            i += 1;
        }
        emit(BlockType.Paragraph, paragraphLines, paragraphEnd);
    }

    // The final line has no newline after it, so the running total overshoots
    // by one. Clamp unconditionally: previously this was skipped when the
    // content ended with '\n', leaving the last offset one past the input.
    if (blocks.length > 0) {
        const last = blocks[blocks.length - 1];
        blocks[blocks.length - 1] = {
            ...last,
            char_end: content.length,
        };
    }
    return blocks;
};
|
|
235
|
+
/**
 * Split Markdown into chunks of roughly `maxChunkChars` characters without
 * cutting through an atomic block (see `parseAtomicBlocks`).
 *
 * Blocks are accumulated until adding the next one would exceed the limit.
 * The accumulated run is then closed, preferably just before the last header
 * that leaves at least half the limit in the closed chunk, so a heading stays
 * with the text that follows it. A single block larger than the limit
 * becomes a chunk on its own.
 *
 * Every chunk after the first carries an `overlap_prefix`: the last
 * `overlapLines` lines of the previous chunk. When the chunk starts with a
 * table row and a table header was seen in an earlier chunk, that header is
 * placed first and trailing lines already present are not repeated.
 *
 * @param {string} content - Markdown text to split.
 * @param {number} [maxChunkChars=100000] - Target upper bound per chunk.
 * @param {number} [overlapLines=5] - Lines of the previous chunk to expose as
 *   context; 0 or less disables the plain overlap.
 * @param {number} [startFromChar=0] - When positive, chunks ending at or
 *   before this offset are omitted. `chunk_index`/`total_chunks` still refer
 *   to the full chunk list.
 * @returns {Array<object>} Chunks with `content`, `chunk_index`,
 *   `total_chunks`, `char_offset_start`, `char_offset_end`, `overlap_prefix`
 *   and `has_more`. Empty content yields one empty chunk; a start offset at
 *   or past the end yields an empty array.
 */
export const chunkMarkdownByStructure = (content, maxChunkChars = 100_000, overlapLines = 5, startFromChar = 0) => {
    if (!content) {
        return [
            {
                content: '',
                chunk_index: 0,
                total_chunks: 1,
                char_offset_start: 0,
                char_offset_end: 0,
                overlap_prefix: '',
                has_more: false,
            },
        ];
    }
    if (startFromChar >= content.length) {
        return [];
    }
    const blocks = parseAtomicBlocks(content);
    if (!blocks.length) {
        return [];
    }

    // Pass 1: group blocks into runs that respect the size limit.
    const rawChunks = [];
    let currentChunk = [];
    let currentSize = 0;
    for (const block of blocks) {
        const blockSize = getBlockSize(block);
        if (currentSize + blockSize > maxChunkChars && currentChunk.length > 0) {
            let bestSplit = currentChunk.length;
            // Walk backwards keeping the size of the tail seen so far; the
            // prefix size is then `currentSize - tailSize` with no re-summing.
            let tailSize = 0;
            for (let j = currentChunk.length - 1; j >= 1; j -= 1) {
                const candidate = currentChunk[j];
                tailSize += getBlockSize(candidate);
                if (candidate.block_type === BlockType.Header &&
                    currentSize - tailSize >= maxChunkChars * 0.5) {
                    bestSplit = j;
                    break;
                }
            }
            rawChunks.push(currentChunk.slice(0, bestSplit));
            currentChunk = currentChunk.slice(bestSplit);
            currentSize = currentChunk.reduce((sum, part) => sum + getBlockSize(part), 0);
        }
        currentChunk.push(block);
        currentSize += blockSize;
    }
    if (currentChunk.length > 0) {
        rawChunks.push(currentChunk);
    }

    // Pass 2: materialise chunk records and their overlap context.
    const chunks = [];
    const totalChunks = rawChunks.length;
    let previousTableHeader = null;
    rawChunks.forEach((chunkBlocks, index) => {
        if (!chunkBlocks.length) {
            return;
        }
        const firstBlock = chunkBlocks[0];
        const lastBlock = chunkBlocks[chunkBlocks.length - 1];

        let overlapPrefix = '';
        if (index > 0) {
            const previousLines = rawChunks[index - 1].map(blockText).join('\n').split('\n');
            const trailingLines = overlapLines > 0 ? previousLines.slice(-overlapLines) : [];
            if (firstBlock.block_type === BlockType.Table && previousTableHeader !== null) {
                // Continuing a table: lead with its header so the rows stay readable.
                const merged = previousTableHeader.split('\n');
                for (const trailing of trailingLines) {
                    if (!merged.includes(trailing)) {
                        merged.push(trailing);
                    }
                }
                overlapPrefix = merged.join('\n');
            }
            else {
                overlapPrefix = trailingLines.join('\n');
            }
        }

        // Remember the most recent table header for later chunks. This runs
        // after the overlap is built, so a chunk never uses its own header.
        for (const block of chunkBlocks) {
            const header = block.block_type === BlockType.Table ? getTableHeader(block) : null;
            if (header !== null) {
                previousTableHeader = header;
            }
        }

        chunks.push({
            content: chunkBlocks.map(blockText).join('\n'),
            chunk_index: index,
            total_chunks: totalChunks,
            char_offset_start: firstBlock.char_start ?? 0,
            char_offset_end: lastBlock.char_end ?? (firstBlock.char_start ?? 0),
            overlap_prefix: overlapPrefix,
            has_more: index < totalChunks - 1,
        });
    });

    if (startFromChar > 0) {
        const firstWanted = chunks.findIndex((chunk) => chunk.char_offset_end > startFromChar);
        return firstWanted === -1 ? [] : chunks.slice(firstWanted);
    }
    return chunks;
};
|
package/dist/dom/service.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { Page } from '../browser/types.js';
|
|
2
|
-
import { DOMState } from './views.js';
|
|
2
|
+
import { DOMState, type SelectorMap } from './views.js';
|
|
3
|
+
import type { PaginationButton } from '../browser/views.js';
|
|
3
4
|
export declare class DomService {
|
|
4
5
|
private readonly page;
|
|
5
6
|
private readonly logger;
|
|
@@ -16,4 +17,5 @@ export declare class DomService {
|
|
|
16
17
|
private isAdUrl;
|
|
17
18
|
private getPageUrl;
|
|
18
19
|
private isDebugEnabled;
|
|
20
|
+
static detect_pagination_buttons(selector_map: SelectorMap): PaginationButton[];
|
|
19
21
|
}
|
package/dist/dom/service.js
CHANGED
|
@@ -205,6 +205,82 @@ export class DomService {
|
|
|
205
205
|
isDebugEnabled() {
|
|
206
206
|
return ((process.env.BROWSER_USE_LOGGING_LEVEL ?? '').toLowerCase() === 'debug');
|
|
207
207
|
}
|
|
208
|
+
static detect_pagination_buttons(selector_map) {
|
|
209
|
+
const paginationButtons = [];
|
|
210
|
+
const nextPatterns = [
|
|
211
|
+
'next',
|
|
212
|
+
'>',
|
|
213
|
+
'>>',
|
|
214
|
+
'siguiente',
|
|
215
|
+
'suivant',
|
|
216
|
+
'weiter',
|
|
217
|
+
'volgende',
|
|
218
|
+
];
|
|
219
|
+
const prevPatterns = [
|
|
220
|
+
'prev',
|
|
221
|
+
'previous',
|
|
222
|
+
'<',
|
|
223
|
+
'<<',
|
|
224
|
+
'anterior',
|
|
225
|
+
'precedent',
|
|
226
|
+
'zuruck',
|
|
227
|
+
'vorige',
|
|
228
|
+
];
|
|
229
|
+
const firstPatterns = ['first', 'primera', 'premiere', 'erste'];
|
|
230
|
+
const lastPatterns = ['last', 'ultima', 'dernier', 'letzte'];
|
|
231
|
+
const hasPattern = (text, patterns) => patterns.some((pattern) => text.includes(pattern));
|
|
232
|
+
for (const [index, node] of Object.entries(selector_map)) {
|
|
233
|
+
if (!(node instanceof DOMElementNode)) {
|
|
234
|
+
continue;
|
|
235
|
+
}
|
|
236
|
+
const text = node.get_all_text_till_next_clickable_element().trim();
|
|
237
|
+
const textLower = text.toLowerCase();
|
|
238
|
+
const ariaLabel = String(node.attributes?.['aria-label'] ?? '').toLowerCase();
|
|
239
|
+
const title = String(node.attributes?.title ?? '').toLowerCase();
|
|
240
|
+
const className = String(node.attributes?.class ?? '').toLowerCase();
|
|
241
|
+
const role = String(node.attributes?.role ?? '').toLowerCase();
|
|
242
|
+
const allText = `${textLower} ${ariaLabel} ${title} ${className}`.trim();
|
|
243
|
+
const disabledRaw = node.attributes?.disabled;
|
|
244
|
+
const ariaDisabledRaw = node.attributes?.['aria-disabled'];
|
|
245
|
+
const disabledAttr = typeof disabledRaw === 'string' ? disabledRaw.toLowerCase() : '';
|
|
246
|
+
const ariaDisabled = typeof ariaDisabledRaw === 'string'
|
|
247
|
+
? ariaDisabledRaw.toLowerCase()
|
|
248
|
+
: '';
|
|
249
|
+
const isDisabled = (typeof disabledRaw === 'string' &&
|
|
250
|
+
disabledAttr !== '' &&
|
|
251
|
+
disabledAttr !== 'false') ||
|
|
252
|
+
ariaDisabled === 'true' ||
|
|
253
|
+
className.includes('disabled');
|
|
254
|
+
let buttonType = null;
|
|
255
|
+
if (hasPattern(allText, nextPatterns)) {
|
|
256
|
+
buttonType = 'next';
|
|
257
|
+
}
|
|
258
|
+
else if (hasPattern(allText, prevPatterns)) {
|
|
259
|
+
buttonType = 'prev';
|
|
260
|
+
}
|
|
261
|
+
else if (hasPattern(allText, firstPatterns)) {
|
|
262
|
+
buttonType = 'first';
|
|
263
|
+
}
|
|
264
|
+
else if (hasPattern(allText, lastPatterns)) {
|
|
265
|
+
buttonType = 'last';
|
|
266
|
+
}
|
|
267
|
+
else if (/^\d{1,2}$/.test(textLower) &&
|
|
268
|
+
(role === 'button' || role === 'link' || role === '')) {
|
|
269
|
+
buttonType = 'page_number';
|
|
270
|
+
}
|
|
271
|
+
if (!buttonType) {
|
|
272
|
+
continue;
|
|
273
|
+
}
|
|
274
|
+
paginationButtons.push({
|
|
275
|
+
button_type: buttonType,
|
|
276
|
+
backend_node_id: Number(index),
|
|
277
|
+
text: text || ariaLabel || title || node.tag_name,
|
|
278
|
+
selector: node.xpath,
|
|
279
|
+
is_disabled: isDisabled,
|
|
280
|
+
});
|
|
281
|
+
}
|
|
282
|
+
return paginationButtons;
|
|
283
|
+
}
|
|
208
284
|
}
|
|
209
285
|
__decorate([
|
|
210
286
|
observe_debug({
|
package/dist/dom/views.d.ts
CHANGED
package/dist/dom/views.js
CHANGED
|
@@ -44,6 +44,7 @@ export const DEFAULT_INCLUDE_ATTRIBUTES = [
|
|
|
44
44
|
'title',
|
|
45
45
|
'type',
|
|
46
46
|
'checked',
|
|
47
|
+
'id',
|
|
47
48
|
'name',
|
|
48
49
|
'role',
|
|
49
50
|
'value',
|
|
@@ -54,6 +55,47 @@ export const DEFAULT_INCLUDE_ATTRIBUTES = [
|
|
|
54
55
|
'aria-expanded',
|
|
55
56
|
'data-state',
|
|
56
57
|
'aria-checked',
|
|
58
|
+
'aria-valuemin',
|
|
59
|
+
'aria-valuemax',
|
|
60
|
+
'aria-valuenow',
|
|
61
|
+
'aria-placeholder',
|
|
62
|
+
'pattern',
|
|
63
|
+
'min',
|
|
64
|
+
'max',
|
|
65
|
+
'minlength',
|
|
66
|
+
'maxlength',
|
|
67
|
+
'step',
|
|
68
|
+
'accept',
|
|
69
|
+
'multiple',
|
|
70
|
+
'inputmode',
|
|
71
|
+
'autocomplete',
|
|
72
|
+
'aria-autocomplete',
|
|
73
|
+
'list',
|
|
74
|
+
'data-mask',
|
|
75
|
+
'data-inputmask',
|
|
76
|
+
'data-datepicker',
|
|
77
|
+
'format',
|
|
78
|
+
'expected_format',
|
|
79
|
+
'contenteditable',
|
|
80
|
+
'pseudo',
|
|
81
|
+
'checked',
|
|
82
|
+
'selected',
|
|
83
|
+
'expanded',
|
|
84
|
+
'pressed',
|
|
85
|
+
'disabled',
|
|
86
|
+
'invalid',
|
|
87
|
+
'valuemin',
|
|
88
|
+
'valuemax',
|
|
89
|
+
'valuenow',
|
|
90
|
+
'keyshortcuts',
|
|
91
|
+
'haspopup',
|
|
92
|
+
'multiselectable',
|
|
93
|
+
'required',
|
|
94
|
+
'valuetext',
|
|
95
|
+
'level',
|
|
96
|
+
'busy',
|
|
97
|
+
'live',
|
|
98
|
+
'ax_name',
|
|
57
99
|
];
|
|
58
100
|
export class DOMElementNode extends DOMBaseNode {
|
|
59
101
|
tag_name;
|
|
@@ -244,4 +286,7 @@ export class DOMState {
|
|
|
244
286
|
this.element_tree = element_tree;
|
|
245
287
|
this.selector_map = selector_map;
|
|
246
288
|
}
|
|
289
|
+
llm_representation(include_attributes) {
|
|
290
|
+
return this.element_tree.clickable_elements_to_string(include_attributes);
|
|
291
|
+
}
|
|
247
292
|
}
|