webmarker-js 0.0.6 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- package/README.md +122 -43
- package/dist/index.d.ts +6 -0
- package/dist/main.js +5 -4
- package/dist/module.js +5 -4
- package/package.json +1 -1
- package/src/index.ts +11 -4
- package/test-results/.last-run.json +4 -2
package/README.md
CHANGED
@@ -11,60 +11,146 @@ Mark web pages for use with vision-language models.
|
|
11
11
|
|
12
12
|
## Overview
|
13
13
|
|
14
|
-
|
14
|
+
**WebMarker** adds visual markings with labels to elements on a web page. This can be used for [Set-of-Mark (SoM)](https://github.com/microsoft/SoM) prompting, which improves visual grounding abilities of vision-language models such as GPT-4o, Claude 3.5, and Google Gemini 1.5.
|
15
15
|
|
16
|
-
|
16
|
+
![Screenshot of marked Google homepage](https://github.com/user-attachments/assets/722e1034-06d4-4ccd-a7d6-f03749435681)
|
17
17
|
|
18
|
-
## Usage
|
19
18
|
|
20
|
-
|
19
|
+
## How it works
|
20
|
+
|
21
|
+
**1. Call the `mark()` function**
|
22
|
+
|
23
|
+
This marks the interactive elements on the page, and returns an object containing the marked elements, where each key is a mark label string, and each value is an object with the following properties:
|
24
|
+
|
25
|
+
- `element`: The interactive element that was marked.
|
26
|
+
- `markElement`: The label element that was added to the page.
|
27
|
+
- `maskElement`: The bounding box element that was added to the page (`undefined` when `showMasks` is `false`).
|
28
|
+
|
29
|
+
You can use this information to build your prompt for the vision-language model.
|
30
|
+
|
31
|
+
**2. Send a screenshot of the marked page to a vision-language model, along with your prompt**
|
32
|
+
|
33
|
+
Example prompt:
|
21
34
|
|
22
35
|
```javascript
|
23
|
-
|
36
|
+
let markedElements = await mark();
|
37
|
+
|
38
|
+
let prompt = `The following is a screenshot of a web page.
|
24
39
|
|
25
|
-
|
26
|
-
let elements = await mark();
|
40
|
+
Interactive elements have been marked with red bounding boxes and labels.
|
27
41
|
|
28
|
-
|
29
|
-
console.log(elements["0"].element);
|
42
|
+
When referring to elements, use the labels to identify them.
|
30
43
|
|
31
|
-
|
32
|
-
|
44
|
+
Return an action and element to perform the action on.
|
45
|
+
|
46
|
+
Available actions: click, hover
|
47
|
+
|
48
|
+
Available elements:
|
49
|
+
${Object.keys(markedElements)
|
50
|
+
.map((label) => `- ${label}`)
|
51
|
+
.join("\n")}
|
52
|
+
|
53
|
+
Example response: click 0
|
54
|
+
`;
|
55
|
+
```
|
56
|
+
|
57
|
+
**3. Programmatically interact with the marked elements.**
|
58
|
+
|
59
|
+
In a web browser (e.g. via Playwright), interact with elements as needed.
|
60
|
+
|
61
|
+
For prompting or agent ideas, see the [WebVoyager](https://arxiv.org/abs/2401.13919) paper.
|
62
|
+
|
63
|
+
## Playwright example
|
64
|
+
|
65
|
+
```javascript
|
66
|
+
// Inject the WebMarker library into the page
|
67
|
+
await page.addScriptTag({
|
68
|
+
url: "https://cdn.jsdelivr.net/npm/webmarker-js/dist/main.js",
|
69
|
+
});
|
70
|
+
|
71
|
+
// Mark the page and get the marked elements
|
72
|
+
let markedElements = await page.evaluate(async () => await WebMarker.mark());
|
73
|
+
|
74
|
+
// (Optional) Check if page is marked
|
75
|
+
let isMarked = await page.evaluate(async () => await WebMarker.isMarked());
|
76
|
+
|
77
|
+
// (Optional) Unmark the page
|
78
|
+
await page.evaluate(async () => await WebMarker.unmark());
|
33
79
|
```
|
34
80
|
|
35
81
|
## Options
|
36
82
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
83
|
+
### selector
|
84
|
+
|
85
|
+
A custom CSS selector to specify which elements to mark.
|
86
|
+
|
87
|
+
- Type: `string`
|
88
|
+
- Default: `"button, input, a, select, textarea"`
|
89
|
+
|
90
|
+
### markAttribute
|
91
|
+
|
92
|
+
A custom attribute to add to the marked elements. This attribute contains the label of the mark.
|
93
|
+
|
94
|
+
- Type: `string`
|
95
|
+
- Default: `"data-mark-id"`
|
96
|
+
|
97
|
+
### markStyle
|
98
|
+
|
99
|
+
A CSS style to apply to the label element. You can also specify a function that returns a CSS style object.
|
100
|
+
|
101
|
+
- Type: `Readonly<Partial<CSSStyleDeclaration>> or (element: Element) => Readonly<Partial<CSSStyleDeclaration>>`
|
102
|
+
- Default: `{backgroundColor: "red", color: "white", padding: "2px 4px", fontSize: "12px", fontWeight: "bold"}`
|
103
|
+
|
104
|
+
### markPlacement
|
105
|
+
|
106
|
+
The placement of the mark relative to the element.
|
107
|
+
|
108
|
+
- Type: `'top' | 'top-start' | 'top-end' | 'right' | 'right-start' | 'right-end' | 'bottom' | 'bottom-start' | 'bottom-end' | 'left' | 'left-start' | 'left-end'`
|
109
|
+
- Default: `'top-start'`
|
110
|
+
|
111
|
+
### maskStyle
|
112
|
+
|
113
|
+
A CSS style to apply to the bounding box element. You can also specify a function that returns a CSS style object. Bounding boxes are only shown if `showMasks` is `true`.
|
114
|
+
|
115
|
+
- Type: `Readonly<Partial<CSSStyleDeclaration>> or (element: Element) => Readonly<Partial<CSSStyleDeclaration>>`
|
116
|
+
- Default: `{outline: "2px dashed red", backgroundColor: "transparent"}`
|
117
|
+
|
118
|
+
### showMasks
|
119
|
+
|
120
|
+
Whether or not to show bounding boxes around the elements.
|
121
|
+
|
122
|
+
- Type: `boolean`
|
123
|
+
- Default: `true`
|
124
|
+
|
125
|
+
### labelGenerator
|
126
|
+
|
127
|
+
Provide a function for generating labels. By default, labels are generated as integers starting from 0.
|
128
|
+
|
129
|
+
- Type: `(element: Element, index: number) => string`
|
130
|
+
- Default: `(_, index) => index.toString()`
|
131
|
+
|
132
|
+
### containerElement
|
133
|
+
|
134
|
+
Provide a container element within which the elements to be marked are queried. By default, the container element is `document.body`.
|
135
|
+
|
136
|
+
- Type: `Element`
|
137
|
+
- Default: `document.body`
|
138
|
+
|
139
|
+
### viewPortOnly
|
140
|
+
|
141
|
+
Only mark elements that are visible in the current viewport.
|
142
|
+
|
143
|
+
- Type: `boolean`
|
144
|
+
- Default: `false`
|
61
145
|
|
62
146
|
### Advanced example
|
63
147
|
|
64
148
|
```typescript
|
65
|
-
|
149
|
+
const markedElements = await mark({
|
66
150
|
// Only mark buttons and inputs
|
67
151
|
selector: "button, input",
|
152
|
+
// Use test id attribute for marker labels
|
153
|
+
markAttribute: "data-test-id",
|
68
154
|
// Use a blue mark with white text
|
69
155
|
markStyle: { color: "white", backgroundColor: "blue", padding: 5 },
|
70
156
|
// Use a blue dashed outline mask with a transparent and slightly blue background
|
@@ -82,11 +168,4 @@ let elements = mark({
|
|
82
168
|
// Only mark elements that are visible in the current viewport
|
83
169
|
viewPortOnly: true,
|
84
170
|
});
|
85
|
-
|
86
|
-
// Cleanup
|
87
|
-
unmark();
|
88
171
|
```
|
89
|
-
|
90
|
-
## Use with Playwright
|
91
|
-
|
92
|
-
Coming soon
|
package/dist/index.d.ts
CHANGED
@@ -4,6 +4,12 @@ interface MarkOptions {
|
|
4
4
|
* A CSS selector to specify the elements to be marked.
|
5
5
|
*/
|
6
6
|
selector?: string;
|
7
|
+
/**
|
8
|
+
* Name for the attribute added to the marked elements. This attribute is used to store the label.
|
9
|
+
*
|
10
|
+
* @default 'data-mark-id'
|
11
|
+
*/
|
12
|
+
markAttribute?: string;
|
7
13
|
/**
|
8
14
|
* A CSS style to apply to the label element.
|
9
15
|
* You can also specify a function that returns a CSS style object.
|
package/dist/main.js
CHANGED
@@ -932,6 +932,7 @@ var WebMarker = (() => {
|
|
932
932
|
return __async(this, arguments, function* (options = {}) {
|
933
933
|
const {
|
934
934
|
selector = "button, input, a, select, textarea",
|
935
|
+
markAttribute = "data-mark-id",
|
935
936
|
markStyle = {
|
936
937
|
backgroundColor: "red",
|
937
938
|
color: "white",
|
@@ -961,7 +962,7 @@ var WebMarker = (() => {
|
|
961
962
|
const markElement = createMark(element, markStyle, label, markPlacement);
|
962
963
|
const maskElement = showMasks ? createMask(element, maskStyle, label) : void 0;
|
963
964
|
markedElements[label] = { element, markElement, maskElement };
|
964
|
-
element.setAttribute(
|
965
|
+
element.setAttribute(markAttribute, label);
|
965
966
|
}))
|
966
967
|
);
|
967
968
|
document.documentElement.dataset.webmarkered = "true";
|
@@ -988,8 +989,8 @@ var WebMarker = (() => {
|
|
988
989
|
}
|
989
990
|
function createMask(element, style, label) {
|
990
991
|
const maskElement = document.createElement("div");
|
991
|
-
maskElement.className = "
|
992
|
-
maskElement.id = `
|
992
|
+
maskElement.className = "webmarker-mask";
|
993
|
+
maskElement.id = `webmarker-mask-${label}`;
|
993
994
|
document.body.appendChild(maskElement);
|
994
995
|
positionMask(maskElement, element);
|
995
996
|
applyStyle(
|
@@ -1040,7 +1041,7 @@ var WebMarker = (() => {
|
|
1040
1041
|
Object.assign(element.style, defaultStyle, customStyle);
|
1041
1042
|
}
|
1042
1043
|
function unmark() {
|
1043
|
-
document.querySelectorAll(".webmarker, .
|
1044
|
+
document.querySelectorAll(".webmarker, .webmarker-mask").forEach((el) => el.remove());
|
1044
1045
|
document.documentElement.removeAttribute("data-webmarkered");
|
1045
1046
|
cleanupFns.forEach((fn) => fn());
|
1046
1047
|
cleanupFns = [];
|
package/dist/module.js
CHANGED
@@ -908,6 +908,7 @@ function mark() {
|
|
908
908
|
return __async(this, arguments, function* (options = {}) {
|
909
909
|
const {
|
910
910
|
selector = "button, input, a, select, textarea",
|
911
|
+
markAttribute = "data-mark-id",
|
911
912
|
markStyle = {
|
912
913
|
backgroundColor: "red",
|
913
914
|
color: "white",
|
@@ -937,7 +938,7 @@ function mark() {
|
|
937
938
|
const markElement = createMark(element, markStyle, label, markPlacement);
|
938
939
|
const maskElement = showMasks ? createMask(element, maskStyle, label) : void 0;
|
939
940
|
markedElements[label] = { element, markElement, maskElement };
|
940
|
-
element.setAttribute(
|
941
|
+
element.setAttribute(markAttribute, label);
|
941
942
|
}))
|
942
943
|
);
|
943
944
|
document.documentElement.dataset.webmarkered = "true";
|
@@ -964,8 +965,8 @@ function createMark(element, style, label, markPlacement = "top-start") {
|
|
964
965
|
}
|
965
966
|
function createMask(element, style, label) {
|
966
967
|
const maskElement = document.createElement("div");
|
967
|
-
maskElement.className = "
|
968
|
-
maskElement.id = `
|
968
|
+
maskElement.className = "webmarker-mask";
|
969
|
+
maskElement.id = `webmarker-mask-${label}`;
|
969
970
|
document.body.appendChild(maskElement);
|
970
971
|
positionMask(maskElement, element);
|
971
972
|
applyStyle(
|
@@ -1016,7 +1017,7 @@ function applyStyle(element, defaultStyle, customStyle) {
|
|
1016
1017
|
Object.assign(element.style, defaultStyle, customStyle);
|
1017
1018
|
}
|
1018
1019
|
function unmark() {
|
1019
|
-
document.querySelectorAll(".webmarker, .
|
1020
|
+
document.querySelectorAll(".webmarker, .webmarker-mask").forEach((el) => el.remove());
|
1020
1021
|
document.documentElement.removeAttribute("data-webmarkered");
|
1021
1022
|
cleanupFns.forEach((fn) => fn());
|
1022
1023
|
cleanupFns = [];
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
@@ -19,6 +19,12 @@ interface MarkOptions {
|
|
19
19
|
* A CSS selector to specify the elements to be marked.
|
20
20
|
*/
|
21
21
|
selector?: string;
|
22
|
+
/**
|
23
|
+
* Name for the attribute added to the marked elements. This attribute is used to store the label.
|
24
|
+
*
|
25
|
+
* @default 'data-mark-id'
|
26
|
+
*/
|
27
|
+
markAttribute?: string;
|
22
28
|
/**
|
23
29
|
* A CSS style to apply to the label element.
|
24
30
|
* You can also specify a function that returns a CSS style object.
|
@@ -77,6 +83,7 @@ async function mark(
|
|
77
83
|
): Promise<Record<string, MarkedElement>> {
|
78
84
|
const {
|
79
85
|
selector = "button, input, a, select, textarea",
|
86
|
+
markAttribute = "data-mark-id",
|
80
87
|
markStyle = {
|
81
88
|
backgroundColor: "red",
|
82
89
|
color: "white",
|
@@ -113,7 +120,7 @@ async function mark(
|
|
113
120
|
: undefined;
|
114
121
|
|
115
122
|
markedElements[label] = { element, markElement, maskElement };
|
116
|
-
element.setAttribute(
|
123
|
+
element.setAttribute(markAttribute, label);
|
117
124
|
})
|
118
125
|
);
|
119
126
|
|
@@ -155,8 +162,8 @@ function createMask(
|
|
155
162
|
label: string
|
156
163
|
): HTMLElement {
|
157
164
|
const maskElement = document.createElement("div");
|
158
|
-
maskElement.className = "
|
159
|
-
maskElement.id = `
|
165
|
+
maskElement.className = "webmarker-mask";
|
166
|
+
maskElement.id = `webmarker-mask-${label}`;
|
160
167
|
document.body.appendChild(maskElement);
|
161
168
|
positionMask(maskElement, element);
|
162
169
|
applyStyle(
|
@@ -215,7 +222,7 @@ function applyStyle(
|
|
215
222
|
|
216
223
|
function unmark(): void {
|
217
224
|
document
|
218
|
-
.querySelectorAll(".webmarker, .
|
225
|
+
.querySelectorAll(".webmarker, .webmarker-mask")
|
219
226
|
.forEach((el) => el.remove());
|
220
227
|
document.documentElement.removeAttribute("data-webmarkered");
|
221
228
|
cleanupFns.forEach((fn) => fn());
|