webmarker-js 0.0.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +122 -43
- package/dist/index.d.ts +6 -0
- package/dist/main.js +5 -4
- package/dist/module.js +5 -4
- package/package.json +1 -1
- package/src/index.ts +11 -4
- package/test-results/.last-run.json +4 -2
package/README.md
CHANGED
@@ -11,60 +11,146 @@ Mark web pages for use with vision-language models.
|
|
11
11
|
|
12
12
|
## Overview
|
13
13
|
|
14
|
-
|
14
|
+
**WebMarker** adds visual markings with labels to elements on a web page. This can be used for [Set-of-Mark (SoM)](https://github.com/microsoft/SoM) prompting, which improves visual grounding abilities of vision-language models such as GPT-4o, Claude 3.5, and Google Gemini 1.5.
|
15
15
|
|
16
|
-
|
16
|
+

|
17
17
|
|
18
|
-
## Usage
|
19
18
|
|
20
|
-
|
19
|
+
## How it works
|
20
|
+
|
21
|
+
**1. Call the `mark()` function**
|
22
|
+
|
23
|
+
This marks the interactive elements on the page, and returns an object containing the marked elements, where each key is a mark label string, and each value is an object with the following properties:
|
24
|
+
|
25
|
+
- `element`: The interactive element that was marked.
|
26
|
+
- `markElement`: The label element that was added to the page.
|
27
|
+
- `maskElement`: The bounding box element that was added to the page.
|
28
|
+
|
29
|
+
You can use this information to build your prompt for the vision-language model.
|
30
|
+
|
31
|
+
**2. Send a screenshot of the marked page to a vision-language model, along with your prompt**
|
32
|
+
|
33
|
+
Example prompt:
|
21
34
|
|
22
35
|
```javascript
|
23
|
-
|
36
|
+
let markedElements = await mark();
|
37
|
+
|
38
|
+
let prompt = `The following is a screenshot of a web page.
|
24
39
|
|
25
|
-
|
26
|
-
let elements = await mark();
|
40
|
+
Interactive elements have been marked with red bounding boxes and labels.
|
27
41
|
|
28
|
-
|
29
|
-
console.log(elements["0"].element);
|
42
|
+
When referring to elements, use the labels to identify them.
|
30
43
|
|
31
|
-
|
32
|
-
|
44
|
+
Return an action and element to perform the action on.
|
45
|
+
|
46
|
+
Available actions: click, hover
|
47
|
+
|
48
|
+
Available elements:
|
49
|
+
${Object.keys(markedElements)
|
50
|
+
.map((label) => `- ${label}`)
|
51
|
+
.join("\n")}
|
52
|
+
|
53
|
+
Example response: click 0
|
54
|
+
`;
|
55
|
+
```
|
56
|
+
|
57
|
+
**3. Programmatically interact with the marked elements**
|
58
|
+
|
59
|
+
In a web browser (e.g. via Playwright), interact with elements as needed.
|
60
|
+
|
61
|
+
For prompting or agent ideas, see the [WebVoyager](https://arxiv.org/abs/2401.13919) paper.
|
62
|
+
|
63
|
+
## Playwright example
|
64
|
+
|
65
|
+
```javascript
|
66
|
+
// Inject the WebMarker library into the page
|
67
|
+
await page.addScriptTag({
|
68
|
+
url: "https://cdn.jsdelivr.net/npm/webmarker-js/dist/main.js",
|
69
|
+
});
|
70
|
+
|
71
|
+
// Mark the page and get the marked elements
|
72
|
+
let markedElements = await page.evaluate(async () => await WebMarker.mark());
|
73
|
+
|
74
|
+
// (Optional) Check if page is marked
|
75
|
+
let isMarked = await page.evaluate(async () => await WebMarker.isMarked());
|
76
|
+
|
77
|
+
// (Optional) Unmark the page
|
78
|
+
await page.evaluate(async () => await WebMarker.unmark());
|
33
79
|
```
|
34
80
|
|
35
81
|
## Options
|
36
82
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
83
|
+
### selector
|
84
|
+
|
85
|
+
A custom CSS selector to specify which elements to mark.
|
86
|
+
|
87
|
+
- Type: `string`
|
88
|
+
- Default: `"button, input, a, select, textarea"`
|
89
|
+
|
90
|
+
### markAttribute
|
91
|
+
|
92
|
+
A custom attribute to add to the marked elements. This attribute contains the label of the mark.
|
93
|
+
|
94
|
+
- Type: `string`
|
95
|
+
- Default: `"data-mark-id"`
|
96
|
+
|
97
|
+
### markStyle
|
98
|
+
|
99
|
+
A CSS style to apply to the label element. You can also specify a function that returns a CSS style object.
|
100
|
+
|
101
|
+
- Type: `Readonly<Partial<CSSStyleDeclaration>> or (element: Element) => Readonly<Partial<CSSStyleDeclaration>>`
|
102
|
+
- Default: `{backgroundColor: "red", color: "white", padding: "2px 4px", fontSize: "12px", fontWeight: "bold"}`
|
103
|
+
|
104
|
+
### markPlacement
|
105
|
+
|
106
|
+
The placement of the mark relative to the element.
|
107
|
+
|
108
|
+
- Type: `'top' | 'top-start' | 'top-end' | 'right' | 'right-start' | 'right-end' | 'bottom' | 'bottom-start' | 'bottom-end' | 'left' | 'left-start' | 'left-end'`
|
109
|
+
- Default: `'top-start'`
|
110
|
+
|
111
|
+
### maskStyle
|
112
|
+
|
113
|
+
A CSS style to apply to the bounding box element. You can also specify a function that returns a CSS style object. Bounding boxes are only shown if showMasks is true.
|
114
|
+
|
115
|
+
- Type: `Readonly<Partial<CSSStyleDeclaration>> or (element: Element) => Readonly<Partial<CSSStyleDeclaration>>`
|
116
|
+
- Default: `{outline: "2px dashed red", backgroundColor: "transparent"}`
|
117
|
+
|
118
|
+
### showMasks
|
119
|
+
|
120
|
+
Whether or not to show bounding boxes around the elements.
|
121
|
+
|
122
|
+
- Type: `boolean`
|
123
|
+
- Default: `true`
|
124
|
+
|
125
|
+
### labelGenerator
|
126
|
+
|
127
|
+
Provide a function for generating labels. By default, labels are generated as integers starting from 0.
|
128
|
+
|
129
|
+
- Type: `(element: Element, index: number) => string`
|
130
|
+
- Default: `(_, index) => index.toString()`
|
131
|
+
|
132
|
+
### containerElement
|
133
|
+
|
134
|
+
Provide a container element to query the elements to be marked. By default, the container element is document.body.
|
135
|
+
|
136
|
+
- Type: `Element`
|
137
|
+
- Default: `document.body`
|
138
|
+
|
139
|
+
### viewPortOnly
|
140
|
+
|
141
|
+
Only mark elements that are visible in the current viewport.
|
142
|
+
|
143
|
+
- Type: `boolean`
|
144
|
+
- Default: `false`
|
61
145
|
|
62
146
|
### Advanced example
|
63
147
|
|
64
148
|
```typescript
|
65
|
-
|
149
|
+
const markedElements = await mark({
|
66
150
|
// Only mark buttons and inputs
|
67
151
|
selector: "button, input",
|
152
|
+
// Use test id attribute for marker labels
|
153
|
+
markAttribute: "data-test-id",
|
68
154
|
// Use a blue mark with white text
|
69
155
|
markStyle: { color: "white", backgroundColor: "blue", padding: 5 },
|
70
156
|
// Use a blue dashed outline mask with a transparent and slightly blue background
|
@@ -82,11 +168,4 @@ let elements = mark({
|
|
82
168
|
// Only mark elements that are visible in the current viewport
|
83
169
|
viewPortOnly: true,
|
84
170
|
});
|
85
|
-
|
86
|
-
// Cleanup
|
87
|
-
unmark();
|
88
171
|
```
|
89
|
-
|
90
|
-
## Use with Playwright
|
91
|
-
|
92
|
-
Coming soon
|
package/dist/index.d.ts
CHANGED
@@ -4,6 +4,12 @@ interface MarkOptions {
|
|
4
4
|
* A CSS selector to specify the elements to be marked.
|
5
5
|
*/
|
6
6
|
selector?: string;
|
7
|
+
/**
|
8
|
+
* Name for the attribute added to the marked elements. This attribute is used to store the label.
|
9
|
+
*
|
10
|
+
* @default 'data-mark-id'
|
11
|
+
*/
|
12
|
+
markAttribute?: string;
|
7
13
|
/**
|
8
14
|
* A CSS style to apply to the label element.
|
9
15
|
* You can also specify a function that returns a CSS style object.
|
package/dist/main.js
CHANGED
@@ -932,6 +932,7 @@ var WebMarker = (() => {
|
|
932
932
|
return __async(this, arguments, function* (options = {}) {
|
933
933
|
const {
|
934
934
|
selector = "button, input, a, select, textarea",
|
935
|
+
markAttribute = "data-mark-id",
|
935
936
|
markStyle = {
|
936
937
|
backgroundColor: "red",
|
937
938
|
color: "white",
|
@@ -961,7 +962,7 @@ var WebMarker = (() => {
|
|
961
962
|
const markElement = createMark(element, markStyle, label, markPlacement);
|
962
963
|
const maskElement = showMasks ? createMask(element, maskStyle, label) : void 0;
|
963
964
|
markedElements[label] = { element, markElement, maskElement };
|
964
|
-
element.setAttribute(
|
965
|
+
element.setAttribute(markAttribute, label);
|
965
966
|
}))
|
966
967
|
);
|
967
968
|
document.documentElement.dataset.webmarkered = "true";
|
@@ -988,8 +989,8 @@ var WebMarker = (() => {
|
|
988
989
|
}
|
989
990
|
function createMask(element, style, label) {
|
990
991
|
const maskElement = document.createElement("div");
|
991
|
-
maskElement.className = "
|
992
|
-
maskElement.id = `
|
992
|
+
maskElement.className = "webmarker-mask";
|
993
|
+
maskElement.id = `webmarker-mask-${label}`;
|
993
994
|
document.body.appendChild(maskElement);
|
994
995
|
positionMask(maskElement, element);
|
995
996
|
applyStyle(
|
@@ -1040,7 +1041,7 @@ var WebMarker = (() => {
|
|
1040
1041
|
Object.assign(element.style, defaultStyle, customStyle);
|
1041
1042
|
}
|
1042
1043
|
function unmark() {
|
1043
|
-
document.querySelectorAll(".webmarker, .
|
1044
|
+
document.querySelectorAll(".webmarker, .webmarker-mask").forEach((el) => el.remove());
|
1044
1045
|
document.documentElement.removeAttribute("data-webmarkered");
|
1045
1046
|
cleanupFns.forEach((fn) => fn());
|
1046
1047
|
cleanupFns = [];
|
package/dist/module.js
CHANGED
@@ -908,6 +908,7 @@ function mark() {
|
|
908
908
|
return __async(this, arguments, function* (options = {}) {
|
909
909
|
const {
|
910
910
|
selector = "button, input, a, select, textarea",
|
911
|
+
markAttribute = "data-mark-id",
|
911
912
|
markStyle = {
|
912
913
|
backgroundColor: "red",
|
913
914
|
color: "white",
|
@@ -937,7 +938,7 @@ function mark() {
|
|
937
938
|
const markElement = createMark(element, markStyle, label, markPlacement);
|
938
939
|
const maskElement = showMasks ? createMask(element, maskStyle, label) : void 0;
|
939
940
|
markedElements[label] = { element, markElement, maskElement };
|
940
|
-
element.setAttribute(
|
941
|
+
element.setAttribute(markAttribute, label);
|
941
942
|
}))
|
942
943
|
);
|
943
944
|
document.documentElement.dataset.webmarkered = "true";
|
@@ -964,8 +965,8 @@ function createMark(element, style, label, markPlacement = "top-start") {
|
|
964
965
|
}
|
965
966
|
function createMask(element, style, label) {
|
966
967
|
const maskElement = document.createElement("div");
|
967
|
-
maskElement.className = "
|
968
|
-
maskElement.id = `
|
968
|
+
maskElement.className = "webmarker-mask";
|
969
|
+
maskElement.id = `webmarker-mask-${label}`;
|
969
970
|
document.body.appendChild(maskElement);
|
970
971
|
positionMask(maskElement, element);
|
971
972
|
applyStyle(
|
@@ -1016,7 +1017,7 @@ function applyStyle(element, defaultStyle, customStyle) {
|
|
1016
1017
|
Object.assign(element.style, defaultStyle, customStyle);
|
1017
1018
|
}
|
1018
1019
|
function unmark() {
|
1019
|
-
document.querySelectorAll(".webmarker, .
|
1020
|
+
document.querySelectorAll(".webmarker, .webmarker-mask").forEach((el) => el.remove());
|
1020
1021
|
document.documentElement.removeAttribute("data-webmarkered");
|
1021
1022
|
cleanupFns.forEach((fn) => fn());
|
1022
1023
|
cleanupFns = [];
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
@@ -19,6 +19,12 @@ interface MarkOptions {
|
|
19
19
|
* A CSS selector to specify the elements to be marked.
|
20
20
|
*/
|
21
21
|
selector?: string;
|
22
|
+
/**
|
23
|
+
* Name for the attribute added to the marked elements. This attribute is used to store the label.
|
24
|
+
*
|
25
|
+
* @default 'data-mark-id'
|
26
|
+
*/
|
27
|
+
markAttribute?: string;
|
22
28
|
/**
|
23
29
|
* A CSS style to apply to the label element.
|
24
30
|
* You can also specify a function that returns a CSS style object.
|
@@ -77,6 +83,7 @@ async function mark(
|
|
77
83
|
): Promise<Record<string, MarkedElement>> {
|
78
84
|
const {
|
79
85
|
selector = "button, input, a, select, textarea",
|
86
|
+
markAttribute = "data-mark-id",
|
80
87
|
markStyle = {
|
81
88
|
backgroundColor: "red",
|
82
89
|
color: "white",
|
@@ -113,7 +120,7 @@ async function mark(
|
|
113
120
|
: undefined;
|
114
121
|
|
115
122
|
markedElements[label] = { element, markElement, maskElement };
|
116
|
-
element.setAttribute(
|
123
|
+
element.setAttribute(markAttribute, label);
|
117
124
|
})
|
118
125
|
);
|
119
126
|
|
@@ -155,8 +162,8 @@ function createMask(
|
|
155
162
|
label: string
|
156
163
|
): HTMLElement {
|
157
164
|
const maskElement = document.createElement("div");
|
158
|
-
maskElement.className = "
|
159
|
-
maskElement.id = `
|
165
|
+
maskElement.className = "webmarker-mask";
|
166
|
+
maskElement.id = `webmarker-mask-${label}`;
|
160
167
|
document.body.appendChild(maskElement);
|
161
168
|
positionMask(maskElement, element);
|
162
169
|
applyStyle(
|
@@ -215,7 +222,7 @@ function applyStyle(
|
|
215
222
|
|
216
223
|
function unmark(): void {
|
217
224
|
document
|
218
|
-
.querySelectorAll(".webmarker, .
|
225
|
+
.querySelectorAll(".webmarker, .webmarker-mask")
|
219
226
|
.forEach((el) => el.remove());
|
220
227
|
document.documentElement.removeAttribute("data-webmarkered");
|
221
228
|
cleanupFns.forEach((fn) => fn());
|