playwriter 0.0.63 → 0.0.89
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/a11y-client.js +18 -8
- package/dist/aria-snapshot.d.ts +41 -3
- package/dist/aria-snapshot.d.ts.map +1 -1
- package/dist/aria-snapshot.js +134 -55
- package/dist/aria-snapshot.js.map +1 -1
- package/dist/aria-snapshot.test.js +5 -2
- package/dist/aria-snapshot.test.js.map +1 -1
- package/dist/aria-snapshot.unit.test.js +83 -41
- package/dist/aria-snapshot.unit.test.js.map +1 -1
- package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.d.ts +5 -0
- package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.d.ts.map +1 -0
- package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.js +5 -0
- package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.js.map +1 -0
- package/dist/bippy.js +1 -1
- package/dist/cdp-log.d.ts +1 -1
- package/dist/cdp-log.d.ts.map +1 -1
- package/dist/cdp-log.js +1 -1
- package/dist/cdp-log.js.map +1 -1
- package/dist/cdp-relay.d.ts.map +1 -1
- package/dist/cdp-relay.js +492 -298
- package/dist/cdp-relay.js.map +1 -1
- package/dist/cdp-session.d.ts.map +1 -1
- package/dist/cdp-session.js.map +1 -1
- package/dist/cdp-types.d.ts.map +1 -1
- package/dist/cdp-types.js +7 -7
- package/dist/cdp-types.js.map +1 -1
- package/dist/clean-html.d.ts.map +1 -1
- package/dist/clean-html.js +4 -5
- package/dist/clean-html.js.map +1 -1
- package/dist/cli.js +45 -27
- package/dist/cli.js.map +1 -1
- package/dist/create-logger.d.ts.map +1 -1
- package/dist/create-logger.js +3 -1
- package/dist/create-logger.js.map +1 -1
- package/dist/debugger-examples-types.d.ts.map +1 -1
- package/dist/debugger.d.ts.map +1 -1
- package/dist/debugger.js +1 -3
- package/dist/debugger.js.map +1 -1
- package/dist/diff-utils.d.ts.map +1 -1
- package/dist/diff-utils.js +1 -4
- package/dist/diff-utils.js.map +1 -1
- package/dist/editor-api.md +12 -2
- package/dist/editor-examples.d.ts +1 -1
- package/dist/editor-examples.d.ts.map +1 -1
- package/dist/editor-examples.js +1 -1
- package/dist/editor-examples.js.map +1 -1
- package/dist/editor.d.ts +1 -1
- package/dist/editor.d.ts.map +1 -1
- package/dist/editor.js +1 -1
- package/dist/editor.js.map +1 -1
- package/dist/executor.d.ts +26 -3
- package/dist/executor.d.ts.map +1 -1
- package/dist/executor.js +297 -64
- package/dist/executor.js.map +1 -1
- package/dist/executor.unit.test.js +38 -1
- package/dist/executor.unit.test.js.map +1 -1
- package/dist/extension-connection.test.js +139 -36
- package/dist/extension-connection.test.js.map +1 -1
- package/dist/ffmpeg.d.ts +148 -0
- package/dist/ffmpeg.d.ts.map +1 -0
- package/dist/ffmpeg.js +523 -0
- package/dist/ffmpeg.js.map +1 -0
- package/dist/ghost-browser.d.ts.map +1 -1
- package/dist/ghost-browser.js.map +1 -1
- package/dist/ghost-cursor-client.js +287 -0
- package/dist/ghost-cursor.d.ts +27 -0
- package/dist/ghost-cursor.d.ts.map +1 -0
- package/dist/ghost-cursor.js +63 -0
- package/dist/ghost-cursor.js.map +1 -0
- package/dist/htmlrewrite.d.ts.map +1 -1
- package/dist/htmlrewrite.js +17 -55
- package/dist/htmlrewrite.js.map +1 -1
- package/dist/htmlrewrite.test.js.map +1 -1
- package/dist/kill-port.d.ts.map +1 -1
- package/dist/kill-port.js +1 -3
- package/dist/kill-port.js.map +1 -1
- package/dist/locator-selector.test.d.ts +2 -0
- package/dist/locator-selector.test.d.ts.map +1 -0
- package/dist/locator-selector.test.js +96 -0
- package/dist/locator-selector.test.js.map +1 -0
- package/dist/mcp-client.js.map +1 -1
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +8 -3
- package/dist/mcp.js.map +1 -1
- package/dist/on-mouse-action.test.d.ts +2 -0
- package/dist/on-mouse-action.test.d.ts.map +1 -0
- package/dist/on-mouse-action.test.js +155 -0
- package/dist/on-mouse-action.test.js.map +1 -0
- package/dist/page-markdown.js +4 -4
- package/dist/page-markdown.js.map +1 -1
- package/dist/prompt.md +450 -377
- package/dist/protocol.d.ts +4 -0
- package/dist/protocol.d.ts.map +1 -1
- package/dist/readability.js +16 -2
- package/dist/recording-ghost-cursor.d.ts +41 -0
- package/dist/recording-ghost-cursor.d.ts.map +1 -0
- package/dist/recording-ghost-cursor.js +79 -0
- package/dist/recording-ghost-cursor.js.map +1 -0
- package/dist/recording-relay.d.ts.map +1 -1
- package/dist/recording-relay.js +8 -8
- package/dist/recording-relay.js.map +1 -1
- package/dist/relay-client.d.ts +17 -4
- package/dist/relay-client.d.ts.map +1 -1
- package/dist/relay-client.js +45 -11
- package/dist/relay-client.js.map +1 -1
- package/dist/relay-core.test.d.ts.map +1 -1
- package/dist/relay-core.test.js +515 -26
- package/dist/relay-core.test.js.map +1 -1
- package/dist/relay-navigation.test.d.ts.map +1 -1
- package/dist/relay-navigation.test.js +169 -31
- package/dist/relay-navigation.test.js.map +1 -1
- package/dist/relay-session.test.d.ts.map +1 -1
- package/dist/relay-session.test.js +113 -65
- package/dist/relay-session.test.js.map +1 -1
- package/dist/relay-state.d.ts +158 -0
- package/dist/relay-state.d.ts.map +1 -0
- package/dist/relay-state.js +306 -0
- package/dist/relay-state.js.map +1 -0
- package/dist/relay-state.test.d.ts +2 -0
- package/dist/relay-state.test.d.ts.map +1 -0
- package/dist/relay-state.test.js +472 -0
- package/dist/relay-state.test.js.map +1 -0
- package/dist/scoped-fs.d.ts.map +1 -1
- package/dist/scoped-fs.js.map +1 -1
- package/dist/screen-recording.d.ts +66 -4
- package/dist/screen-recording.d.ts.map +1 -1
- package/dist/screen-recording.js +150 -13
- package/dist/screen-recording.js.map +1 -1
- package/dist/screen-recording.test.d.ts +2 -0
- package/dist/screen-recording.test.d.ts.map +1 -0
- package/dist/screen-recording.test.js +102 -0
- package/dist/screen-recording.test.js.map +1 -0
- package/dist/selector-generator.js +1 -1
- package/dist/snapshot-tools.test.js +71 -28
- package/dist/snapshot-tools.test.js.map +1 -1
- package/dist/start-relay-server.d.ts +1 -1
- package/dist/start-relay-server.d.ts.map +1 -1
- package/dist/start-relay-server.js +1 -1
- package/dist/start-relay-server.js.map +1 -1
- package/dist/styles-api.md +8 -1
- package/dist/styles-examples.d.ts +1 -1
- package/dist/styles-examples.d.ts.map +1 -1
- package/dist/styles-examples.js +1 -1
- package/dist/styles-examples.js.map +1 -1
- package/dist/styles.d.ts.map +1 -1
- package/dist/styles.js +1 -3
- package/dist/styles.js.map +1 -1
- package/dist/test-declarations.d.ts.map +1 -1
- package/dist/test-utils.d.ts +1 -1
- package/dist/test-utils.d.ts.map +1 -1
- package/dist/test-utils.js +7 -5
- package/dist/test-utils.js.map +1 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js.map +1 -1
- package/dist/wait-for-page-load.d.ts.map +1 -1
- package/dist/wait-for-page-load.js +1 -1
- package/dist/wait-for-page-load.js.map +1 -1
- package/package.json +4 -3
- package/src/a11y-client.ts +5 -4
- package/src/aria-snapshot.test.ts +5 -2
- package/src/aria-snapshot.ts +306 -117
- package/src/aria-snapshot.unit.test.ts +199 -141
- package/src/aria-snapshots/github-interactive.txt +2 -0
- package/src/aria-snapshots/github-raw.txt +5 -1
- package/src/aria-snapshots/hackernews-interactive.txt +238 -241
- package/src/aria-snapshots/hackernews-raw.txt +265 -269
- package/src/assets/aria-labels-example.png +0 -0
- package/src/assets/aria-labels-github.png +0 -0
- package/src/assets/aria-labels-hacker-news.png +0 -0
- package/src/assets/aria-labels-old-reddit.png +0 -0
- package/src/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.ts +5 -0
- package/src/assets/cursors/screen-studio/pointer-macos-tahoe.svg +18 -0
- package/src/cdp-log.ts +4 -1
- package/src/cdp-relay.ts +1059 -737
- package/src/cdp-session.ts +12 -3
- package/src/cdp-types.ts +51 -51
- package/src/clean-html.ts +4 -5
- package/src/cli.ts +82 -55
- package/src/create-logger.ts +5 -3
- package/src/debugger-examples-types.ts +4 -1
- package/src/debugger.ts +1 -5
- package/src/diff-utils.ts +2 -5
- package/src/editor-examples.ts +11 -1
- package/src/editor.ts +10 -2
- package/src/executor.ts +374 -73
- package/src/executor.unit.test.ts +48 -1
- package/src/extension-connection.test.ts +612 -488
- package/src/ffmpeg.ts +769 -0
- package/src/ghost-browser.ts +4 -6
- package/src/ghost-cursor-client.ts +369 -0
- package/src/ghost-cursor.ts +110 -0
- package/src/htmlrewrite.test.ts +6 -2
- package/src/htmlrewrite.ts +348 -386
- package/src/kill-port.ts +1 -3
- package/src/locator-selector.test.ts +115 -0
- package/src/mcp-client.ts +1 -1
- package/src/mcp.ts +21 -15
- package/src/on-mouse-action.test.ts +196 -0
- package/src/page-markdown.ts +7 -7
- package/src/protocol.ts +73 -57
- package/src/recording-ghost-cursor.ts +113 -0
- package/src/recording-relay.ts +20 -12
- package/src/relay-client.ts +85 -18
- package/src/relay-core.test.ts +1117 -578
- package/src/relay-navigation.test.ts +648 -483
- package/src/relay-session.test.ts +984 -929
- package/src/relay-state.test.ts +570 -0
- package/src/relay-state.ts +497 -0
- package/src/resource.md +21 -49
- package/src/scoped-fs.ts +9 -3
- package/src/screen-recording.test.ts +111 -0
- package/src/screen-recording.ts +256 -31
- package/src/skill.md +476 -396
- package/src/snapshot-tools.test.ts +580 -528
- package/src/snapshots/shadcn-ui-accessibility-full.md +8 -8
- package/src/snapshots/shadcn-ui-accessibility-interactive.md +8 -8
- package/src/start-relay-server.ts +14 -11
- package/src/styles-examples.ts +8 -1
- package/src/styles.ts +20 -21
- package/src/test-declarations.ts +6 -6
- package/src/test-utils.ts +104 -91
- package/src/utils.ts +2 -1
- package/src/wait-for-page-load.ts +6 -1
package/src/htmlrewrite.ts
CHANGED
|
@@ -2,427 +2,389 @@ import posthtml from 'posthtml'
|
|
|
2
2
|
import beautify from 'posthtml-beautify'
|
|
3
3
|
|
|
4
4
|
export interface FormatHtmlOptions {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
html: string
|
|
6
|
+
keepStyles?: boolean
|
|
7
|
+
maxAttrLen?: number
|
|
8
|
+
maxContentLen?: number
|
|
9
9
|
}
|
|
10
10
|
|
|
11
11
|
export async function formatHtmlForPrompt({
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
html,
|
|
13
|
+
keepStyles = false,
|
|
14
|
+
maxAttrLen = 200,
|
|
15
|
+
maxContentLen = 500,
|
|
16
16
|
}: FormatHtmlOptions) {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
17
|
+
const tagsToRemove = ['hint', 'style', 'link', 'script', 'meta', 'noscript', 'svg', 'head']
|
|
18
|
+
|
|
19
|
+
const attributesToKeep = [
|
|
20
|
+
// Standard descriptive attributes
|
|
21
|
+
'label',
|
|
22
|
+
'title',
|
|
23
|
+
'alt',
|
|
24
|
+
'href',
|
|
25
|
+
'name',
|
|
26
|
+
'value',
|
|
27
|
+
'checked',
|
|
28
|
+
'placeholder',
|
|
29
|
+
'type',
|
|
30
|
+
'role',
|
|
31
|
+
'target',
|
|
32
|
+
// Descriptive aria attributes (text content)
|
|
33
|
+
'aria-label',
|
|
34
|
+
'aria-placeholder',
|
|
35
|
+
'aria-valuetext',
|
|
36
|
+
'aria-roledescription',
|
|
37
|
+
// Useful aria state attributes
|
|
38
|
+
'aria-hidden',
|
|
39
|
+
'aria-expanded',
|
|
40
|
+
'aria-checked',
|
|
41
|
+
'aria-selected',
|
|
42
|
+
'aria-disabled',
|
|
43
|
+
'aria-pressed',
|
|
44
|
+
'aria-required',
|
|
45
|
+
'aria-current',
|
|
46
|
+
// Test IDs (data-testid, data-test, data-cy, data-qa are covered by data-* prefix)
|
|
47
|
+
'testid',
|
|
48
|
+
'test-id',
|
|
49
|
+
'tid',
|
|
50
|
+
'qa',
|
|
51
|
+
'qa-id',
|
|
52
|
+
'e2e',
|
|
53
|
+
'e2e-id',
|
|
54
|
+
'automation-id',
|
|
55
|
+
'automationid',
|
|
56
|
+
'selenium',
|
|
57
|
+
'pw',
|
|
58
|
+
'vimium-label',
|
|
59
|
+
// Conditionally added: 'style', 'class'
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
if (keepStyles) {
|
|
63
|
+
attributesToKeep.push('style', 'class')
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
const truncate = (str: string, maxLen: number): string => {
|
|
67
|
+
if (str.length <= maxLen) return str
|
|
68
|
+
const remaining = str.length - maxLen
|
|
69
|
+
return str.slice(0, maxLen) + `...${remaining} more characters`
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// Create a custom plugin to remove tags and filter attributes
|
|
73
|
+
const removeTagsAndAttrsPlugin = () => {
|
|
74
|
+
return (tree) => {
|
|
75
|
+
// Remove comments at root level
|
|
76
|
+
tree = tree.filter((item) => {
|
|
77
|
+
if (typeof item === 'string') {
|
|
78
|
+
const trimmed = item.trim()
|
|
79
|
+
return !(trimmed.startsWith('<!--') && trimmed.endsWith('-->'))
|
|
80
|
+
}
|
|
81
|
+
return true
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
// Process each node recursively
|
|
85
|
+
const processNode = (node) => {
|
|
86
|
+
if (typeof node === 'string') {
|
|
87
|
+
// Truncate text content
|
|
88
|
+
const trimmed = node.trim()
|
|
89
|
+
if (trimmed.length === 0) return node
|
|
90
|
+
return truncate(node, maxContentLen)
|
|
91
|
+
}
|
|
74
92
|
|
|
75
|
-
|
|
76
|
-
if (
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
}
|
|
93
|
+
// Remove unwanted tags
|
|
94
|
+
if (node.tag && tagsToRemove.includes(node.tag.toLowerCase())) {
|
|
95
|
+
return null
|
|
96
|
+
}
|
|
80
97
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if (typeof item === 'string') {
|
|
87
|
-
const trimmed = item.trim()
|
|
88
|
-
return !(trimmed.startsWith('<!--') && trimmed.endsWith('-->'))
|
|
89
|
-
}
|
|
90
|
-
return true
|
|
91
|
-
})
|
|
92
|
-
|
|
93
|
-
// Process each node recursively
|
|
94
|
-
const processNode = (node) => {
|
|
95
|
-
if (typeof node === 'string') {
|
|
96
|
-
// Truncate text content
|
|
97
|
-
const trimmed = node.trim()
|
|
98
|
-
if (trimmed.length === 0) return node
|
|
99
|
-
return truncate(node, maxContentLen)
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
// Remove unwanted tags
|
|
103
|
-
if (node.tag && tagsToRemove.includes(node.tag.toLowerCase())) {
|
|
104
|
-
return null
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// Filter attributes
|
|
108
|
-
if (node.attrs) {
|
|
109
|
-
const newAttrs: typeof node.attrs = {}
|
|
110
|
-
for (const [attr, value] of Object.entries(node.attrs)) {
|
|
111
|
-
const shouldKeep =
|
|
112
|
-
attr.startsWith('data-') ||
|
|
113
|
-
attributesToKeep.includes(attr)
|
|
114
|
-
|
|
115
|
-
if (shouldKeep) {
|
|
116
|
-
// Truncate attribute values
|
|
117
|
-
newAttrs[attr] = typeof value === 'string'
|
|
118
|
-
? truncate(value, maxAttrLen)
|
|
119
|
-
: value
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
node.attrs = newAttrs
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
// Process content recursively
|
|
126
|
-
if (node.content && Array.isArray(node.content)) {
|
|
127
|
-
node.content = node.content
|
|
128
|
-
.map(processNode)
|
|
129
|
-
.filter(item => {
|
|
130
|
-
if (item === null) return false
|
|
131
|
-
if (typeof item === 'string') {
|
|
132
|
-
const trimmed = item.trim()
|
|
133
|
-
return !(trimmed.startsWith('<!--') && trimmed.endsWith('-->'))
|
|
134
|
-
}
|
|
135
|
-
return true
|
|
136
|
-
})
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
return node
|
|
140
|
-
}
|
|
98
|
+
// Filter attributes
|
|
99
|
+
if (node.attrs) {
|
|
100
|
+
const newAttrs: typeof node.attrs = {}
|
|
101
|
+
for (const [attr, value] of Object.entries(node.attrs)) {
|
|
102
|
+
const shouldKeep = attr.startsWith('data-') || attributesToKeep.includes(attr)
|
|
141
103
|
|
|
142
|
-
|
|
143
|
-
|
|
104
|
+
if (shouldKeep) {
|
|
105
|
+
// Truncate attribute values
|
|
106
|
+
newAttrs[attr] = typeof value === 'string' ? truncate(value, maxAttrLen) : value
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
node.attrs = newAttrs
|
|
144
110
|
}
|
|
145
|
-
}
|
|
146
111
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
// Remove if aria-hidden="true"
|
|
156
|
-
if (node.attrs?.['aria-hidden'] === 'true') {
|
|
157
|
-
return null
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
// Process children recursively
|
|
161
|
-
if (node.content && Array.isArray(node.content)) {
|
|
162
|
-
node.content = node.content
|
|
163
|
-
.map(processNode)
|
|
164
|
-
.filter((item) => item !== null)
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
return node
|
|
112
|
+
// Process content recursively
|
|
113
|
+
if (node.content && Array.isArray(node.content)) {
|
|
114
|
+
node.content = node.content.map(processNode).filter((item) => {
|
|
115
|
+
if (item === null) return false
|
|
116
|
+
if (typeof item === 'string') {
|
|
117
|
+
const trimmed = item.trim()
|
|
118
|
+
return !(trimmed.startsWith('<!--') && trimmed.endsWith('-->'))
|
|
168
119
|
}
|
|
169
|
-
|
|
170
|
-
|
|
120
|
+
return true
|
|
121
|
+
})
|
|
171
122
|
}
|
|
172
|
-
}
|
|
173
123
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
const removeEmptyAltImagesPlugin = () => {
|
|
177
|
-
return (tree) => {
|
|
178
|
-
const processNode = (node) => {
|
|
179
|
-
if (typeof node === 'string') return node
|
|
180
|
-
if (!node.tag) return node
|
|
181
|
-
|
|
182
|
-
// Remove img with empty or missing alt
|
|
183
|
-
if (node.tag.toLowerCase() === 'img') {
|
|
184
|
-
const alt = node.attrs?.alt
|
|
185
|
-
if (alt === '' || alt === undefined) {
|
|
186
|
-
return null
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
// Process children recursively
|
|
191
|
-
if (node.content && Array.isArray(node.content)) {
|
|
192
|
-
node.content = node.content
|
|
193
|
-
.map(processNode)
|
|
194
|
-
.filter((item) => item !== null)
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
return node
|
|
198
|
-
}
|
|
124
|
+
return node
|
|
125
|
+
}
|
|
199
126
|
|
|
200
|
-
|
|
201
|
-
|
|
127
|
+
// Process all root nodes
|
|
128
|
+
return tree.map(processNode).filter((item) => item !== null)
|
|
202
129
|
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
// Plugin to remove aria-hidden="true" subtrees entirely
|
|
133
|
+
// These are hidden from assistive tech and usually decorative
|
|
134
|
+
const removeAriaHiddenPlugin = () => {
|
|
135
|
+
return (tree) => {
|
|
136
|
+
const processNode = (node) => {
|
|
137
|
+
if (typeof node === 'string') return node
|
|
138
|
+
if (!node.tag) return node
|
|
139
|
+
|
|
140
|
+
// Remove if aria-hidden="true"
|
|
141
|
+
if (node.attrs?.['aria-hidden'] === 'true') {
|
|
142
|
+
return null
|
|
143
|
+
}
|
|
203
144
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
const removeDecorativeSubtreesPlugin = () => {
|
|
209
|
-
const actionableTags = ['button', 'a', 'input', 'select', 'textarea']
|
|
210
|
-
const meaningfulAttrs = [
|
|
211
|
-
'aria-label',
|
|
212
|
-
'title',
|
|
213
|
-
'alt',
|
|
214
|
-
'value',
|
|
215
|
-
'placeholder',
|
|
216
|
-
'href',
|
|
217
|
-
'name',
|
|
218
|
-
]
|
|
219
|
-
|
|
220
|
-
// Form elements are always actionable, keep unconditionally
|
|
221
|
-
const formTags = ['input', 'select', 'textarea']
|
|
222
|
-
|
|
223
|
-
// Check if a subtree has any useful content
|
|
224
|
-
const hasUsefulContent = (node): boolean => {
|
|
225
|
-
if (typeof node === 'string') {
|
|
226
|
-
return node.trim().length > 0
|
|
227
|
-
}
|
|
228
|
-
if (!node.tag) return false
|
|
145
|
+
// Process children recursively
|
|
146
|
+
if (node.content && Array.isArray(node.content)) {
|
|
147
|
+
node.content = node.content.map(processNode).filter((item) => item !== null)
|
|
148
|
+
}
|
|
229
149
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
return true
|
|
233
|
-
}
|
|
150
|
+
return node
|
|
151
|
+
}
|
|
234
152
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
153
|
+
return tree.map(processNode).filter((item) => item !== null)
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// Plugin to remove images with empty alt text (purely decorative)
|
|
158
|
+
// Runs before decorative subtree pruning so containers become empty
|
|
159
|
+
const removeEmptyAltImagesPlugin = () => {
|
|
160
|
+
return (tree) => {
|
|
161
|
+
const processNode = (node) => {
|
|
162
|
+
if (typeof node === 'string') return node
|
|
163
|
+
if (!node.tag) return node
|
|
164
|
+
|
|
165
|
+
// Remove img with empty or missing alt
|
|
166
|
+
if (node.tag.toLowerCase() === 'img') {
|
|
167
|
+
const alt = node.attrs?.alt
|
|
168
|
+
if (alt === '' || alt === undefined) {
|
|
169
|
+
return null
|
|
170
|
+
}
|
|
171
|
+
}
|
|
242
172
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
const value = node.attrs[attr]
|
|
248
|
-
if (typeof value === 'string' && value.trim().length > 0) {
|
|
249
|
-
return true
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
}
|
|
173
|
+
// Process children recursively
|
|
174
|
+
if (node.content && Array.isArray(node.content)) {
|
|
175
|
+
node.content = node.content.map(processNode).filter((item) => item !== null)
|
|
176
|
+
}
|
|
254
177
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
for (const child of node.content) {
|
|
258
|
-
if (hasUsefulContent(child)) {
|
|
259
|
-
return true
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
}
|
|
178
|
+
return node
|
|
179
|
+
}
|
|
263
180
|
|
|
264
|
-
|
|
181
|
+
return tree.map(processNode).filter((item) => item !== null)
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// Plugin to remove decorative subtrees that have no useful content for agents
|
|
186
|
+
// A subtree is decorative if it has:
|
|
187
|
+
// - No text content (leaf text nodes)
|
|
188
|
+
// - No actionable elements with meaningful attributes
|
|
189
|
+
const removeDecorativeSubtreesPlugin = () => {
|
|
190
|
+
const actionableTags = ['button', 'a', 'input', 'select', 'textarea']
|
|
191
|
+
const meaningfulAttrs = ['aria-label', 'title', 'alt', 'value', 'placeholder', 'href', 'name']
|
|
192
|
+
|
|
193
|
+
// Form elements are always actionable, keep unconditionally
|
|
194
|
+
const formTags = ['input', 'select', 'textarea']
|
|
195
|
+
|
|
196
|
+
// Check if a subtree has any useful content
|
|
197
|
+
const hasUsefulContent = (node): boolean => {
|
|
198
|
+
if (typeof node === 'string') {
|
|
199
|
+
return node.trim().length > 0
|
|
200
|
+
}
|
|
201
|
+
if (!node.tag) return false
|
|
202
|
+
|
|
203
|
+
// Form elements are always useful for agents to interact with
|
|
204
|
+
if (formTags.includes(node.tag.toLowerCase())) {
|
|
205
|
+
return true
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
// Images with non-empty alt text are useful (descriptive content)
|
|
209
|
+
if (node.tag.toLowerCase() === 'img') {
|
|
210
|
+
const alt = node.attrs?.alt
|
|
211
|
+
if (typeof alt === 'string' && alt.trim().length > 0) {
|
|
212
|
+
return true
|
|
265
213
|
}
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
.map(processNode)
|
|
276
|
-
.filter((item) => item !== null)
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
// After processing children, check if this subtree is now decorative
|
|
280
|
-
// Skip root-level semantic elements (body, main, etc.)
|
|
281
|
-
const semanticTags = [
|
|
282
|
-
'html',
|
|
283
|
-
'body',
|
|
284
|
-
'main',
|
|
285
|
-
'header',
|
|
286
|
-
'footer',
|
|
287
|
-
'nav',
|
|
288
|
-
'section',
|
|
289
|
-
'article',
|
|
290
|
-
'aside',
|
|
291
|
-
]
|
|
292
|
-
if (semanticTags.includes(node.tag.toLowerCase())) {
|
|
293
|
-
return node
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
// If no useful content in this subtree, remove it
|
|
297
|
-
if (!hasUsefulContent(node)) {
|
|
298
|
-
return null
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
return node
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// Check if this is an actionable element with meaningful attributes
|
|
217
|
+
if (actionableTags.includes(node.tag.toLowerCase())) {
|
|
218
|
+
if (node.attrs) {
|
|
219
|
+
for (const attr of meaningfulAttrs) {
|
|
220
|
+
const value = node.attrs[attr]
|
|
221
|
+
if (typeof value === 'string' && value.trim().length > 0) {
|
|
222
|
+
return true
|
|
302
223
|
}
|
|
303
|
-
|
|
304
|
-
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// Check children recursively
|
|
229
|
+
if (node.content && Array.isArray(node.content)) {
|
|
230
|
+
for (const child of node.content) {
|
|
231
|
+
if (hasUsefulContent(child)) {
|
|
232
|
+
return true
|
|
233
|
+
}
|
|
305
234
|
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
return false
|
|
306
238
|
}
|
|
307
239
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
const isWhitespaceOnly = (node) => {
|
|
313
|
-
return typeof node === 'string' && node.trim().length === 0
|
|
314
|
-
}
|
|
240
|
+
return (tree) => {
|
|
241
|
+
const processNode = (node) => {
|
|
242
|
+
if (typeof node === 'string') return node
|
|
243
|
+
if (!node.tag) return node
|
|
315
244
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
const unwrapNode = (node) => {
|
|
321
|
-
if (typeof node === 'string') return node
|
|
322
|
-
if (!node.tag) return node
|
|
323
|
-
|
|
324
|
-
// First, recursively process children
|
|
325
|
-
if (node.content && Array.isArray(node.content)) {
|
|
326
|
-
node.content = node.content.map(unwrapNode)
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
// Check if this node is an unnecessary wrapper:
|
|
330
|
-
// - has no attributes
|
|
331
|
-
// - has exactly one non-whitespace child that is an element
|
|
332
|
-
if (hasNoAttrs(node) && node.content && Array.isArray(node.content)) {
|
|
333
|
-
const nonWhitespaceChildren = node.content.filter(c => !isWhitespaceOnly(c))
|
|
334
|
-
|
|
335
|
-
if (nonWhitespaceChildren.length === 1) {
|
|
336
|
-
const onlyChild = nonWhitespaceChildren[0]
|
|
337
|
-
// If the only child is also an element (not text), unwrap
|
|
338
|
-
if (typeof onlyChild !== 'string' && onlyChild.tag) {
|
|
339
|
-
// Replace this node with its child
|
|
340
|
-
return onlyChild
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
return node
|
|
346
|
-
}
|
|
245
|
+
// First process children
|
|
246
|
+
if (node.content && Array.isArray(node.content)) {
|
|
247
|
+
node.content = node.content.map(processNode).filter((item) => item !== null)
|
|
248
|
+
}
|
|
347
249
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
result = result.map(unwrapNode)
|
|
355
|
-
currJson = JSON.stringify(result)
|
|
356
|
-
}
|
|
250
|
+
// After processing children, check if this subtree is now decorative
|
|
251
|
+
// Skip root-level semantic elements (body, main, etc.)
|
|
252
|
+
const semanticTags = ['html', 'body', 'main', 'header', 'footer', 'nav', 'section', 'article', 'aside']
|
|
253
|
+
if (semanticTags.includes(node.tag.toLowerCase())) {
|
|
254
|
+
return node
|
|
255
|
+
}
|
|
357
256
|
|
|
358
|
-
|
|
257
|
+
// If no useful content in this subtree, remove it
|
|
258
|
+
if (!hasUsefulContent(node)) {
|
|
259
|
+
return null
|
|
359
260
|
}
|
|
360
|
-
}
|
|
361
261
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
const removeEmptyElementsPlugin = () => {
|
|
365
|
-
return (tree) => {
|
|
366
|
-
const isEmptyElement = (node) => {
|
|
367
|
-
if (typeof node === 'string') return false
|
|
368
|
-
if (!node.tag) return false
|
|
369
|
-
const hasAttrs = node.attrs && Object.keys(node.attrs).length > 0
|
|
370
|
-
const hasContent = node.content && node.content.some(c =>
|
|
371
|
-
typeof c === 'string' ? c.trim().length > 0 : true
|
|
372
|
-
)
|
|
373
|
-
return !hasAttrs && !hasContent
|
|
374
|
-
}
|
|
262
|
+
return node
|
|
263
|
+
}
|
|
375
264
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
265
|
+
return tree.map(processNode).filter((item) => item !== null)
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
// Plugin to unwrap unnecessary nested wrapper elements
|
|
270
|
+
// e.g., <div><div><div><p>text</p></div></div></div> -> <div><p>text</p></div>
|
|
271
|
+
const unwrapNestedWrappersPlugin = () => {
|
|
272
|
+
return (tree) => {
|
|
273
|
+
const isWhitespaceOnly = (node) => {
|
|
274
|
+
return typeof node === 'string' && node.trim().length === 0
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
const hasNoAttrs = (node) => {
|
|
278
|
+
return !node.attrs || Object.keys(node.attrs).length === 0
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
const unwrapNode = (node) => {
|
|
282
|
+
if (typeof node === 'string') return node
|
|
283
|
+
if (!node.tag) return node
|
|
284
|
+
|
|
285
|
+
// First, recursively process children
|
|
286
|
+
if (node.content && Array.isArray(node.content)) {
|
|
287
|
+
node.content = node.content.map(unwrapNode)
|
|
288
|
+
}
|
|
389
289
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
290
|
+
// Check if this node is an unnecessary wrapper:
|
|
291
|
+
// - has no attributes
|
|
292
|
+
// - has exactly one non-whitespace child that is an element
|
|
293
|
+
if (hasNoAttrs(node) && node.content && Array.isArray(node.content)) {
|
|
294
|
+
const nonWhitespaceChildren = node.content.filter((c) => !isWhitespaceOnly(c))
|
|
295
|
+
|
|
296
|
+
if (nonWhitespaceChildren.length === 1) {
|
|
297
|
+
const onlyChild = nonWhitespaceChildren[0]
|
|
298
|
+
// If the only child is also an element (not text), unwrap
|
|
299
|
+
if (typeof onlyChild !== 'string' && onlyChild.tag) {
|
|
300
|
+
// Replace this node with its child
|
|
301
|
+
return onlyChild
|
|
398
302
|
}
|
|
399
|
-
|
|
400
|
-
return result
|
|
303
|
+
}
|
|
401
304
|
}
|
|
402
|
-
}
|
|
403
305
|
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
.use(removeTagsAndAttrsPlugin())
|
|
407
|
-
.use(removeAriaHiddenPlugin())
|
|
408
|
-
.use(removeEmptyAltImagesPlugin())
|
|
409
|
-
.use(removeDecorativeSubtreesPlugin())
|
|
410
|
-
.use(removeEmptyElementsPlugin())
|
|
411
|
-
.use(unwrapNestedWrappersPlugin())
|
|
412
|
-
.use(beautify({
|
|
413
|
-
rules: {
|
|
414
|
-
indent: 1, // 1-space indent
|
|
415
|
-
blankLines: false, // no extra blank lines
|
|
416
|
-
maxlen: 100000 // effectively never wrap by content length
|
|
417
|
-
},
|
|
418
|
-
jsBeautifyOptions: {
|
|
419
|
-
wrap_line_length: 0, // disable js-beautify wrapping
|
|
420
|
-
preserve_newlines: false // reduce stray newlines
|
|
421
|
-
}
|
|
422
|
-
}))
|
|
306
|
+
return node
|
|
307
|
+
}
|
|
423
308
|
|
|
424
|
-
|
|
425
|
-
|
|
309
|
+
// Apply multiple passes until stable (handles deeply nested wrappers)
|
|
310
|
+
let result = tree.map(unwrapNode)
|
|
311
|
+
let prevJson = ''
|
|
312
|
+
let currJson = JSON.stringify(result)
|
|
313
|
+
while (prevJson !== currJson) {
|
|
314
|
+
prevJson = currJson
|
|
315
|
+
result = result.map(unwrapNode)
|
|
316
|
+
currJson = JSON.stringify(result)
|
|
317
|
+
}
|
|
426
318
|
|
|
427
|
-
|
|
319
|
+
return result
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
// Plugin to remove empty elements (no attrs, no content)
|
|
324
|
+
// Runs repeatedly until no more empty elements exist
|
|
325
|
+
const removeEmptyElementsPlugin = () => {
|
|
326
|
+
return (tree) => {
|
|
327
|
+
const isEmptyElement = (node) => {
|
|
328
|
+
if (typeof node === 'string') return false
|
|
329
|
+
if (!node.tag) return false
|
|
330
|
+
const hasAttrs = node.attrs && Object.keys(node.attrs).length > 0
|
|
331
|
+
const hasContent =
|
|
332
|
+
node.content && node.content.some((c) => (typeof c === 'string' ? c.trim().length > 0 : true))
|
|
333
|
+
return !hasAttrs && !hasContent
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
const removeEmpty = (content) => {
|
|
337
|
+
if (!content || !Array.isArray(content)) return content
|
|
338
|
+
|
|
339
|
+
return content
|
|
340
|
+
.map((node) => {
|
|
341
|
+
if (typeof node === 'string') return node
|
|
342
|
+
if (node.content) {
|
|
343
|
+
node.content = removeEmpty(node.content)
|
|
344
|
+
}
|
|
345
|
+
return node
|
|
346
|
+
})
|
|
347
|
+
.filter((node) => !isEmptyElement(node))
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
// Apply multiple passes until stable
|
|
351
|
+
let result = removeEmpty(tree)
|
|
352
|
+
let prevJson = ''
|
|
353
|
+
let currJson = JSON.stringify(result)
|
|
354
|
+
while (prevJson !== currJson) {
|
|
355
|
+
prevJson = currJson
|
|
356
|
+
result = removeEmpty(result)
|
|
357
|
+
currJson = JSON.stringify(result)
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
return result
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
// Process HTML
|
|
365
|
+
const processor = posthtml()
|
|
366
|
+
.use(removeTagsAndAttrsPlugin())
|
|
367
|
+
.use(removeAriaHiddenPlugin())
|
|
368
|
+
.use(removeEmptyAltImagesPlugin())
|
|
369
|
+
.use(removeDecorativeSubtreesPlugin())
|
|
370
|
+
.use(removeEmptyElementsPlugin())
|
|
371
|
+
.use(unwrapNestedWrappersPlugin())
|
|
372
|
+
.use(
|
|
373
|
+
beautify({
|
|
374
|
+
rules: {
|
|
375
|
+
indent: 1, // 1-space indent
|
|
376
|
+
blankLines: false, // no extra blank lines
|
|
377
|
+
maxlen: 100000, // effectively never wrap by content length
|
|
378
|
+
},
|
|
379
|
+
jsBeautifyOptions: {
|
|
380
|
+
wrap_line_length: 0, // disable js-beautify wrapping
|
|
381
|
+
preserve_newlines: false, // reduce stray newlines
|
|
382
|
+
},
|
|
383
|
+
}),
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
// Process with await
|
|
387
|
+
const result = await processor.process(html)
|
|
388
|
+
|
|
389
|
+
return result.html
|
|
428
390
|
}
|