playwriter 0.0.63 → 0.0.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. package/dist/a11y-client.js +18 -8
  2. package/dist/aria-snapshot.d.ts +41 -3
  3. package/dist/aria-snapshot.d.ts.map +1 -1
  4. package/dist/aria-snapshot.js +134 -55
  5. package/dist/aria-snapshot.js.map +1 -1
  6. package/dist/aria-snapshot.test.js +5 -2
  7. package/dist/aria-snapshot.test.js.map +1 -1
  8. package/dist/aria-snapshot.unit.test.js +83 -41
  9. package/dist/aria-snapshot.unit.test.js.map +1 -1
  10. package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.d.ts +5 -0
  11. package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.d.ts.map +1 -0
  12. package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.js +5 -0
  13. package/dist/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.js.map +1 -0
  14. package/dist/bippy.js +1 -1
  15. package/dist/cdp-log.d.ts +1 -1
  16. package/dist/cdp-log.d.ts.map +1 -1
  17. package/dist/cdp-log.js +1 -1
  18. package/dist/cdp-log.js.map +1 -1
  19. package/dist/cdp-relay.d.ts.map +1 -1
  20. package/dist/cdp-relay.js +492 -298
  21. package/dist/cdp-relay.js.map +1 -1
  22. package/dist/cdp-session.d.ts.map +1 -1
  23. package/dist/cdp-session.js.map +1 -1
  24. package/dist/cdp-types.d.ts.map +1 -1
  25. package/dist/cdp-types.js +7 -7
  26. package/dist/cdp-types.js.map +1 -1
  27. package/dist/clean-html.d.ts.map +1 -1
  28. package/dist/clean-html.js +4 -5
  29. package/dist/clean-html.js.map +1 -1
  30. package/dist/cli.js +45 -27
  31. package/dist/cli.js.map +1 -1
  32. package/dist/create-logger.d.ts.map +1 -1
  33. package/dist/create-logger.js +3 -1
  34. package/dist/create-logger.js.map +1 -1
  35. package/dist/debugger-examples-types.d.ts.map +1 -1
  36. package/dist/debugger.d.ts.map +1 -1
  37. package/dist/debugger.js +1 -3
  38. package/dist/debugger.js.map +1 -1
  39. package/dist/diff-utils.d.ts.map +1 -1
  40. package/dist/diff-utils.js +1 -4
  41. package/dist/diff-utils.js.map +1 -1
  42. package/dist/editor-api.md +12 -2
  43. package/dist/editor-examples.d.ts +1 -1
  44. package/dist/editor-examples.d.ts.map +1 -1
  45. package/dist/editor-examples.js +1 -1
  46. package/dist/editor-examples.js.map +1 -1
  47. package/dist/editor.d.ts +1 -1
  48. package/dist/editor.d.ts.map +1 -1
  49. package/dist/editor.js +1 -1
  50. package/dist/editor.js.map +1 -1
  51. package/dist/executor.d.ts +26 -3
  52. package/dist/executor.d.ts.map +1 -1
  53. package/dist/executor.js +297 -64
  54. package/dist/executor.js.map +1 -1
  55. package/dist/executor.unit.test.js +38 -1
  56. package/dist/executor.unit.test.js.map +1 -1
  57. package/dist/extension-connection.test.js +139 -36
  58. package/dist/extension-connection.test.js.map +1 -1
  59. package/dist/ffmpeg.d.ts +148 -0
  60. package/dist/ffmpeg.d.ts.map +1 -0
  61. package/dist/ffmpeg.js +523 -0
  62. package/dist/ffmpeg.js.map +1 -0
  63. package/dist/ghost-browser.d.ts.map +1 -1
  64. package/dist/ghost-browser.js.map +1 -1
  65. package/dist/ghost-cursor-client.js +287 -0
  66. package/dist/ghost-cursor.d.ts +27 -0
  67. package/dist/ghost-cursor.d.ts.map +1 -0
  68. package/dist/ghost-cursor.js +63 -0
  69. package/dist/ghost-cursor.js.map +1 -0
  70. package/dist/htmlrewrite.d.ts.map +1 -1
  71. package/dist/htmlrewrite.js +17 -55
  72. package/dist/htmlrewrite.js.map +1 -1
  73. package/dist/htmlrewrite.test.js.map +1 -1
  74. package/dist/kill-port.d.ts.map +1 -1
  75. package/dist/kill-port.js +1 -3
  76. package/dist/kill-port.js.map +1 -1
  77. package/dist/locator-selector.test.d.ts +2 -0
  78. package/dist/locator-selector.test.d.ts.map +1 -0
  79. package/dist/locator-selector.test.js +96 -0
  80. package/dist/locator-selector.test.js.map +1 -0
  81. package/dist/mcp-client.js.map +1 -1
  82. package/dist/mcp.d.ts.map +1 -1
  83. package/dist/mcp.js +8 -3
  84. package/dist/mcp.js.map +1 -1
  85. package/dist/on-mouse-action.test.d.ts +2 -0
  86. package/dist/on-mouse-action.test.d.ts.map +1 -0
  87. package/dist/on-mouse-action.test.js +155 -0
  88. package/dist/on-mouse-action.test.js.map +1 -0
  89. package/dist/page-markdown.js +4 -4
  90. package/dist/page-markdown.js.map +1 -1
  91. package/dist/prompt.md +450 -377
  92. package/dist/protocol.d.ts +4 -0
  93. package/dist/protocol.d.ts.map +1 -1
  94. package/dist/readability.js +16 -2
  95. package/dist/recording-ghost-cursor.d.ts +41 -0
  96. package/dist/recording-ghost-cursor.d.ts.map +1 -0
  97. package/dist/recording-ghost-cursor.js +79 -0
  98. package/dist/recording-ghost-cursor.js.map +1 -0
  99. package/dist/recording-relay.d.ts.map +1 -1
  100. package/dist/recording-relay.js +8 -8
  101. package/dist/recording-relay.js.map +1 -1
  102. package/dist/relay-client.d.ts +17 -4
  103. package/dist/relay-client.d.ts.map +1 -1
  104. package/dist/relay-client.js +45 -11
  105. package/dist/relay-client.js.map +1 -1
  106. package/dist/relay-core.test.d.ts.map +1 -1
  107. package/dist/relay-core.test.js +515 -26
  108. package/dist/relay-core.test.js.map +1 -1
  109. package/dist/relay-navigation.test.d.ts.map +1 -1
  110. package/dist/relay-navigation.test.js +169 -31
  111. package/dist/relay-navigation.test.js.map +1 -1
  112. package/dist/relay-session.test.d.ts.map +1 -1
  113. package/dist/relay-session.test.js +113 -65
  114. package/dist/relay-session.test.js.map +1 -1
  115. package/dist/relay-state.d.ts +158 -0
  116. package/dist/relay-state.d.ts.map +1 -0
  117. package/dist/relay-state.js +306 -0
  118. package/dist/relay-state.js.map +1 -0
  119. package/dist/relay-state.test.d.ts +2 -0
  120. package/dist/relay-state.test.d.ts.map +1 -0
  121. package/dist/relay-state.test.js +472 -0
  122. package/dist/relay-state.test.js.map +1 -0
  123. package/dist/scoped-fs.d.ts.map +1 -1
  124. package/dist/scoped-fs.js.map +1 -1
  125. package/dist/screen-recording.d.ts +66 -4
  126. package/dist/screen-recording.d.ts.map +1 -1
  127. package/dist/screen-recording.js +150 -13
  128. package/dist/screen-recording.js.map +1 -1
  129. package/dist/screen-recording.test.d.ts +2 -0
  130. package/dist/screen-recording.test.d.ts.map +1 -0
  131. package/dist/screen-recording.test.js +102 -0
  132. package/dist/screen-recording.test.js.map +1 -0
  133. package/dist/selector-generator.js +1 -1
  134. package/dist/snapshot-tools.test.js +71 -28
  135. package/dist/snapshot-tools.test.js.map +1 -1
  136. package/dist/start-relay-server.d.ts +1 -1
  137. package/dist/start-relay-server.d.ts.map +1 -1
  138. package/dist/start-relay-server.js +1 -1
  139. package/dist/start-relay-server.js.map +1 -1
  140. package/dist/styles-api.md +8 -1
  141. package/dist/styles-examples.d.ts +1 -1
  142. package/dist/styles-examples.d.ts.map +1 -1
  143. package/dist/styles-examples.js +1 -1
  144. package/dist/styles-examples.js.map +1 -1
  145. package/dist/styles.d.ts.map +1 -1
  146. package/dist/styles.js +1 -3
  147. package/dist/styles.js.map +1 -1
  148. package/dist/test-declarations.d.ts.map +1 -1
  149. package/dist/test-utils.d.ts +1 -1
  150. package/dist/test-utils.d.ts.map +1 -1
  151. package/dist/test-utils.js +7 -5
  152. package/dist/test-utils.js.map +1 -1
  153. package/dist/utils.d.ts.map +1 -1
  154. package/dist/utils.js.map +1 -1
  155. package/dist/wait-for-page-load.d.ts.map +1 -1
  156. package/dist/wait-for-page-load.js +1 -1
  157. package/dist/wait-for-page-load.js.map +1 -1
  158. package/package.json +4 -3
  159. package/src/a11y-client.ts +5 -4
  160. package/src/aria-snapshot.test.ts +5 -2
  161. package/src/aria-snapshot.ts +306 -117
  162. package/src/aria-snapshot.unit.test.ts +199 -141
  163. package/src/aria-snapshots/github-interactive.txt +2 -0
  164. package/src/aria-snapshots/github-raw.txt +5 -1
  165. package/src/aria-snapshots/hackernews-interactive.txt +238 -241
  166. package/src/aria-snapshots/hackernews-raw.txt +265 -269
  167. package/src/assets/aria-labels-example.png +0 -0
  168. package/src/assets/aria-labels-github.png +0 -0
  169. package/src/assets/aria-labels-hacker-news.png +0 -0
  170. package/src/assets/aria-labels-old-reddit.png +0 -0
  171. package/src/assets/cursors/screen-studio/pointer-macos-tahoe-data-url.ts +5 -0
  172. package/src/assets/cursors/screen-studio/pointer-macos-tahoe.svg +18 -0
  173. package/src/cdp-log.ts +4 -1
  174. package/src/cdp-relay.ts +1059 -737
  175. package/src/cdp-session.ts +12 -3
  176. package/src/cdp-types.ts +51 -51
  177. package/src/clean-html.ts +4 -5
  178. package/src/cli.ts +82 -55
  179. package/src/create-logger.ts +5 -3
  180. package/src/debugger-examples-types.ts +4 -1
  181. package/src/debugger.ts +1 -5
  182. package/src/diff-utils.ts +2 -5
  183. package/src/editor-examples.ts +11 -1
  184. package/src/editor.ts +10 -2
  185. package/src/executor.ts +374 -73
  186. package/src/executor.unit.test.ts +48 -1
  187. package/src/extension-connection.test.ts +612 -488
  188. package/src/ffmpeg.ts +769 -0
  189. package/src/ghost-browser.ts +4 -6
  190. package/src/ghost-cursor-client.ts +369 -0
  191. package/src/ghost-cursor.ts +110 -0
  192. package/src/htmlrewrite.test.ts +6 -2
  193. package/src/htmlrewrite.ts +348 -386
  194. package/src/kill-port.ts +1 -3
  195. package/src/locator-selector.test.ts +115 -0
  196. package/src/mcp-client.ts +1 -1
  197. package/src/mcp.ts +21 -15
  198. package/src/on-mouse-action.test.ts +196 -0
  199. package/src/page-markdown.ts +7 -7
  200. package/src/protocol.ts +73 -57
  201. package/src/recording-ghost-cursor.ts +113 -0
  202. package/src/recording-relay.ts +20 -12
  203. package/src/relay-client.ts +85 -18
  204. package/src/relay-core.test.ts +1117 -578
  205. package/src/relay-navigation.test.ts +648 -483
  206. package/src/relay-session.test.ts +984 -929
  207. package/src/relay-state.test.ts +570 -0
  208. package/src/relay-state.ts +497 -0
  209. package/src/resource.md +21 -49
  210. package/src/scoped-fs.ts +9 -3
  211. package/src/screen-recording.test.ts +111 -0
  212. package/src/screen-recording.ts +256 -31
  213. package/src/skill.md +476 -396
  214. package/src/snapshot-tools.test.ts +580 -528
  215. package/src/snapshots/shadcn-ui-accessibility-full.md +8 -8
  216. package/src/snapshots/shadcn-ui-accessibility-interactive.md +8 -8
  217. package/src/start-relay-server.ts +14 -11
  218. package/src/styles-examples.ts +8 -1
  219. package/src/styles.ts +20 -21
  220. package/src/test-declarations.ts +6 -6
  221. package/src/test-utils.ts +104 -91
  222. package/src/utils.ts +2 -1
  223. package/src/wait-for-page-load.ts +6 -1
@@ -2,427 +2,389 @@ import posthtml from 'posthtml'
2
2
  import beautify from 'posthtml-beautify'
3
3
 
4
4
  export interface FormatHtmlOptions {
5
- html: string
6
- keepStyles?: boolean
7
- maxAttrLen?: number
8
- maxContentLen?: number
5
+ html: string
6
+ keepStyles?: boolean
7
+ maxAttrLen?: number
8
+ maxContentLen?: number
9
9
  }
10
10
 
11
11
  export async function formatHtmlForPrompt({
12
- html,
13
- keepStyles = false,
14
- maxAttrLen = 200,
15
- maxContentLen = 500,
12
+ html,
13
+ keepStyles = false,
14
+ maxAttrLen = 200,
15
+ maxContentLen = 500,
16
16
  }: FormatHtmlOptions) {
17
- const tagsToRemove = [
18
- 'hint',
19
- 'style',
20
- 'link',
21
- 'script',
22
- 'meta',
23
- 'noscript',
24
- 'svg',
25
- 'head',
26
- ]
27
-
28
- const attributesToKeep = [
29
- // Standard descriptive attributes
30
- 'label',
31
- 'title',
32
- 'alt',
33
- 'href',
34
- 'name',
35
- 'value',
36
- 'checked',
37
- 'placeholder',
38
- 'type',
39
- 'role',
40
- 'target',
41
- // Descriptive aria attributes (text content)
42
- 'aria-label',
43
- 'aria-placeholder',
44
- 'aria-valuetext',
45
- 'aria-roledescription',
46
- // Useful aria state attributes
47
- 'aria-hidden',
48
- 'aria-expanded',
49
- 'aria-checked',
50
- 'aria-selected',
51
- 'aria-disabled',
52
- 'aria-pressed',
53
- 'aria-required',
54
- 'aria-current',
55
- // Test IDs (data-testid, data-test, data-cy, data-qa are covered by data-* prefix)
56
- 'testid',
57
- 'test-id',
58
- 'tid',
59
- 'qa',
60
- 'qa-id',
61
- 'e2e',
62
- 'e2e-id',
63
- 'automation-id',
64
- 'automationid',
65
- 'selenium',
66
- 'pw',
67
- 'vimium-label',
68
- // Conditionally added: 'style', 'class'
69
- ]
70
-
71
- if (keepStyles) {
72
- attributesToKeep.push('style', 'class')
73
- }
17
+ const tagsToRemove = ['hint', 'style', 'link', 'script', 'meta', 'noscript', 'svg', 'head']
18
+
19
+ const attributesToKeep = [
20
+ // Standard descriptive attributes
21
+ 'label',
22
+ 'title',
23
+ 'alt',
24
+ 'href',
25
+ 'name',
26
+ 'value',
27
+ 'checked',
28
+ 'placeholder',
29
+ 'type',
30
+ 'role',
31
+ 'target',
32
+ // Descriptive aria attributes (text content)
33
+ 'aria-label',
34
+ 'aria-placeholder',
35
+ 'aria-valuetext',
36
+ 'aria-roledescription',
37
+ // Useful aria state attributes
38
+ 'aria-hidden',
39
+ 'aria-expanded',
40
+ 'aria-checked',
41
+ 'aria-selected',
42
+ 'aria-disabled',
43
+ 'aria-pressed',
44
+ 'aria-required',
45
+ 'aria-current',
46
+ // Test IDs (data-testid, data-test, data-cy, data-qa are covered by data-* prefix)
47
+ 'testid',
48
+ 'test-id',
49
+ 'tid',
50
+ 'qa',
51
+ 'qa-id',
52
+ 'e2e',
53
+ 'e2e-id',
54
+ 'automation-id',
55
+ 'automationid',
56
+ 'selenium',
57
+ 'pw',
58
+ 'vimium-label',
59
+ // Conditionally added: 'style', 'class'
60
+ ]
61
+
62
+ if (keepStyles) {
63
+ attributesToKeep.push('style', 'class')
64
+ }
65
+
66
+ const truncate = (str: string, maxLen: number): string => {
67
+ if (str.length <= maxLen) return str
68
+ const remaining = str.length - maxLen
69
+ return str.slice(0, maxLen) + `...${remaining} more characters`
70
+ }
71
+
72
+ // Create a custom plugin to remove tags and filter attributes
73
+ const removeTagsAndAttrsPlugin = () => {
74
+ return (tree) => {
75
+ // Remove comments at root level
76
+ tree = tree.filter((item) => {
77
+ if (typeof item === 'string') {
78
+ const trimmed = item.trim()
79
+ return !(trimmed.startsWith('<!--') && trimmed.endsWith('-->'))
80
+ }
81
+ return true
82
+ })
83
+
84
+ // Process each node recursively
85
+ const processNode = (node) => {
86
+ if (typeof node === 'string') {
87
+ // Truncate text content
88
+ const trimmed = node.trim()
89
+ if (trimmed.length === 0) return node
90
+ return truncate(node, maxContentLen)
91
+ }
74
92
 
75
- const truncate = (str: string, maxLen: number): string => {
76
- if (str.length <= maxLen) return str
77
- const remaining = str.length - maxLen
78
- return str.slice(0, maxLen) + `...${remaining} more characters`
79
- }
93
+ // Remove unwanted tags
94
+ if (node.tag && tagsToRemove.includes(node.tag.toLowerCase())) {
95
+ return null
96
+ }
80
97
 
81
- // Create a custom plugin to remove tags and filter attributes
82
- const removeTagsAndAttrsPlugin = () => {
83
- return (tree) => {
84
- // Remove comments at root level
85
- tree = tree.filter((item) => {
86
- if (typeof item === 'string') {
87
- const trimmed = item.trim()
88
- return !(trimmed.startsWith('<!--') && trimmed.endsWith('-->'))
89
- }
90
- return true
91
- })
92
-
93
- // Process each node recursively
94
- const processNode = (node) => {
95
- if (typeof node === 'string') {
96
- // Truncate text content
97
- const trimmed = node.trim()
98
- if (trimmed.length === 0) return node
99
- return truncate(node, maxContentLen)
100
- }
101
-
102
- // Remove unwanted tags
103
- if (node.tag && tagsToRemove.includes(node.tag.toLowerCase())) {
104
- return null
105
- }
106
-
107
- // Filter attributes
108
- if (node.attrs) {
109
- const newAttrs: typeof node.attrs = {}
110
- for (const [attr, value] of Object.entries(node.attrs)) {
111
- const shouldKeep =
112
- attr.startsWith('data-') ||
113
- attributesToKeep.includes(attr)
114
-
115
- if (shouldKeep) {
116
- // Truncate attribute values
117
- newAttrs[attr] = typeof value === 'string'
118
- ? truncate(value, maxAttrLen)
119
- : value
120
- }
121
- }
122
- node.attrs = newAttrs
123
- }
124
-
125
- // Process content recursively
126
- if (node.content && Array.isArray(node.content)) {
127
- node.content = node.content
128
- .map(processNode)
129
- .filter(item => {
130
- if (item === null) return false
131
- if (typeof item === 'string') {
132
- const trimmed = item.trim()
133
- return !(trimmed.startsWith('<!--') && trimmed.endsWith('-->'))
134
- }
135
- return true
136
- })
137
- }
138
-
139
- return node
140
- }
98
+ // Filter attributes
99
+ if (node.attrs) {
100
+ const newAttrs: typeof node.attrs = {}
101
+ for (const [attr, value] of Object.entries(node.attrs)) {
102
+ const shouldKeep = attr.startsWith('data-') || attributesToKeep.includes(attr)
141
103
 
142
- // Process all root nodes
143
- return tree.map(processNode).filter(item => item !== null)
104
+ if (shouldKeep) {
105
+ // Truncate attribute values
106
+ newAttrs[attr] = typeof value === 'string' ? truncate(value, maxAttrLen) : value
107
+ }
108
+ }
109
+ node.attrs = newAttrs
144
110
  }
145
- }
146
111
 
147
- // Plugin to remove aria-hidden="true" subtrees entirely
148
- // These are hidden from assistive tech and usually decorative
149
- const removeAriaHiddenPlugin = () => {
150
- return (tree) => {
151
- const processNode = (node) => {
152
- if (typeof node === 'string') return node
153
- if (!node.tag) return node
154
-
155
- // Remove if aria-hidden="true"
156
- if (node.attrs?.['aria-hidden'] === 'true') {
157
- return null
158
- }
159
-
160
- // Process children recursively
161
- if (node.content && Array.isArray(node.content)) {
162
- node.content = node.content
163
- .map(processNode)
164
- .filter((item) => item !== null)
165
- }
166
-
167
- return node
112
+ // Process content recursively
113
+ if (node.content && Array.isArray(node.content)) {
114
+ node.content = node.content.map(processNode).filter((item) => {
115
+ if (item === null) return false
116
+ if (typeof item === 'string') {
117
+ const trimmed = item.trim()
118
+ return !(trimmed.startsWith('<!--') && trimmed.endsWith('-->'))
168
119
  }
169
-
170
- return tree.map(processNode).filter((item) => item !== null)
120
+ return true
121
+ })
171
122
  }
172
- }
173
123
 
174
- // Plugin to remove images with empty alt text (purely decorative)
175
- // Runs before decorative subtree pruning so containers become empty
176
- const removeEmptyAltImagesPlugin = () => {
177
- return (tree) => {
178
- const processNode = (node) => {
179
- if (typeof node === 'string') return node
180
- if (!node.tag) return node
181
-
182
- // Remove img with empty or missing alt
183
- if (node.tag.toLowerCase() === 'img') {
184
- const alt = node.attrs?.alt
185
- if (alt === '' || alt === undefined) {
186
- return null
187
- }
188
- }
189
-
190
- // Process children recursively
191
- if (node.content && Array.isArray(node.content)) {
192
- node.content = node.content
193
- .map(processNode)
194
- .filter((item) => item !== null)
195
- }
196
-
197
- return node
198
- }
124
+ return node
125
+ }
199
126
 
200
- return tree.map(processNode).filter((item) => item !== null)
201
- }
127
+ // Process all root nodes
128
+ return tree.map(processNode).filter((item) => item !== null)
202
129
  }
130
+ }
131
+
132
+ // Plugin to remove aria-hidden="true" subtrees entirely
133
+ // These are hidden from assistive tech and usually decorative
134
+ const removeAriaHiddenPlugin = () => {
135
+ return (tree) => {
136
+ const processNode = (node) => {
137
+ if (typeof node === 'string') return node
138
+ if (!node.tag) return node
139
+
140
+ // Remove if aria-hidden="true"
141
+ if (node.attrs?.['aria-hidden'] === 'true') {
142
+ return null
143
+ }
203
144
 
204
- // Plugin to remove decorative subtrees that have no useful content for agents
205
- // A subtree is decorative if it has:
206
- // - No text content (leaf text nodes)
207
- // - No actionable elements with meaningful attributes
208
- const removeDecorativeSubtreesPlugin = () => {
209
- const actionableTags = ['button', 'a', 'input', 'select', 'textarea']
210
- const meaningfulAttrs = [
211
- 'aria-label',
212
- 'title',
213
- 'alt',
214
- 'value',
215
- 'placeholder',
216
- 'href',
217
- 'name',
218
- ]
219
-
220
- // Form elements are always actionable, keep unconditionally
221
- const formTags = ['input', 'select', 'textarea']
222
-
223
- // Check if a subtree has any useful content
224
- const hasUsefulContent = (node): boolean => {
225
- if (typeof node === 'string') {
226
- return node.trim().length > 0
227
- }
228
- if (!node.tag) return false
145
+ // Process children recursively
146
+ if (node.content && Array.isArray(node.content)) {
147
+ node.content = node.content.map(processNode).filter((item) => item !== null)
148
+ }
229
149
 
230
- // Form elements are always useful for agents to interact with
231
- if (formTags.includes(node.tag.toLowerCase())) {
232
- return true
233
- }
150
+ return node
151
+ }
234
152
 
235
- // Images with non-empty alt text are useful (descriptive content)
236
- if (node.tag.toLowerCase() === 'img') {
237
- const alt = node.attrs?.alt
238
- if (typeof alt === 'string' && alt.trim().length > 0) {
239
- return true
240
- }
241
- }
153
+ return tree.map(processNode).filter((item) => item !== null)
154
+ }
155
+ }
156
+
157
+ // Plugin to remove images with empty alt text (purely decorative)
158
+ // Runs before decorative subtree pruning so containers become empty
159
+ const removeEmptyAltImagesPlugin = () => {
160
+ return (tree) => {
161
+ const processNode = (node) => {
162
+ if (typeof node === 'string') return node
163
+ if (!node.tag) return node
164
+
165
+ // Remove img with empty or missing alt
166
+ if (node.tag.toLowerCase() === 'img') {
167
+ const alt = node.attrs?.alt
168
+ if (alt === '' || alt === undefined) {
169
+ return null
170
+ }
171
+ }
242
172
 
243
- // Check if this is an actionable element with meaningful attributes
244
- if (actionableTags.includes(node.tag.toLowerCase())) {
245
- if (node.attrs) {
246
- for (const attr of meaningfulAttrs) {
247
- const value = node.attrs[attr]
248
- if (typeof value === 'string' && value.trim().length > 0) {
249
- return true
250
- }
251
- }
252
- }
253
- }
173
+ // Process children recursively
174
+ if (node.content && Array.isArray(node.content)) {
175
+ node.content = node.content.map(processNode).filter((item) => item !== null)
176
+ }
254
177
 
255
- // Check children recursively
256
- if (node.content && Array.isArray(node.content)) {
257
- for (const child of node.content) {
258
- if (hasUsefulContent(child)) {
259
- return true
260
- }
261
- }
262
- }
178
+ return node
179
+ }
263
180
 
264
- return false
181
+ return tree.map(processNode).filter((item) => item !== null)
182
+ }
183
+ }
184
+
185
+ // Plugin to remove decorative subtrees that have no useful content for agents
186
+ // A subtree is decorative if it has:
187
+ // - No text content (leaf text nodes)
188
+ // - No actionable elements with meaningful attributes
189
+ const removeDecorativeSubtreesPlugin = () => {
190
+ const actionableTags = ['button', 'a', 'input', 'select', 'textarea']
191
+ const meaningfulAttrs = ['aria-label', 'title', 'alt', 'value', 'placeholder', 'href', 'name']
192
+
193
+ // Form elements are always actionable, keep unconditionally
194
+ const formTags = ['input', 'select', 'textarea']
195
+
196
+ // Check if a subtree has any useful content
197
+ const hasUsefulContent = (node): boolean => {
198
+ if (typeof node === 'string') {
199
+ return node.trim().length > 0
200
+ }
201
+ if (!node.tag) return false
202
+
203
+ // Form elements are always useful for agents to interact with
204
+ if (formTags.includes(node.tag.toLowerCase())) {
205
+ return true
206
+ }
207
+
208
+ // Images with non-empty alt text are useful (descriptive content)
209
+ if (node.tag.toLowerCase() === 'img') {
210
+ const alt = node.attrs?.alt
211
+ if (typeof alt === 'string' && alt.trim().length > 0) {
212
+ return true
265
213
  }
266
-
267
- return (tree) => {
268
- const processNode = (node) => {
269
- if (typeof node === 'string') return node
270
- if (!node.tag) return node
271
-
272
- // First process children
273
- if (node.content && Array.isArray(node.content)) {
274
- node.content = node.content
275
- .map(processNode)
276
- .filter((item) => item !== null)
277
- }
278
-
279
- // After processing children, check if this subtree is now decorative
280
- // Skip root-level semantic elements (body, main, etc.)
281
- const semanticTags = [
282
- 'html',
283
- 'body',
284
- 'main',
285
- 'header',
286
- 'footer',
287
- 'nav',
288
- 'section',
289
- 'article',
290
- 'aside',
291
- ]
292
- if (semanticTags.includes(node.tag.toLowerCase())) {
293
- return node
294
- }
295
-
296
- // If no useful content in this subtree, remove it
297
- if (!hasUsefulContent(node)) {
298
- return null
299
- }
300
-
301
- return node
214
+ }
215
+
216
+ // Check if this is an actionable element with meaningful attributes
217
+ if (actionableTags.includes(node.tag.toLowerCase())) {
218
+ if (node.attrs) {
219
+ for (const attr of meaningfulAttrs) {
220
+ const value = node.attrs[attr]
221
+ if (typeof value === 'string' && value.trim().length > 0) {
222
+ return true
302
223
  }
303
-
304
- return tree.map(processNode).filter((item) => item !== null)
224
+ }
225
+ }
226
+ }
227
+
228
+ // Check children recursively
229
+ if (node.content && Array.isArray(node.content)) {
230
+ for (const child of node.content) {
231
+ if (hasUsefulContent(child)) {
232
+ return true
233
+ }
305
234
  }
235
+ }
236
+
237
+ return false
306
238
  }
307
239
 
308
- // Plugin to unwrap unnecessary nested wrapper elements
309
- // e.g., <div><div><div><p>text</p></div></div></div> -> <div><p>text</p></div>
310
- const unwrapNestedWrappersPlugin = () => {
311
- return (tree) => {
312
- const isWhitespaceOnly = (node) => {
313
- return typeof node === 'string' && node.trim().length === 0
314
- }
240
+ return (tree) => {
241
+ const processNode = (node) => {
242
+ if (typeof node === 'string') return node
243
+ if (!node.tag) return node
315
244
 
316
- const hasNoAttrs = (node) => {
317
- return !node.attrs || Object.keys(node.attrs).length === 0
318
- }
319
-
320
- const unwrapNode = (node) => {
321
- if (typeof node === 'string') return node
322
- if (!node.tag) return node
323
-
324
- // First, recursively process children
325
- if (node.content && Array.isArray(node.content)) {
326
- node.content = node.content.map(unwrapNode)
327
- }
328
-
329
- // Check if this node is an unnecessary wrapper:
330
- // - has no attributes
331
- // - has exactly one non-whitespace child that is an element
332
- if (hasNoAttrs(node) && node.content && Array.isArray(node.content)) {
333
- const nonWhitespaceChildren = node.content.filter(c => !isWhitespaceOnly(c))
334
-
335
- if (nonWhitespaceChildren.length === 1) {
336
- const onlyChild = nonWhitespaceChildren[0]
337
- // If the only child is also an element (not text), unwrap
338
- if (typeof onlyChild !== 'string' && onlyChild.tag) {
339
- // Replace this node with its child
340
- return onlyChild
341
- }
342
- }
343
- }
344
-
345
- return node
346
- }
245
+ // First process children
246
+ if (node.content && Array.isArray(node.content)) {
247
+ node.content = node.content.map(processNode).filter((item) => item !== null)
248
+ }
347
249
 
348
- // Apply multiple passes until stable (handles deeply nested wrappers)
349
- let result = tree.map(unwrapNode)
350
- let prevJson = ''
351
- let currJson = JSON.stringify(result)
352
- while (prevJson !== currJson) {
353
- prevJson = currJson
354
- result = result.map(unwrapNode)
355
- currJson = JSON.stringify(result)
356
- }
250
+ // After processing children, check if this subtree is now decorative
251
+ // Skip root-level semantic elements (body, main, etc.)
252
+ const semanticTags = ['html', 'body', 'main', 'header', 'footer', 'nav', 'section', 'article', 'aside']
253
+ if (semanticTags.includes(node.tag.toLowerCase())) {
254
+ return node
255
+ }
357
256
 
358
- return result
257
+ // If no useful content in this subtree, remove it
258
+ if (!hasUsefulContent(node)) {
259
+ return null
359
260
  }
360
- }
361
261
 
362
- // Plugin to remove empty elements (no attrs, no content)
363
- // Runs repeatedly until no more empty elements exist
364
- const removeEmptyElementsPlugin = () => {
365
- return (tree) => {
366
- const isEmptyElement = (node) => {
367
- if (typeof node === 'string') return false
368
- if (!node.tag) return false
369
- const hasAttrs = node.attrs && Object.keys(node.attrs).length > 0
370
- const hasContent = node.content && node.content.some(c =>
371
- typeof c === 'string' ? c.trim().length > 0 : true
372
- )
373
- return !hasAttrs && !hasContent
374
- }
262
+ return node
263
+ }
375
264
 
376
- const removeEmpty = (content) => {
377
- if (!content || !Array.isArray(content)) return content
378
-
379
- return content
380
- .map(node => {
381
- if (typeof node === 'string') return node
382
- if (node.content) {
383
- node.content = removeEmpty(node.content)
384
- }
385
- return node
386
- })
387
- .filter(node => !isEmptyElement(node))
388
- }
265
+ return tree.map(processNode).filter((item) => item !== null)
266
+ }
267
+ }
268
+
269
+ // Plugin to unwrap unnecessary nested wrapper elements
270
+ // e.g., <div><div><div><p>text</p></div></div></div> -> <div><p>text</p></div>
271
+ const unwrapNestedWrappersPlugin = () => {
272
+ return (tree) => {
273
+ const isWhitespaceOnly = (node) => {
274
+ return typeof node === 'string' && node.trim().length === 0
275
+ }
276
+
277
+ const hasNoAttrs = (node) => {
278
+ return !node.attrs || Object.keys(node.attrs).length === 0
279
+ }
280
+
281
+ const unwrapNode = (node) => {
282
+ if (typeof node === 'string') return node
283
+ if (!node.tag) return node
284
+
285
+ // First, recursively process children
286
+ if (node.content && Array.isArray(node.content)) {
287
+ node.content = node.content.map(unwrapNode)
288
+ }
389
289
 
390
- // Apply multiple passes until stable
391
- let result = removeEmpty(tree)
392
- let prevJson = ''
393
- let currJson = JSON.stringify(result)
394
- while (prevJson !== currJson) {
395
- prevJson = currJson
396
- result = removeEmpty(result)
397
- currJson = JSON.stringify(result)
290
+ // Check if this node is an unnecessary wrapper:
291
+ // - has no attributes
292
+ // - has exactly one non-whitespace child that is an element
293
+ if (hasNoAttrs(node) && node.content && Array.isArray(node.content)) {
294
+ const nonWhitespaceChildren = node.content.filter((c) => !isWhitespaceOnly(c))
295
+
296
+ if (nonWhitespaceChildren.length === 1) {
297
+ const onlyChild = nonWhitespaceChildren[0]
298
+ // If the only child is also an element (not text), unwrap
299
+ if (typeof onlyChild !== 'string' && onlyChild.tag) {
300
+ // Replace this node with its child
301
+ return onlyChild
398
302
  }
399
-
400
- return result
303
+ }
401
304
  }
402
- }
403
305
 
404
- // Process HTML
405
- const processor = posthtml()
406
- .use(removeTagsAndAttrsPlugin())
407
- .use(removeAriaHiddenPlugin())
408
- .use(removeEmptyAltImagesPlugin())
409
- .use(removeDecorativeSubtreesPlugin())
410
- .use(removeEmptyElementsPlugin())
411
- .use(unwrapNestedWrappersPlugin())
412
- .use(beautify({
413
- rules: {
414
- indent: 1, // 1-space indent
415
- blankLines: false, // no extra blank lines
416
- maxlen: 100000 // effectively never wrap by content length
417
- },
418
- jsBeautifyOptions: {
419
- wrap_line_length: 0, // disable js-beautify wrapping
420
- preserve_newlines: false // reduce stray newlines
421
- }
422
- }))
306
+ return node
307
+ }
423
308
 
424
- // Process with await
425
- const result = await processor.process(html)
309
+ // Apply multiple passes until stable (handles deeply nested wrappers)
310
+ let result = tree.map(unwrapNode)
311
+ let prevJson = ''
312
+ let currJson = JSON.stringify(result)
313
+ while (prevJson !== currJson) {
314
+ prevJson = currJson
315
+ result = result.map(unwrapNode)
316
+ currJson = JSON.stringify(result)
317
+ }
426
318
 
427
- return result.html
319
+ return result
320
+ }
321
+ }
322
+
323
+ // Plugin to remove empty elements (no attrs, no content)
324
+ // Runs repeatedly until no more empty elements exist
325
+ const removeEmptyElementsPlugin = () => {
326
+ return (tree) => {
327
+ const isEmptyElement = (node) => {
328
+ if (typeof node === 'string') return false
329
+ if (!node.tag) return false
330
+ const hasAttrs = node.attrs && Object.keys(node.attrs).length > 0
331
+ const hasContent =
332
+ node.content && node.content.some((c) => (typeof c === 'string' ? c.trim().length > 0 : true))
333
+ return !hasAttrs && !hasContent
334
+ }
335
+
336
+ const removeEmpty = (content) => {
337
+ if (!content || !Array.isArray(content)) return content
338
+
339
+ return content
340
+ .map((node) => {
341
+ if (typeof node === 'string') return node
342
+ if (node.content) {
343
+ node.content = removeEmpty(node.content)
344
+ }
345
+ return node
346
+ })
347
+ .filter((node) => !isEmptyElement(node))
348
+ }
349
+
350
+ // Apply multiple passes until stable
351
+ let result = removeEmpty(tree)
352
+ let prevJson = ''
353
+ let currJson = JSON.stringify(result)
354
+ while (prevJson !== currJson) {
355
+ prevJson = currJson
356
+ result = removeEmpty(result)
357
+ currJson = JSON.stringify(result)
358
+ }
359
+
360
+ return result
361
+ }
362
+ }
363
+
364
+ // Process HTML
365
+ const processor = posthtml()
366
+ .use(removeTagsAndAttrsPlugin())
367
+ .use(removeAriaHiddenPlugin())
368
+ .use(removeEmptyAltImagesPlugin())
369
+ .use(removeDecorativeSubtreesPlugin())
370
+ .use(removeEmptyElementsPlugin())
371
+ .use(unwrapNestedWrappersPlugin())
372
+ .use(
373
+ beautify({
374
+ rules: {
375
+ indent: 1, // 1-space indent
376
+ blankLines: false, // no extra blank lines
377
+ maxlen: 100000, // effectively never wrap by content length
378
+ },
379
+ jsBeautifyOptions: {
380
+ wrap_line_length: 0, // disable js-beautify wrapping
381
+ preserve_newlines: false, // reduce stray newlines
382
+ },
383
+ }),
384
+ )
385
+
386
+ // Process with await
387
+ const result = await processor.process(html)
388
+
389
+ return result.html
428
390
  }