ax-grep 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,735 @@
1
+ // src/static.ts
2
+ import { parseDocument } from "htmlparser2";
3
+ import { Element as DomElement } from "domhandler";
4
/**
 * Baseline extraction options. `resolveStaticOptions` copies this object and
 * layers profile tweaks and caller overrides on top of the copy, so the shared
 * defaults are frozen to guard against accidental mutation.
 */
const defaultOptions = Object.freeze({
  includeAttributes: true,
  excludeLikelyAds: false,
  includeHidden: false,
  includeSelectOptions: true,
  includeTextNodes: false,
  maxTextLength: 240,
  mode: "compact",
  excludeLikelyBoilerplate: false,
  maxChildrenPerNode: 80,
  maxLinkFarmChildren: 24,
  maxRepeatedSubtreeInstances: 3,
  pruneCollapsedSubtrees: true,
  pruneLikelyClosedOverlays: true,
  summarizeLargeSubtrees: true,
  summarizeLikelyLinkFarms: true,
  summarizeRepeatedSubtrees: true
});
22
/** Roles that `isInteractive` and `isFocusable` treat as operable widgets. */
const interactiveRoles = /* @__PURE__ */ new Set([
  "button",
  "checkbox",
  "combobox",
  "link",
  "listbox",
  "menuitem",
  "menuitemcheckbox",
  "menuitemradio",
  "option",
  "radio",
  "searchbox",
  "slider",
  "spinbutton",
  "switch",
  "tab",
  "textbox",
  "treeitem"
]);
41
/**
 * Tag name -> implicit landmark role. Built on a null prototype because
 * `getRole` probes it with arbitrary tag names taken from the parsed HTML;
 * inherited members such as `constructor` must never look like landmarks.
 */
const landmarkTags = Object.freeze(
  Object.assign(Object.create(null), {
    article: "article",
    aside: "complementary",
    footer: "contentinfo",
    header: "banner",
    main: "main",
    nav: "navigation",
    section: "region"
  })
);
50
/** Roles for which `computeName` falls back to the element's descendant text. */
const rolesNamedFromContents = /* @__PURE__ */ new Set([
  "button",
  "cell",
  "checkbox",
  "columnheader",
  "heading",
  "link",
  "listitem",
  "menuitem",
  "menuitemcheckbox",
  "menuitemradio",
  "option",
  "radio",
  "rowheader",
  "switch",
  "tab",
  "treeitem"
]);
68
/**
 * Matches an inline-style declaration that hides the element: `display:none`,
 * `visibility:hidden`, `content-visibility:hidden` or an opacity of exactly 0.
 * The declaration may be followed by whitespace and/or `!important` before the
 * terminating `;` or end of string.
 */
const hiddenStylePattern = /(?:^|;)\s*(display\s*:\s*none|visibility\s*:\s*hidden|content-visibility\s*:\s*hidden|opacity\s*:\s*0(?:\.0+)?)\s*(?:!\s*important\s*)?(?:;|$)/i;
69
/** Tags that are skipped outside "full" mode and whose text never contributes to accessible names. */
const nonSemanticTags = /* @__PURE__ */ new Set(["head", "link", "meta", "script", "style", "template"]);
70
/**
 * Parse an HTML string and build the semantic tree for it.
 *
 * The walk starts at `<body>`, falling back to `<html>` and finally to a
 * synthetic fragment element wrapping the top-level nodes.
 *
 * @param {string} html - Markup to analyse.
 * @param {object} [options] - Overrides merged over the defaults by `resolveStaticOptions`.
 * @returns {object} Root semantic node, or an "unavailable" placeholder node
 *   when the chosen root produces no node.
 */
function extractStaticSemanticTree(html, options = {}) {
  const parsed = parseDocument(html, {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true
  });
  const context = {
    options: resolveStaticOptions(parsed.children, html, options),
    nextId: 1,
    ids: /* @__PURE__ */ new Map(),
    collapsedControlledIds: /* @__PURE__ */ new Set(),
    labelsByFor: /* @__PURE__ */ new Map()
  };
  // Index ids, collapsed aria-controls targets and <label for> text before walking.
  indexDocument(parsed.children, context);

  let root = findElement(parsed.children, "body");
  if (root === null || root === undefined) root = findElement(parsed.children, "html");
  if (root === null || root === undefined) root = fragmentRoot(parsed.children);

  const tree = walkElement(root, context);
  if (tree !== null && tree !== undefined) return tree;
  return unavailableNode(context, "document", "HTML has no inspectable root");
}
87
/**
 * Compute the effective options for one extraction run.
 *
 * Precedence, lowest to highest: `defaultOptions`, adjustments for the
 * inferred source profile (wiki-like / forum-like), caller overrides.
 * Overrides whose value is `undefined` are ignored, so passing through an
 * unset optional (e.g. `{ maxTextLength: undefined }`) keeps the default
 * instead of silently disabling the limit.
 *
 * @param {Array} nodes - Top-level parsed nodes.
 * @param {string} html - Raw markup, used by the profile heuristics.
 * @param {object} [options] - Caller overrides.
 * @returns {object} A new, fully populated options object.
 */
function resolveStaticOptions(nodes, html, options) {
  const profile = inferStaticSourceProfile(nodes, html);
  const resolved = { ...defaultOptions };
  if (profile.wikiLike) {
    resolved.maxChildrenPerNode = 400;
    resolved.maxLinkFarmChildren = 80;
  }
  if (profile.forumLike) {
    resolved.maxLinkFarmChildren = 19;
  }
  // fromEntries defines plain data properties, so an own "__proto__" key in
  // the overrides cannot change the prototype of the result.
  const overrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  );
  return { ...resolved, ...overrides };
}
99
/**
 * Guess what kind of site the markup comes from.
 *
 * Collects class/id of `<html>` and `<body>` plus a few `<meta>` values into
 * one lower-cased string and tests it, together with the raw HTML, against
 * wiki and forum keyword patterns.
 *
 * @param {Array} nodes - Top-level parsed nodes.
 * @param {string} html - Raw markup.
 * @returns {{wikiLike: boolean, forumLike: boolean}}
 */
function inferStaticSourceProfile(nodes, html) {
  const root = findElement(nodes, "html") ?? fragmentRoot(nodes);
  const body = findElement(nodes, "body");

  const hints = [attr(root, "class"), attr(root, "id")];
  if (body) hints.push(attr(body, "class"), attr(body, "id"));
  for (const metaName of ["generator", "application-name", "twitter:site"]) {
    hints.push(firstMetaContent(root, metaName));
  }
  const profileText = hints.filter(Boolean).join(" ").toLowerCase();

  let wikiLike = /\b(mediawiki|mw-parser-output|wikipedia|wikimedia)\b/.test(profileText);
  if (!wikiLike) {
    wikiLike = /\b(?:id|class)=["'][^"']*\bmw-parser-output\b/i.test(html);
  }

  let forumLike = /\b(5ch|2ch|dcinside|ruliweb|clien|bbs|board|forum|gallery|gall|thread|subback)\b/.test(profileText);
  if (!forumLike) {
    forumLike = /\b(?:id|class)=["'][^"']*\b(?:gall_list|threadlist|thread-list|board-list|article-list|subback|bbs|forum)\b/i.test(html);
  }
  if (!forumLike) {
    forumLike = /(?:갤러리|게시판|댓글|개념글|스레드|レス|話題度)/.test(html);
  }

  return { wikiLike, forumLike };
}
116
/**
 * Return the `content` of the first `<meta>` below `root` (document order)
 * whose `name` or `property` attribute equals `name`.
 *
 * Uses a LIFO stack with children pushed in reverse, which visits nodes in
 * the same pre-order as a front-of-queue insertion but without the O(n)
 * `shift`/`unshift` calls and without spreading a child list into call
 * arguments (which can exceed the engine's argument limit on very wide nodes).
 *
 * @param {object|null|undefined} root - Element whose descendants are searched.
 * @param {string} name - Meta name/property to look for.
 * @returns {string} The content value, or "" when nothing matches.
 */
function firstMetaContent(root, name) {
  if (!root) return "";
  const pending = [];
  const pushReversed = (children) => {
    for (let i = children.length - 1; i >= 0; i -= 1) pending.push(children[i]);
  };
  pushReversed(root.children);
  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || !isElement(node)) continue;
    if (node.name === "meta" && (attr(node, "name") === name || attr(node, "property") === name)) {
      return attr(node, "content") ?? "";
    }
    pushReversed(node.children);
  }
  return "";
}
130
/**
 * Recursively pre-index the parsed tree into `context`:
 *  - `ids`: id -> element. The first element in document order wins when an
 *    id is duplicated, matching how DOM id lookups resolve.
 *  - `collapsedControlledIds`: every id listed in `aria-controls` of an
 *    element whose `aria-expanded` is "false".
 *  - `labelsByFor`: `<label for>` target id -> normalized label text.
 *
 * @param {Array} nodes - Sibling nodes to index (non-elements are skipped).
 * @param {object} context - Extraction context; mutated in place.
 */
function indexDocument(nodes, context) {
  for (const node of nodes) {
    if (!isElement(node)) continue;
    const id = attr(node, "id");
    if (id && !context.ids.has(id)) context.ids.set(id, node);
    if (attr(node, "aria-expanded") === "false") {
      for (const controlledId of (attr(node, "aria-controls") ?? "").split(/\s+/)) {
        if (controlledId) context.collapsedControlledIds.add(controlledId);
      }
    }
    if (node.name === "label") {
      const target = attr(node, "for");
      if (target) context.labelsByFor.set(target, normalizeText(descendantText(node), context.options.maxTextLength));
    }
    indexDocument(node.children, context);
  }
}
147
/**
 * Convert one DOM element (and its subtree) into a semantic node.
 *
 * Returns null when the element is filtered out (skipped tag, hidden, ad,
 * boilerplate, collapsed target, closed overlay) or pruned with no children.
 * Pruned wrappers collapse to their single child or to an anonymous container.
 * Children are collected before the node's own id is allocated.
 *
 * @param {object|null|undefined} element - DOM element to convert.
 * @param {object} context - Extraction context (options, id counter, indexes).
 * @returns {object|null} Semantic node, replacement child/container, or null.
 */
function walkElement(element, context) {
  if (!element) return null;
  const { options } = context;

  if (shouldSkipElement(element, context)) return null;
  if (!options.includeHidden && isHidden(element)) return null;
  if (options.excludeLikelyAds && isLikelyAd(element)) return null;
  if (options.excludeLikelyBoilerplate) {
    if (isLikelyBoilerplateTable(element)) return flattenBoilerplateTable(element, context);
    if (isLikelyBoilerplate(element)) return null;
  }
  if (!options.includeHidden) {
    if (isCollapsedControlledElement(element, context)) return null;
    if (isLikelyClosedOverlay(element, context)) return null;
  }

  const role = getRole(element);
  const state = getState(element);
  const focusable = isFocusable(element, role);
  const interactive = isInteractive(element, role, focusable);
  const name = role ? computeName(element, role, context) : "";
  const tag = element.name;

  let children;
  if (shouldSkipChildrenForCollapsedElement(element, context)) {
    children = [];
  } else {
    children = collectChildren(element, context);
  }

  // A pruned wrapper is replaced by its only child, otherwise by a bare container.
  const unwrap = () => (children.length === 1 ? children[0] ?? null : containerNode(context, tag, children));

  if (options.mode === "interactive" && !interactive) {
    if (children.length > 0) return containerNode(context, tag, children);
    return null;
  }
  if (shouldPruneListItemWrapper(role, children, context)) {
    return unwrap();
  }
  if (shouldPrune(element, role, name, interactive, children, context)) {
    return children.length === 0 ? null : unwrap();
  }

  const node = {
    id: nextId(context),
    tag,
    role,
    name,
    interactive,
    focusable,
    selector: getSelector(element),
    xpath: getXPath(element),
    children
  };
  const ownText = directText(element, options.maxTextLength);
  if (ownText) node.text = ownText;
  const currentValue = getValue(element);
  if (currentValue) node.value = currentValue;
  if (Object.keys(state).length > 0) node.state = state;
  if (options.includeAttributes) node.attributes = { ...element.attribs };
  return node;
}
192
/**
 * Build the semantic children of `element`.
 *
 * Element children are walked recursively; text children become "#text"
 * nodes only when `includeTextNodes` is set. Repeated subtrees, children past
 * the per-node limit and link-farm overflow are dropped, and a trailing
 * "omitted" node reports how many semantic nodes were left out.
 *
 * @param {object} element - Parent DOM element.
 * @param {object} context - Extraction context.
 * @returns {Array<object>} Semantic child nodes.
 */
function collectChildren(element, context) {
  const { options } = context;
  const kept = [];
  const seenSignatures = /* @__PURE__ */ new Map();
  const skipElementChildren = !options.includeSelectOptions && element.name === "select";
  let omittedCount = 0;

  for (const domChild of element.children) {
    if (isElement(domChild)) {
      if (skipElementChildren) continue;
      const semanticChild = walkElement(domChild, context);
      if (!semanticChild) continue;
      const dropped = shouldSummarizeRepeatedChild(element, semanticChild, seenSignatures, context) || shouldSummarizeMoreChildren(element, kept, context);
      if (dropped) {
        omittedCount += countSemanticNodes(semanticChild);
      } else {
        kept.push(semanticChild);
      }
      continue;
    }

    if (!(options.includeTextNodes && isText(domChild))) continue;
    const text = normalizeText(domChild.data, options.maxTextLength);
    if (!text) continue;
    // The id is allocated before the size check, so omitted text still consumes one.
    const textNode = {
      id: nextId(context),
      tag: "#text",
      role: "text",
      name: text,
      text,
      interactive: false,
      focusable: false,
      children: []
    };
    if (shouldSummarizeMoreChildren(element, kept, context)) {
      omittedCount += 1;
    } else {
      kept.push(textNode);
    }
  }

  const farm = summarizeLikelyLinkFarmChildren(element, kept, context);
  if (farm.omitted > 0) {
    kept.length = 0;
    kept.push(...farm.children);
    omittedCount += farm.omitted;
  }
  if (omittedCount > 0) kept.push(omittedNode(context, omittedCount));
  return kept;
}
240
/**
 * Decide whether an element is dropped outright. Nothing is skipped in
 * "full" mode; otherwise non-semantic tags and `<noscript>` are.
 *
 * @param {object} element - DOM element.
 * @param {object} context - Extraction context.
 * @returns {boolean}
 */
function shouldSkipElement(element, context) {
  const fullMode = context.options.mode === "full";
  if (fullMode) return false;
  const tagName = element.name;
  return nonSemanticTags.has(tagName) || tagName === "noscript";
}
246
/**
 * True when `element` has already kept as many children as
 * `maxChildrenPerNode` allows and further ones should only be counted.
 * Disabled in "full" mode or when `summarizeLargeSubtrees` is off.
 *
 * @param {object} element - Parent DOM element.
 * @param {Array} children - Semantic children kept so far.
 * @param {object} context - Extraction context.
 * @returns {boolean}
 */
function shouldSummarizeMoreChildren(element, children, context) {
  const { summarizeLargeSubtrees, mode, maxChildrenPerNode } = context.options;
  const enabled = summarizeLargeSubtrees && mode !== "full";
  if (!enabled) return false;
  return isLargeSubtreeCandidate(element) && children.length >= maxChildrenPerNode;
}
251
/**
 * Whether the element's tag is one of the container tags subject to the
 * per-node child limit.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function isLargeSubtreeCandidate(element) {
  switch (element.name) {
    case "nav":
    case "ul":
    case "ol":
    case "div":
    case "section":
    case "footer":
    case "header":
    case "main":
      return true;
    default:
      return false;
  }
}
254
/**
 * Trim a container that looks like a link farm down to at most
 * `maxLinkFarmChildren` link-like children (other children are always kept).
 *
 * The trim only applies when the feature is on, the mode is not "full", the
 * container has more children than the limit, it looks like a link-farm
 * container, at least max(8, 65%) of the children are link-like, and no more
 * than max(2, 20%) are content-rich.
 *
 * @param {object} element - Parent DOM element.
 * @param {Array<object>} children - Semantic children collected so far.
 * @param {object} context - Extraction context.
 * @returns {{children: Array<object>, omitted: number}} The children to keep
 *   and the number of semantic nodes dropped (0 means `children` is unchanged).
 */
function summarizeLikelyLinkFarmChildren(element, children, context) {
  const { summarizeLikelyLinkFarms, mode, maxLinkFarmChildren } = context.options;
  const untouched = { children, omitted: 0 };

  if (!summarizeLikelyLinkFarms || mode === "full") return untouched;
  if (children.length <= maxLinkFarmChildren) return untouched;
  if (!isLikelyLinkFarmContainer(element)) return untouched;

  const { linkishChildren, contentRichChildren } = childLinkFarmStats(children);
  const minLinkish = Math.max(8, Math.floor(children.length * 0.65));
  const maxContentRich = Math.max(2, Math.floor(children.length * 0.2));
  if (linkishChildren < minLinkish) return untouched;
  if (contentRichChildren > maxContentRich) return untouched;

  const survivors = [];
  let dropped = 0;
  let linkishKept = 0;
  for (const child of children) {
    const linkish = isLinkishSummaryChild(child);
    if (linkish && linkishKept >= maxLinkFarmChildren) {
      dropped += countSemanticNodes(child);
      continue;
    }
    if (linkish) linkishKept += 1;
    survivors.push(child);
  }
  if (dropped > 0) return { children: survivors, omitted: dropped };
  return untouched;
}
278
/**
 * Heuristic: can this element be a navigation-style list of links?
 *
 * List/landmark tags always qualify. `div`/`section` qualify only when their
 * id/class/role/aria-label/title contain a navigation-like keyword and no
 * content-like keyword. Every other tag is rejected.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function isLikelyLinkFarmContainer(element) {
  const tagName = element.name;
  if (["nav", "ul", "ol", "aside", "footer", "header"].includes(tagName)) return true;
  if (tagName !== "div" && tagName !== "section") return false;

  const value = ["id", "class", "role", "aria-label", "title"]
    .map((attribute) => attr(element, attribute))
    .filter(Boolean)
    .join(" ")
    .toLowerCase();

  const looksLikeContent = /\b(article|body|content|contents|entry|main|post|story|text|view)\b/.test(value);
  if (looksLikeContent) return false;
  if (/\b(board|category|comment|footer|gallery|gnb|header|issue|list|menu|nav|popular|recent|recommend|related|reply|sidebar|tab)\b/.test(value)) {
    return true;
  }
  return /갤러리|댓글|개념글|관련|목록|베스트|인기|최근|추천|카테고리/.test(value);
}
291
/**
 * Count how many of the given semantic children are link-like and how many
 * are content-rich.
 *
 * @param {Array<object>} children - Semantic nodes.
 * @returns {{linkishChildren: number, contentRichChildren: number}}
 */
function childLinkFarmStats(children) {
  const tally = (predicate) => children.filter((child) => predicate(child)).length;
  return {
    linkishChildren: tally(isLinkishSummaryChild),
    contentRichChildren: tally(isContentRichSummaryChild)
  };
}
300
/**
 * A subtree is "link-like" when it contains at least one link, no form
 * controls, tables or content containers, and at most one paragraph/text node.
 *
 * @param {object} node - Semantic node.
 * @returns {boolean}
 */
function isLinkishSummaryChild(node) {
  const { links, formControls, tables, paragraphs, contentContainers } = semanticRoleStats(node);
  if (links <= 0) return false;
  const hasHeavyContent = formControls !== 0 || tables !== 0 || contentContainers !== 0;
  return !hasHeavyContent && paragraphs <= 1;
}
304
/**
 * A subtree is "content-rich" when it holds more than one paragraph/text
 * node, or any table part, content container or form control.
 *
 * @param {object} node - Semantic node.
 * @returns {boolean}
 */
function isContentRichSummaryChild(node) {
  const { paragraphs, tables, contentContainers, formControls } = semanticRoleStats(node);
  if (paragraphs > 1) return true;
  return [tables, contentContainers, formControls].some((count) => count > 0);
}
308
/**
 * Recursively count, for a semantic subtree, how many nodes are links,
 * paragraphs/text, table parts, form controls and content containers.
 * A node is classified by its role, falling back to its tag.
 *
 * @param {object} node - Semantic node with `role`, `tag` and `children`.
 * @returns {{links: number, paragraphs: number, tables: number, formControls: number, contentContainers: number}}
 */
function semanticRoleStats(node) {
  const totals = { links: 0, paragraphs: 0, tables: 0, formControls: 0, contentContainers: 0 };
  switch (node.role ?? node.tag) {
    case "link":
      totals.links = 1;
      break;
    case "p":
    case "text":
      totals.paragraphs = 1;
      break;
    case "table":
    case "row":
    case "cell":
      totals.tables = 1;
      break;
    case "textbox":
    case "searchbox":
    case "combobox":
    case "listbox":
    case "checkbox":
    case "radio":
    case "slider":
    case "spinbutton":
    case "switch":
      totals.formControls = 1;
      break;
    case "article":
    case "main":
      totals.contentContainers = 1;
      break;
    default:
      break;
  }
  for (const child of node.children) {
    const nested = semanticRoleStats(child);
    for (const key of Object.keys(totals)) totals[key] += nested[key];
  }
  return totals;
}
327
/**
 * Track how often a structurally identical child has been seen under
 * `parent` and report whether this occurrence exceeds
 * `maxRepeatedSubtreeInstances`. Always records the occurrence in
 * `signatures` once the feature and parent checks pass.
 *
 * @param {object} parent - Parent DOM element.
 * @param {object} child - Semantic child node.
 * @param {Map<string, number>} signatures - Per-parent occurrence counts; mutated.
 * @param {object} context - Extraction context.
 * @returns {boolean} True when the child should be omitted.
 */
function shouldSummarizeRepeatedChild(parent, child, signatures, context) {
  const { summarizeRepeatedSubtrees, mode, maxRepeatedSubtreeInstances } = context.options;
  if (mode === "full" || !summarizeRepeatedSubtrees) return false;
  if (!isRepeatedSubtreeCandidate(parent)) return false;
  const key = semanticSignature(child);
  const seenBefore = signatures.has(key) ? signatures.get(key) ?? 0 : 0;
  signatures.set(key, seenBefore + 1);
  return seenBefore >= maxRepeatedSubtreeInstances;
}
335
/**
 * Whether repeated-subtree summarisation applies to children of this tag.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function isRepeatedSubtreeCandidate(element) {
  const tagName = element.name;
  const eligible = ["body", "main", "nav", "ul", "ol", "div", "section", "footer", "header", "aside"];
  return eligible.some((candidate) => candidate === tagName);
}
338
/**
 * Serialise a semantic subtree into a string used to detect repeated
 * siblings: tag, role, name, text, value and interactivity of the node,
 * followed by the bracketed, comma-joined signatures of its children.
 *
 * @param {object} node - Semantic node.
 * @returns {string}
 */
function semanticSignature(node) {
  const own = [
    String(node.tag),
    String(node.role ?? ""),
    String(node.name),
    String(node.text ?? ""),
    String(node.value ?? ""),
    node.interactive ? "i" : ""
  ].join("|");
  const nested = [];
  for (const child of node.children) nested.push(semanticSignature(child));
  return `${own}[${nested.join(",")}]`;
}
342
/**
 * Count the nodes in a semantic subtree, including the root.
 *
 * @param {object} node - Semantic node with a `children` array.
 * @returns {number}
 */
function countSemanticNodes(node) {
  return node.children.reduce((total, child) => total + countSemanticNodes(child), 1);
}
347
/**
 * Whether the element is itself collapsed so that its children should not be
 * walked: `aria-expanded="false"`, or a `<details>`, `<dialog>` or `popover`
 * element without an `open` attribute. Only active when
 * `pruneCollapsedSubtrees` is on and `includeHidden` is off.
 *
 * @param {object} element - DOM element.
 * @param {object} context - Extraction context.
 * @returns {boolean}
 */
function shouldSkipChildrenForCollapsedElement(element, context) {
  const { pruneCollapsedSubtrees, includeHidden } = context.options;
  if (includeHidden || !pruneCollapsedSubtrees) return false;
  if (attr(element, "aria-expanded") === "false") return true;

  const opensViaAttribute = element.name === "details" || element.name === "dialog" || attr(element, "popover") !== null;
  return opensViaAttribute && attr(element, "open") === null;
}
355
/**
 * Whether this element's id was indexed as the `aria-controls` target of a
 * collapsed (`aria-expanded="false"`) control. Always false when
 * `pruneCollapsedSubtrees` is off or the element has no non-empty id.
 *
 * @param {object} element - DOM element.
 * @param {object} context - Extraction context.
 * @returns {boolean}
 */
function isCollapsedControlledElement(element, context) {
  if (!context.options.pruneCollapsedSubtrees) return false;
  const id = attr(element, "id");
  return id ? context.collapsedControlledIds.has(id) : false;
}
359
/**
 * Heuristic for drawers/modals/menus that are present in the markup but
 * closed. Requires an overlay keyword, no "open" signal, no non-negative
 * tabindex on the element itself, and at least one closed indicator
 * (off-screen/zero-size style, closed-state class, or inert/aria-hidden).
 * Disabled in "full" mode or when `pruneLikelyClosedOverlays` is off.
 *
 * @param {object} element - DOM element.
 * @param {object} context - Extraction context.
 * @returns {boolean}
 */
function isLikelyClosedOverlay(element, context) {
  const { pruneLikelyClosedOverlays, mode } = context.options;
  const enabled = pruneLikelyClosedOverlays && mode !== "full";
  if (!enabled) return false;

  const isOverlayCandidate = !hasUsefulOpenSignal(element) && hasOverlaySignal(element) && !hasDirectFocusableIntent(element);
  if (!isOverlayCandidate) return false;

  if (hasOffscreenOrClosedStyle(element)) return true;
  if (hasClosedClassSignal(element)) return true;
  return hasInertSignal(element);
}
366
/**
 * Whether the element advertises an open/active state through `open`,
 * `aria-expanded="true"`, `aria-modal="true"`, `data-open="true"` or
 * `data-state="open"`.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function hasUsefulOpenSignal(element) {
  if (attr(element, "open") !== null) return true;
  const openMarkers = [
    ["aria-expanded", "true"],
    ["aria-modal", "true"],
    ["data-open", "true"],
    ["data-state", "open"]
  ];
  return openMarkers.some(([attribute, expected]) => attr(element, attribute) === expected);
}
369
/**
 * Whether the tag name, id, class, role or aria-label contains an overlay
 * keyword (drawer, modal, dialog, menu, ...) as a whole word.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function hasOverlaySignal(element) {
  const candidates = [element.name];
  for (const attribute of ["id", "class", "role", "aria-label"]) {
    candidates.push(attr(element, attribute));
  }
  const value = candidates.filter(Boolean).join(" ").toLowerCase();
  return /\b(drawer|modal|dialog|popover|overlay|hamburger|menu|sidebar|sheet|flyout|dropdown)\b/.test(value);
}
373
/**
 * Whether the element carries a `tabindex` whose integer value is >= 0.
 *
 * The value is read with `parseInt`, so an empty or non-numeric attribute
 * (`tabindex=""`) does not count; `Number("")` would have coerced it to 0.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function hasDirectFocusableIntent(element) {
  const tabindex = attr(element, "tabindex");
  if (tabindex === null) return false;
  // NaN >= 0 is false, so unparsable values fall through to "no intent".
  return Number.parseInt(tabindex, 10) >= 0;
}
377
/**
 * Whether the element has an `inert` attribute or `aria-hidden="true"`.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function hasInertSignal(element) {
  const isInert = attr(element, "inert") !== null;
  if (isInert) return true;
  return attr(element, "aria-hidden") === "true";
}
380
/**
 * Whether the class attribute contains a closed-state keyword
 * (closed, collapsed, hidden, inactive, is-closed, is-hidden), case-insensitively.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function hasClosedClassSignal(element) {
  const classAttribute = attr(element, "class");
  const classes = classAttribute ?? "";
  const closedPattern = /\b(closed|collapsed|hidden|inactive|is-closed|is-hidden)\b/i;
  return closedPattern.test(classes);
}
384
/**
 * Whether the inline style suggests the element is parked off-screen or
 * collapsed: a large negative offset, a negative percentage translate, a
 * zero (max-)height, or `pointer-events:none`.
 *
 * The zero-height check requires the value to end right after the zero (and
 * optional unit), so fractional sizes such as `height:0.5rem` are not
 * mistaken for a collapsed element.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function hasOffscreenOrClosedStyle(element) {
  const style = attr(element, "style") ?? "";
  if (!style) return false;
  // Whitespace is stripped first, so the patterns never have to allow for it.
  const normalized = style.replace(/\s+/g, "").toLowerCase();
  const closedPatterns = [
    /(?:^|;)(?:left|right|top|bottom):-\d{2,}(?:px|rem|em|vw|vh|%)/,
    /(?:^|;)transform:translate[xy]?\(-[1-9]\d*%/,
    /(?:^|;)(?:max-height|height):0(?:\.0+)?(?:px|rem|em|%)?(?=;|!|$)/,
    /(?:^|;)pointer-events:none/
  ];
  return closedPatterns.some((pattern) => pattern.test(normalized));
}
390
/**
 * Decide whether an element adds nothing to the semantic tree and should be
 * replaced by its children.
 *
 * Never prunes in "full" mode. Presentational roles are always pruned.
 * Interactive, meaningfully-roled or named elements are kept. Of the rest,
 * childless ones are pruned, ones carrying an id/aria-label/aria-labelledby
 * are kept, and everything else is pruned.
 *
 * @param {object} element - DOM element.
 * @param {string|null} role - Resolved role.
 * @param {string} name - Computed accessible name.
 * @param {boolean} interactive - Whether the element is interactive.
 * @param {Array<object>} children - Semantic children.
 * @param {object} context - Extraction context.
 * @returns {boolean}
 */
function shouldPrune(element, role, name, interactive, children, context) {
  if (context.options.mode === "full") return false;
  if (role === "none" || role === "presentation") return true;

  const meaningfulRole = Boolean(role) && role !== "generic";
  if (interactive || meaningfulRole || name) return false;
  if (children.length === 0) return true;

  const anchored = ["id", "aria-label", "aria-labelledby"].some((attribute) => Boolean(attr(element, attribute)));
  return !anchored;
}
400
/**
 * Whether a list item merely wraps a link or button and can be replaced by
 * its children. Never true in "full" mode.
 *
 * @param {string|null} role - Resolved role of the element.
 * @param {Array<object>} children - Semantic children.
 * @param {object} context - Extraction context.
 * @returns {boolean}
 */
function shouldPruneListItemWrapper(role, children, context) {
  if (context.options.mode === "full" || role !== "listitem") return false;
  for (const child of children) {
    if (child.role === "link" || child.role === "button") return true;
  }
  return false;
}
405
/**
 * Resolve the role of an element: the first token of an explicit `role`
 * attribute if present, otherwise an implicit role derived from the tag.
 *
 * `<section>` and `<form>` only get a role when they carry an explicit name
 * source. Landmark tags are looked up with an own-property check so that tag
 * names colliding with inherited object members (e.g. `<constructor>`) do not
 * resolve to a non-string "role".
 *
 * @param {object} element - DOM element.
 * @returns {string|null} Role name, or null when the element has none.
 */
function getRole(element) {
  const explicit = firstToken(attr(element, "role"));
  if (explicit) return explicit;

  const tag = element.name;
  if ((tag === "section" || tag === "form") && !hasExplicitNameSource(element)) return null;
  if (Object.prototype.hasOwnProperty.call(landmarkTags, tag)) return landmarkTags[tag] ?? null;
  if (/^h[1-6]$/.test(tag)) return "heading";

  switch (tag) {
    case "a":
    case "area":
      // Anchors without href are not links.
      return attr(element, "href") ? "link" : null;
    case "input":
      return inputRole(element);
    case "select":
      return attr(element, "multiple") !== null ? "listbox" : "combobox";
    case "th":
      return attr(element, "scope") === "row" ? "rowheader" : "columnheader";
    case "button":
    case "summary":
      return "button";
    case "details":
    case "fieldset":
      return "group";
    case "ol":
    case "ul":
      return "list";
    case "dialog":
      return "dialog";
    case "figure":
      return "figure";
    case "form":
      return "form";
    case "img":
      return "img";
    case "li":
      return "listitem";
    case "option":
      return "option";
    case "p":
      return "p";
    case "progress":
      return "progressbar";
    case "table":
      return "table";
    case "td":
      return "cell";
    case "textarea":
      return "textbox";
    case "tr":
      return "row";
    default:
      return null;
  }
}
435
/**
 * Map an `<input>` element's `type` (case-insensitive, default "text") to a
 * role. Hidden inputs have no role; unrecognised types are textboxes.
 *
 * @param {object} element - `<input>` DOM element.
 * @returns {string|null}
 */
function inputRole(element) {
  const rawType = attr(element, "type") ?? "text";
  switch (rawType.toLowerCase()) {
    case "hidden":
      return null;
    case "button":
    case "submit":
    case "reset":
    case "image":
      return "button";
    case "checkbox":
      return "checkbox";
    case "radio":
      return "radio";
    case "range":
      return "slider";
    case "search":
      return "searchbox";
    case "number":
      return "spinbutton";
    default:
      return "textbox";
  }
}
447
/**
 * Compute the accessible name of an element. Sources are tried in order and
 * the first non-empty one wins: `aria-labelledby` targets, `aria-label`,
 * an associated `<label>`, the value of button-like inputs, `alt` (img role
 * only), descendant text (roles named from contents), `title`.
 *
 * @param {object} element - DOM element.
 * @param {string} role - Resolved role.
 * @param {object} context - Extraction context (id index, label index, options).
 * @returns {string} Normalized name, or "" when none is found.
 */
function computeName(element, role, context) {
  const tidy = (raw) => normalizeText(raw, context.options.maxTextLength);

  const labelledBy = attr(element, "aria-labelledby");
  if (labelledBy) {
    const referencedTexts = [];
    for (const id of labelledBy.split(/\s+/)) {
      const target = context.ids.get(id);
      if (target) referencedTexts.push(descendantText(target));
    }
    const fromReferences = tidy(referencedTexts.join(" "));
    if (fromReferences) return fromReferences;
  }

  const fromAriaLabel = tidy(attr(element, "aria-label") ?? "");
  if (fromAriaLabel) return fromAriaLabel;

  const fromLabel = labelName(element, context);
  if (fromLabel) return fromLabel;

  const fromValue = elementValueName(element);
  if (fromValue) return tidy(fromValue);

  if (role === "img") {
    const fromAlt = tidy(attr(element, "alt") ?? "");
    if (fromAlt) return fromAlt;
  }

  if (rolesNamedFromContents.has(role)) {
    const fromContents = tidy(descendantText(element));
    if (fromContents) return fromContents;
  }

  // Last resort; yields "" when there is no usable title either.
  return tidy(attr(element, "title") ?? "");
}
472
/**
 * Name contributed by a `<label>`: the indexed `<label for>` text for the
 * element's id if there is one, otherwise the text of the closest enclosing
 * `<label>`.
 *
 * @param {object} element - DOM element.
 * @param {object} context - Extraction context.
 * @returns {string} Label text, or "" when no label applies.
 */
function labelName(element, context) {
  const id = attr(element, "id");
  const viaFor = id ? context.labelsByFor.get(id) : undefined;
  if (viaFor) return viaFor;

  const wrapper = findClosestLabel(element);
  if (!wrapper) return "";
  return normalizeText(descendantText(wrapper), context.options.maxTextLength);
}
481
/**
 * Walk up the parent chain and return the nearest `<label>` ancestor.
 *
 * @param {object} element - DOM element.
 * @returns {object|null} The enclosing label element, or null.
 */
function findClosestLabel(element) {
  for (let ancestor = element.parent; ancestor; ancestor = ancestor.parent) {
    if (isElement(ancestor) && ancestor.name === "label") return ancestor;
  }
  return null;
}
489
/**
 * For `<input type="button|submit|reset">`, the `value` attribute serves as
 * the name; every other element yields "".
 *
 * @param {object} element - DOM element.
 * @returns {string}
 */
function elementValueName(element) {
  if (element.name !== "input") return "";
  const type = (attr(element, "type") ?? "text").toLowerCase();
  const buttonLike = ["button", "submit", "reset"].includes(type);
  if (!buttonLike) return "";
  return attr(element, "value") ?? "";
}
496
/**
 * Collect widget state flags from native and ARIA attributes. Only states
 * that are actually expressed on the element appear in the result.
 * `aria-checked` takes precedence over the native `checked` attribute.
 *
 * @param {object} element - DOM element.
 * @returns {object} Possibly empty object with any of: disabled, required,
 *   readonly, checked, selected, expanded, pressed, invalid.
 */
function getState(element) {
  const has = (attribute) => attr(element, attribute) !== null;
  const isTrue = (attribute) => attr(element, attribute) === "true";
  const state = {};

  for (const flag of ["disabled", "required", "readonly"]) {
    if (has(flag) || isTrue(`aria-${flag}`)) state[flag] = true;
  }

  const checked = attr(element, "aria-checked") ?? (has("checked") ? "true" : null);
  if (checked === "true" || checked === "false") {
    state.checked = checked === "true";
  } else if (checked === "mixed") {
    state.checked = "mixed";
  }

  if (has("selected") || isTrue("aria-selected")) state.selected = true;

  const expanded = attr(element, "aria-expanded");
  if (expanded === "true" || expanded === "false") state.expanded = expanded === "true";

  const pressed = attr(element, "aria-pressed");
  if (pressed === "true" || pressed === "false") {
    state.pressed = pressed === "true";
  } else if (pressed === "mixed") {
    state.pressed = "mixed";
  }

  const invalid = attr(element, "aria-invalid");
  if (invalid && invalid !== "false") {
    state.invalid = invalid === "true" ? true : invalid;
  }
  return state;
}
517
/**
 * An element is interactive when it is focusable, has an interactive role,
 * or is a native form control tag.
 *
 * @param {object} element - DOM element.
 * @param {string|null} role - Resolved role.
 * @param {boolean} focusable - Result of `isFocusable` for the element.
 * @returns {boolean}
 */
function isInteractive(element, role, focusable) {
  if (focusable) return true;
  if (role && interactiveRoles.has(role)) return true;
  switch (element.name) {
    case "button":
    case "input":
    case "select":
    case "textarea":
      return true;
    default:
      return false;
  }
}
522
/**
 * Whether the element can take focus: not `disabled`, and either a
 * `tabindex` with an integer value >= 0, an interactive role, or an `<a>`
 * with an `href` attribute.
 *
 * The tabindex is read with `parseInt`, so an empty or non-numeric attribute
 * (`tabindex=""`) does not make the element focusable; `Number("")` would
 * have coerced it to 0.
 *
 * @param {object} element - DOM element.
 * @param {string|null} role - Resolved role.
 * @returns {boolean}
 */
function isFocusable(element, role) {
  if (attr(element, "disabled") !== null) return false;
  const tabindex = attr(element, "tabindex");
  // NaN >= 0 is false, so unparsable values are ignored.
  if (tabindex !== null && Number.parseInt(tabindex, 10) >= 0) return true;
  if (role && interactiveRoles.has(role)) return true;
  return element.name === "a" && attr(element, "href") !== null;
}
529
/**
 * Whether the element is hidden via the `hidden` attribute,
 * `aria-hidden="true"`, or an inline style matching `hiddenStylePattern`.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function isHidden(element) {
  const flaggedHidden = attr(element, "hidden") !== null || attr(element, "aria-hidden") === "true";
  if (flaggedHidden) return true;
  const inlineStyle = attr(element, "style");
  if (!inlineStyle) return false;
  return hiddenStylePattern.test(inlineStyle);
}
535
/**
 * Heuristic ad detector: looks for advertising keywords (English whole
 * words, or Korean terms) in id, class, aria-label, title and data-testid.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function isLikelyAd(element) {
  const value = ["id", "class", "aria-label", "title", "data-testid"]
    .map((attribute) => attr(element, attribute))
    .filter(Boolean)
    .join(" ")
    .toLowerCase();
  if (/\b(ad|ads|advert|advertisement|banner|sponsor|sponsored|promotion|promoted|powerlink)\b/.test(value)) {
    return true;
  }
  return /파워링크|광고|직접홍보|홍보/.test(value);
}
545
/**
 * Heuristic boilerplate detector. `<footer>` always counts; `<main>`,
 * `<article>` and elements with those roles never do. Otherwise the
 * id/class/aria-label/title must mention a footer or sidebar and must not
 * mention a main-content keyword.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function isLikelyBoilerplate(element) {
  const tagName = element.name;
  if (tagName === "footer") return true;
  if (tagName === "main" || tagName === "article") return false;

  const explicitRole = firstToken(attr(element, "role"));
  if (explicitRole === "main" || explicitRole === "article") return false;

  const value = ["id", "class", "aria-label", "title"]
    .map((attribute) => attr(element, attribute))
    .filter(Boolean)
    .join(" ")
    .toLowerCase();
  if (!value) return false;

  const mentionsContent = /\b(content|contents|entry|post-body|article-body|story-body|view-content)\b/.test(value);
  if (mentionsContent) return false;
  if (/\b(footer|sidebar)\b/.test(value)) return true;
  return /푸터/.test(value);
}
560
/**
 * Whether a `<table>` is a known listing-style boilerplate table, judged by
 * `gall_list` / `bottom_list*` style keywords in id, class, aria-label or title.
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function isLikelyBoilerplateTable(element) {
  if (element.name !== "table") return false;
  const parts = [];
  for (const attribute of ["id", "class", "aria-label", "title"]) {
    const raw = attr(element, attribute);
    if (raw) parts.push(raw);
  }
  const value = parts.join(" ").toLowerCase();
  if (/\bgall[_-]?list\b/.test(value)) return true;
  return /\bbottom[_-]?list\w*\b/.test(value);
}
570
/**
 * Replace a boilerplate table by a flat container holding only its useful
 * leaf items, or by nothing when it has none.
 *
 * @param {object} element - The table element.
 * @param {object} context - Extraction context.
 * @returns {object|null} Container node, or null when no items were found.
 */
function flattenBoilerplateTable(element, context) {
  const items = collectFlattenedBoilerplateItems(element, context);
  return items.length > 0 ? containerNode(context, element.name, items) : null;
}
575
/**
 * Depth-first search below `element` for named leaf items worth keeping from
 * a boilerplate table: interactive elements, headings and images that have
 * both a role and a name. A matching element becomes a childless node and is
 * not descended into; non-matching elements are searched recursively.
 * Skipped tags, hidden elements and (optionally) ads are ignored entirely.
 *
 * @param {object} element - Element whose descendants are scanned.
 * @param {object} context - Extraction context.
 * @returns {Array<object>} Flat list of semantic leaf nodes in document order.
 */
function collectFlattenedBoilerplateItems(element, context) {
  const { options } = context;
  const items = [];
  for (const child of element.children) {
    if (!isElement(child) || shouldSkipElement(child, context)) continue;
    if (!options.includeHidden && isHidden(child)) continue;
    if (options.excludeLikelyAds && isLikelyAd(child)) continue;

    const role = getRole(child);
    const focusable = isFocusable(child, role);
    const interactive = isInteractive(child, role, focusable);
    const name = role ? computeName(child, role, context) : "";

    const keepAsLeaf = Boolean(role && name) && (interactive || role === "heading" || role === "img");
    if (!keepAsLeaf) {
      items.push(...collectFlattenedBoilerplateItems(child, context));
      continue;
    }

    const leaf = {
      id: nextId(context),
      tag: child.name,
      role,
      name,
      interactive,
      focusable,
      selector: getSelector(child),
      xpath: getXPath(child),
      children: []
    };
    const currentValue = getValue(child);
    if (currentValue) leaf.value = currentValue;
    const state = getState(child);
    if (Object.keys(state).length > 0) leaf.state = state;
    if (options.includeAttributes) leaf.attributes = { ...child.attribs };
    items.push(leaf);
  }
  return items;
}
610
/**
 * Whether the element carries any of `aria-label`, `aria-labelledby` or
 * `title` (presence only; the value may be empty).
 *
 * @param {object} element - DOM element.
 * @returns {boolean}
 */
function hasExplicitNameSource(element) {
  const namingAttributes = ["aria-label", "aria-labelledby", "title"];
  return namingAttributes.some((attribute) => attr(element, attribute) !== null);
}
613
/**
 * Normalized text of the element's immediate text-node children, joined by
 * spaces (text inside child elements is not included).
 *
 * @param {object} element - DOM element.
 * @param {number} maxLength - Limit passed to `normalizeText`.
 * @returns {string}
 */
function directText(element, maxLength) {
  const pieces = [];
  for (const child of element.children) {
    if (isText(child)) pieces.push(child.data);
  }
  return normalizeText(pieces.join(" "), maxLength);
}
619
/**
 * Concatenate, separated by single spaces, every text node found below the
 * element (as gathered by `collectDescendantText`). Not normalized.
 *
 * @param {object} element - DOM element.
 * @returns {string}
 */
function descendantText(element) {
  const fragments = [];
  collectDescendantText(element.children, fragments);
  const separator = " ";
  return fragments.join(separator);
}
624
/**
 * Append the data of every text node under `nodes` to `parts`, depth-first
 * in document order. Subtrees of non-semantic tags and `<noscript>` are
 * skipped, as are nodes that are neither text nor elements.
 *
 * @param {Array} nodes - Sibling DOM nodes.
 * @param {Array<string>} parts - Output accumulator; mutated.
 */
function collectDescendantText(nodes, parts) {
  for (const node of nodes) {
    if (isText(node)) {
      parts.push(node.data);
    } else if (isElement(node)) {
      const ignored = nonSemanticTags.has(node.name) || node.name === "noscript";
      if (!ignored) collectDescendantText(node.children, parts);
    }
  }
}
635
/**
 * Collapse whitespace runs to single spaces, trim, and cap the length.
 *
 * Text longer than `maxLength` is cut and suffixed with "..." such that the
 * result, marker included, is never longer than `maxLength`. When
 * `maxLength` is too small to fit the marker the text is simply cut. A
 * `maxLength` that is not a number (e.g. undefined) disables truncation.
 *
 * @param {string} value - Raw text.
 * @param {number} maxLength - Maximum length of the returned string.
 * @returns {string}
 */
function normalizeText(value, maxLength) {
  const normalized = value.replace(/\s+/g, " ").trim();
  if (!(normalized.length > maxLength)) return normalized;
  const marker = "...";
  if (maxLength <= marker.length) return normalized.slice(0, Math.max(0, maxLength));
  return `${normalized.slice(0, maxLength - marker.length)}${marker}`;
}
639
/**
 * Normalized `value` attribute of the element, capped at 240 characters by
 * `normalizeText`; "" when the attribute is absent.
 *
 * @param {object} element - DOM element.
 * @returns {string}
 */
function getValue(element) {
  const rawValue = attr(element, "value");
  const text = rawValue ?? "";
  return normalizeText(text, 240);
}
642
/**
 * Build a short CSS selector hint for the element: `#id` when it has a
 * non-empty id, otherwise the tag name, with `:nth-of-type(n)` appended when
 * the element is not the first same-tag sibling. The selector is local to
 * the parent and not guaranteed to be unique in the document.
 *
 * @param {object} element - DOM element.
 * @returns {string}
 */
function getSelector(element) {
  const id = attr(element, "id");
  if (id) return `#${cssEscape(id)}`;

  const tagName = element.name;
  const owner = element.parent;
  const ownerHasChildren = Boolean(owner) && "children" in owner;
  if (!ownerHasChildren) return tagName;

  const sameTagSiblings = owner.children.filter((node) => isElement(node) && node.name === tagName);
  const position = sameTagSiblings.indexOf(element);
  if (position > 0) return `${tagName}:nth-of-type(${position + 1})`;
  return tagName;
}
651
/**
 * Build an absolute XPath-like location for the element by walking up its
 * ancestors. Each step is `tag[n]`, where n is the 1-based position among
 * same-tag siblings; a node with no usable parent contributes its bare tag.
 * The walk stops once the parent is not an element.
 *
 * @param {object} element - DOM element.
 * @returns {string} Path starting with "/".
 */
function getXPath(element) {
  const segments = [];
  let cursor = element;
  while (cursor) {
    const owner = cursor.parent;
    const ownerHasChildren = Boolean(owner) && "children" in owner;
    if (!ownerHasChildren) {
      segments.unshift(cursor.name);
      break;
    }
    const tagName = cursor.name;
    const sameTagSiblings = owner.children.filter((node) => isElement(node) && node.name === tagName);
    segments.unshift(`${tagName}[${sameTagSiblings.indexOf(cursor) + 1}]`);
    cursor = isElement(owner) ? owner : null;
  }
  return `/${segments.join("/")}`;
}
667
/**
 * Create an anonymous, non-interactive wrapper node around `children`.
 * Allocates a fresh id from the context.
 *
 * @param {object} context - Extraction context (id counter).
 * @param {string} tag - Tag name to record on the wrapper.
 * @param {Array<object>} children - Semantic children.
 * @returns {object}
 */
function containerNode(context, tag, children) {
  const id = nextId(context);
  const wrapper = { id, tag, role: null, name: "", interactive: false, focusable: false, children };
  return wrapper;
}
678
/**
 * Create a childless placeholder node explaining why no tree is available.
 * Allocates a fresh id from the context.
 *
 * @param {object} context - Extraction context (id counter).
 * @param {string} tag - Tag name to record.
 * @param {string} unavailableReason - Human-readable explanation.
 * @returns {object}
 */
function unavailableNode(context, tag, unavailableReason) {
  const placeholder = {
    id: nextId(context),
    tag,
    role: null,
    name: "",
    interactive: false,
    focusable: false,
    children: []
  };
  placeholder.unavailableReason = unavailableReason;
  return placeholder;
}
690
/**
 * Create the trailing "note" node that reports how many semantic nodes were
 * left out of a parent's children. Allocates a fresh id from the context.
 *
 * @param {object} context - Extraction context (id counter).
 * @param {number} omitted - Number of omitted nodes.
 * @returns {object}
 */
function omittedNode(context, omitted) {
  const id = nextId(context);
  const summary = `${omitted} static nodes omitted`;
  return { id, tag: "omitted", role: "note", name: summary, interactive: false, focusable: false, children: [] };
}
701
/**
 * Hand out the next sequential node id ("static-1", "static-2", ...) and
 * advance the counter stored on the context.
 *
 * @param {object} context - Extraction context holding `nextId`.
 * @returns {string}
 */
function nextId(context) {
  const serial = context.nextId;
  context.nextId += 1;
  return `static-${serial}`;
}
704
/**
 * Read an attribute from the element's own `attribs` map.
 *
 * @param {object} element - DOM element with an `attribs` object.
 * @param {string} name - Attribute name.
 * @returns {string|null} The value ("" when stored as null/undefined), or
 *   null when the attribute is not an own property of `attribs`.
 */
function attr(element, name) {
  const { attribs } = element;
  if (!Object.prototype.hasOwnProperty.call(attribs, name)) return null;
  const stored = attribs[name];
  return stored ?? "";
}
707
/**
 * First whitespace-separated token of a string.
 *
 * @param {string|null|undefined} value - Raw attribute value.
 * @returns {string|null} The token, or null for null/undefined/blank input.
 */
function firstToken(value) {
  if (value === null || value === undefined) return null;
  const [head] = value.trim().split(/\s+/);
  return head || null;
}
710
/**
 * Escape a string for use as a CSS identifier (e.g. after `#`), following
 * the CSSOM "serialize an identifier" rules that `CSS.escape` implements:
 *  - NUL becomes U+FFFD;
 *  - control characters, a leading digit, and a digit right after a leading
 *    "-" are written as a hex escape followed by a space (`1a` -> `\31 a`),
 *    because a plain backslash escape is not valid for them;
 *  - a lone "-" becomes `\-`;
 *  - ASCII letters, digits, "-", "_" and all non-ASCII characters are kept;
 *  - every other character is prefixed with a backslash.
 *
 * @param {string} value - Raw identifier, typically an element id.
 * @returns {string} Escaped identifier.
 */
function cssEscape(value) {
  const text = String(value);
  if (text === "-") return "\\-";
  const firstCode = text.charCodeAt(0);
  let escaped = "";
  for (let index = 0; index < text.length; index += 1) {
    const code = text.charCodeAt(index);
    if (code === 0) {
      escaped += "\uFFFD";
      continue;
    }
    const isDigit = code >= 0x30 && code <= 0x39;
    const needsHexEscape =
      (code >= 0x01 && code <= 0x1f) ||
      code === 0x7f ||
      (index === 0 && isDigit) ||
      (index === 1 && isDigit && firstCode === 0x2d);
    if (needsHexEscape) {
      escaped += `\\${code.toString(16)} `;
      continue;
    }
    const isPlain =
      code >= 0x80 ||
      code === 0x2d ||
      code === 0x5f ||
      isDigit ||
      (code >= 0x41 && code <= 0x5a) ||
      (code >= 0x61 && code <= 0x7a);
    escaped += isPlain ? text.charAt(index) : `\\${text.charAt(index)}`;
  }
  return escaped;
}
713
/**
 * Whether a parsed node is an element, i.e. its `type` is "tag", "script"
 * or "style".
 *
 * @param {object} node - Parsed DOM node.
 * @returns {boolean}
 */
function isElement(node) {
  switch (node.type) {
    case "tag":
    case "script":
    case "style":
      return true;
    default:
      return false;
  }
}
716
/**
 * Whether a parsed node is a text node (`type === "text"`).
 *
 * @param {object} node - Parsed DOM node.
 * @returns {boolean}
 */
function isText(node) {
  const { type } = node;
  return type === "text";
}
719
/**
 * Depth-first, document-order search for the first element with the given
 * tag name among `nodes` and their descendants.
 *
 * @param {Array} nodes - Sibling DOM nodes to search.
 * @param {string} name - Tag name to match.
 * @returns {object|undefined} The matching element, or undefined.
 */
function findElement(nodes, name) {
  for (const candidate of nodes) {
    if (!isElement(candidate)) continue;
    const match = candidate.name === name ? candidate : findElement(candidate.children, name);
    if (match) return match;
  }
  return undefined;
}
728
/**
 * Wrap top-level nodes in a synthetic `fragment` element with no attributes,
 * used as the root when the markup has no `<html>`/`<body>`.
 *
 * @param {Array} children - Nodes to place under the synthetic root.
 * @returns {object} A new `DomElement` named "fragment".
 */
function fragmentRoot(children) {
  const noAttributes = {};
  const root = new DomElement("fragment", noAttributes, children);
  return root;
}
731
+
732
+ export {
733
+ extractStaticSemanticTree
734
+ };
735
+ //# sourceMappingURL=chunk-Z7V6PIPH.js.map