ax-grep 0.0.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,925 @@
1
+ // src/static.ts
2
+ import { parseDocument } from "htmlparser2";
3
+ import { Element as DomElement } from "domhandler";
4
/**
 * Baseline extraction options. resolveStaticOptions copies this object,
 * applies source-profile adjustments, and then layers caller overrides on top.
 */
var defaultOptions = {
  // Copy each element's raw attribute map onto its output node.
  includeAttributes: true,
  // Drop elements whose id/class/label text looks like advertising.
  excludeLikelyAds: false,
  // Keep hidden elements, collapsed subtrees and closed-looking overlays.
  includeHidden: false,
  // Walk the element children of <select>.
  includeSelectOptions: true,
  // Emit "#text" child nodes for non-empty text nodes.
  includeTextNodes: false,
  // Length limit handed to the text normalisation helpers.
  maxTextLength: 240,
  // Code in this file also checks for "interactive" and "full".
  mode: "compact",
  // Drop (or flatten) elements that look like page boilerplate.
  excludeLikelyBoilerplate: false,
  // Child cap applied by shouldSummarizeMoreChildren.
  maxChildrenPerNode: 80,
  // Link-like child cap applied by summarizeLikelyLinkFarmChildren.
  maxLinkFarmChildren: 24,
  // Identical sibling subtrees kept before the rest are counted as omitted.
  maxRepeatedSubtreeInstances: 3,
  // Feature switches for the individual pruning/summarising passes.
  pruneCollapsedSubtrees: true,
  pruneLikelyClosedOverlays: true,
  summarizeLargeSubtrees: true,
  summarizeLikelyLinkFarms: true,
  summarizeRepeatedSubtrees: true
};
22
// Roles that make a node interactive (isInteractive) and focusable (isFocusable).
var interactiveRoles = /* @__PURE__ */ new Set([
  "button", "checkbox", "combobox", "link",
  "listbox", "menuitem", "menuitemcheckbox", "menuitemradio",
  "option", "radio", "searchbox", "slider",
  "spinbutton", "switch", "tab", "textbox",
  "treeitem"
]);
41
// Implicit role reported by getRole for sectioning/landmark tags without an explicit role.
var landmarkTags = {
  article: "article",
  aside: "complementary",
  footer: "contentinfo",
  header: "banner",
  main: "main",
  nav: "navigation",
  section: "region"
};
50
// Roles for which computeName falls back to the element's descendant text.
var rolesNamedFromContents = /* @__PURE__ */ new Set([
  "button", "cell", "checkbox", "columnheader",
  "heading", "link", "listitem", "menuitem",
  "menuitemcheckbox", "menuitemradio", "option", "radio",
  "rowheader", "switch", "tab", "treeitem"
]);
68
// Matches an inline style declaration that hides the element: display:none,
// visibility:hidden, content-visibility:hidden, or an opacity of exactly zero.
// The declaration must start at the beginning of the style or after a ";".
// Whitespace before the terminator and an optional "!important" flag are
// tolerated, so "display: none ;" and "display:none !important" match too.
var hiddenStylePattern = /(?:^|;)\s*(display\s*:\s*none|visibility\s*:\s*hidden|content-visibility\s*:\s*hidden|opacity\s*:\s*0(?:\.0+)?)\s*(?:!\s*important\s*)?(?:;|$)/i;
69
// Tags that shouldSkipElement drops outside "full" mode (declarative shadow
// templates are exempted there before this set is consulted).
var nonSemanticTags = /* @__PURE__ */ new Set([
  "head",
  "link",
  "meta",
  "script",
  "style",
  "template"
]);
70
/**
 * Parses an HTML string and converts it into a semantic node tree.
 *
 * The walk starts at <body>, falling back to <html> and then to a root built
 * from the top-level nodes. When nothing can be walked, a placeholder
 * "unavailable" node is returned instead.
 *
 * @param {string} html - Markup handed to htmlparser2's parseDocument.
 * @param {object} [options] - Overrides merged over the resolved defaults.
 * @returns {object} Root semantic node, or the placeholder node.
 */
function extractStaticSemanticTree(html, options = {}) {
  const parserSettings = {
    lowerCaseAttributeNames: true,
    lowerCaseTags: true,
    recognizeSelfClosing: true
  };
  const topLevelNodes = parseDocument(html, parserSettings).children;

  // Options are resolved before indexing; indexDocument then fills the lookup tables.
  const context = {
    options: resolveStaticOptions(topLevelNodes, html, options),
    nextId: 1,
    ids: new Map(),
    referencedIds: new Set(),
    collapsedControlledIds: new Set(),
    labelsByFor: new Map(),
    slotAssignments: void 0
  };
  indexDocument(topLevelNodes, context);

  let root = findElement(topLevelNodes, "body");
  if (root == null) root = findElement(topLevelNodes, "html");
  if (root == null) root = fragmentRoot(topLevelNodes);

  const tree = walkElement(root, context);
  if (tree != null) return tree;
  return unavailableNode(context, "document", "HTML has no inspectable root");
}
89
/**
 * Builds the effective option set for one extraction run.
 *
 * Starts from a copy of defaultOptions, raises or lowers the child caps when
 * the source looks like a wiki or a forum, and finally applies the caller's
 * overrides. Overrides whose value is `undefined` are ignored, so passing
 * `{ maxTextLength: undefined }` keeps the default instead of erasing it.
 *
 * @param {Array} nodes - Top-level parsed nodes.
 * @param {string} html - Raw markup, used by the profile heuristics.
 * @param {object} [options] - Caller overrides; may be null/undefined.
 * @returns {object} A new options object.
 */
function resolveStaticOptions(nodes, html, options) {
  const inferred = inferStaticSourceProfile(nodes, html);
  const resolved = { ...defaultOptions };
  if (inferred.wikiLike) {
    resolved.maxChildrenPerNode = 400;
    resolved.maxLinkFarmChildren = 80;
  }
  // Applied after the wiki tweak, so a forum-like wiki ends up with the forum cap.
  if (inferred.forumLike) {
    resolved.maxLinkFarmChildren = 19;
  }
  const overrides = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined)
  );
  return { ...resolved, ...overrides };
}
101
/**
 * Guesses from markup hints whether the page is wiki-like and/or forum-like.
 *
 * The hint text is built from the class/id of the root and <body> plus three
 * <meta> values; the raw HTML is also searched with a few patterns.
 *
 * @param {Array} nodes - Top-level parsed nodes.
 * @param {string} html - Raw markup.
 * @returns {{wikiLike: boolean, forumLike: boolean}}
 */
function inferStaticSourceProfile(nodes, html) {
  const root = findElement(nodes, "html") ?? fragmentRoot(nodes);
  const body = findElement(nodes, "body");

  const hints = [attr(root, "class"), attr(root, "id")];
  hints.push(body ? attr(body, "class") : "");
  hints.push(body ? attr(body, "id") : "");
  for (const metaName of ["generator", "application-name", "twitter:site"]) {
    hints.push(firstMetaContent(root, metaName));
  }
  const profileText = hints.filter(Boolean).join(" ").toLowerCase();

  const wikiLike =
    /\b(mediawiki|mw-parser-output|wikipedia|wikimedia)\b/.test(profileText) ||
    /\b(?:id|class)=["'][^"']*\bmw-parser-output\b/i.test(html);

  const forumLike =
    /\b(5ch|2ch|dcinside|ruliweb|clien|bbs|board|forum|gallery|gall|thread|subback)\b/.test(profileText) ||
    /\b(?:id|class)=["'][^"']*\b(?:gall_list|threadlist|thread-list|board-list|article-list|subback|bbs|forum)\b/i.test(html) ||
    /(?:갤러리|게시판|댓글|개념글|스레드|レス|話題度)/.test(html);

  return { wikiLike, forumLike };
}
118
/**
 * Returns the `content` of the first <meta> element, in document order, whose
 * `name` or `property` attribute matches `name`.
 *
 * The `name` attribute is compared case-insensitively; `property` is compared
 * exactly. The traversal uses a LIFO stack filled in reverse child order, so
 * it visits nodes in the same pre-order as before but without `shift()` /
 * `unshift(...children)`, which cost linear time per step and can exceed the
 * engine's argument limit on very wide nodes.
 *
 * @param {object|null|undefined} root - Element whose descendants are searched.
 * @param {string} name - Metadata name to look for.
 * @returns {string} The content value, or "" when nothing matches.
 */
function firstMetaContent(root, name) {
  if (!root) return "";
  const wanted = String(name).toLowerCase();
  const pending = [];
  const pushChildren = (children) => {
    for (let index = children.length - 1; index >= 0; index -= 1) {
      pending.push(children[index]);
    }
  };
  pushChildren(root.children);
  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || !isElement(node)) continue;
    if (node.name === "meta") {
      const metaName = attr(node, "name");
      const nameMatches = typeof metaName === "string" && metaName.toLowerCase() === wanted;
      if (nameMatches || attr(node, "property") === name) {
        return attr(node, "content") ?? "";
      }
    }
    pushChildren(node.children);
  }
  return "";
}
132
/**
 * Recursively fills the lookup tables on `context` for a list of nodes.
 *
 * - `ids`: id -> element. When an id is duplicated, the first element in
 *   document order is kept, matching how DOM id lookups resolve.
 * - `referencedIds`: every id mentioned by an ARIA id-reference attribute.
 * - `collapsedControlledIds`: ids listed in `aria-controls` of an element
 *   whose `aria-expanded` is "false".
 * - `labelsByFor`: <label for="..."> target id -> label element.
 *
 * @param {Array} nodes - Nodes to index; non-elements are skipped.
 * @param {object} context - Extraction context holding the tables above.
 */
function indexDocument(nodes, context) {
  for (const node of nodes) {
    if (!isElement(node)) continue;
    const id = attr(node, "id");
    if (id && !context.ids.has(id)) context.ids.set(id, node);
    for (const referencedId of referencedIds(node)) {
      context.referencedIds.add(referencedId);
    }
    if (attr(node, "aria-expanded") === "false") {
      for (const controlledId of (attr(node, "aria-controls") ?? "").split(/\s+/)) {
        if (controlledId) context.collapsedControlledIds.add(controlledId);
      }
    }
    if (node.name === "label") {
      const target = attr(node, "for");
      if (target) context.labelsByFor.set(target, node);
    }
    indexDocument(node.children, context);
  }
}
152
/**
 * Lists every id token found in the element's ARIA id-reference attributes,
 * in attribute order and then token order. Duplicates are kept.
 *
 * @param {object} element
 * @returns {string[]}
 */
function referencedIds(element) {
  const idrefAttributes = [
    "aria-labelledby",
    "aria-describedby",
    "aria-details",
    "aria-errormessage",
    "aria-controls",
    "aria-owns",
    "aria-flowto",
    "aria-activedescendant"
  ];
  const collected = [];
  for (const attributeName of idrefAttributes) {
    const raw = attr(element, attributeName);
    if (!raw) continue;
    for (const piece of raw.split(/\s+/)) {
      const id = piece.trim();
      if (id) collected.push(id);
    }
  }
  return collected;
}
164
/**
 * Collects the ids referenced by the element's description-type attributes
 * (aria-describedby, aria-details, aria-errormessage) into a Set.
 *
 * @param {object} element
 * @returns {Set<string>}
 */
function descriptionReferenceIds(element) {
  const ids = new Set();
  for (const attributeName of ["aria-describedby", "aria-details", "aria-errormessage"]) {
    const raw = attr(element, attributeName);
    if (!raw) continue;
    for (const piece of raw.split(/\s+/)) {
      const id = piece.trim();
      if (id) ids.add(id);
    }
  }
  return ids;
}
171
/**
 * Converts one element (and, recursively, its children) into a semantic node.
 *
 * Returns null when the element is filtered out. A pruned wrapper is replaced
 * by its only child, or by a container node holding its children.
 *
 * @param {object|null|undefined} element - Element to convert.
 * @param {object} context - Extraction context (options, id counter, indexes).
 * @returns {object|null}
 */
function walkElement(element, context) {
  if (!element || shouldSkipElement(element, context)) return null;

  // Filters run in a fixed order; the first one that applies decides the result.
  const hiddenAllowed = context.options.includeHidden;
  if (!hiddenAllowed && isHidden(element)) return null;
  if (context.options.excludeLikelyAds && isLikelyAd(element)) return null;
  if (context.options.excludeLikelyBoilerplate) {
    if (isLikelyBoilerplateTable(element)) return flattenBoilerplateTable(element, context);
    if (isLikelyBoilerplate(element)) return null;
  }
  if (!hiddenAllowed) {
    if (isCollapsedControlledElement(element, context)) return null;
    if (isLikelyClosedOverlay(element, context)) return null;
  }

  const tag = element.name;
  const role = getRole(element);
  const state = getState(element);
  const focusable = isFocusable(element, role);
  const interactive = isInteractive(element, role, focusable);
  const name = role ? computeName(element, role, context) : "";
  const children = shouldSkipChildrenForCollapsedElement(element, context)
    ? []
    : collectChildren(element, context);

  // An iframe with a src but no srcdoc and no walked children gets a placeholder child.
  const isOpaqueFrame =
    tag === "iframe" && children.length === 0 && attr(element, "src") && !attr(element, "srcdoc");
  if (isOpaqueFrame) {
    children.push(unavailableNode(context, "iframe", "iframe content unavailable in static HTML"));
  }

  // Replace this element by its only child, or by a plain container around its children.
  const unwrapInto = (wrapperTag) =>
    children.length === 1 ? children[0] ?? null : containerNode(context, wrapperTag, children);

  if (context.options.mode === "interactive" && !interactive) {
    return children.length > 0 ? containerNode(context, tag, children) : null;
  }
  if (shouldPruneCustomElementWrapper(element, role, name, interactive, children, context)) {
    return unwrapInto("fragment");
  }
  if (shouldPruneListItemWrapper(role, children, context)) {
    return unwrapInto(tag);
  }
  if (shouldPrune(element, role, name, interactive, children, context)) {
    return children.length === 0 ? null : unwrapInto(tag);
  }

  // The id is allocated only once the element is known to be kept.
  const node = {
    id: nextId(context),
    tag,
    role,
    name,
    interactive,
    focusable,
    selector: getSelector(element),
    xpath: getXPath(element),
    children
  };

  const description = computeDescription(element, context);
  if (description) node.description = description;

  const text = directText(element, context.options.maxTextLength);
  if (text) node.text = text;

  const value = getValue(element);
  if (value) node.value = value;

  if (Object.keys(state).length > 0) node.state = state;
  if (context.options.includeAttributes) node.attributes = { ...element.attribs };
  return node;
}
224
/**
 * Builds the semantic child list for an element.
 *
 * Three cases are handled:
 *  1. the element hosts a declarative shadow template: the template's element
 *     children are walked with the host's slot assignments in effect;
 *  2. the element is a <slot> inside such a template: the assigned nodes (or
 *     the slot's own element children as fallback) are walked;
 *  3. otherwise the element's own children are walked.
 * Children dropped by the summarising passes are counted, and one "omitted"
 * node carrying that count is appended when the count is non-zero.
 *
 * @param {object} element
 * @param {object} context
 * @returns {object[]}
 */
function collectChildren(element, context) {
  const children = [];
  const repeatedSignatures = new Map();
  let omitted = 0;

  const adopt = (childElement) => {
    const semanticChild = walkElement(childElement, context);
    omitted += appendSemanticChild(element, semanticChild, children, repeatedSignatures, context);
  };
  const readText = (textNode) => normalizeText(textNode.data, context.options.maxTextLength);
  const makeTextNode = (text) => ({
    id: nextId(context),
    tag: "#text",
    role: "text",
    name: text,
    text,
    interactive: false,
    focusable: false,
    children: []
  });
  const applyLinkFarmSummary = () => {
    const summary = summarizeLikelyLinkFarmChildren(element, children, context);
    if (summary.omitted > 0) {
      children.splice(0, children.length, ...summary.children);
      omitted += summary.omitted;
    }
  };
  const finish = () => {
    if (omitted > 0) children.push(omittedNode(context, omitted));
    return children;
  };

  // Case 1: declarative shadow root host.
  const shadowTemplate = element.children.find(
    (child) => isElement(child) && isDeclarativeShadowTemplate(child)
  );
  if (shadowTemplate) {
    const outerAssignments = context.slotAssignments;
    context.slotAssignments = collectSlotAssignments(element);
    for (const child of shadowTemplate.children) {
      if (isElement(child)) adopt(child);
    }
    context.slotAssignments = outerAssignments;
    applyLinkFarmSummary();
    return finish();
  }

  // Case 2: <slot> while slot assignments are in effect.
  if (element.name === "slot" && context.slotAssignments) {
    const assigned = context.slotAssignments.get(attr(element, "name") ?? "") ?? [];
    const projected = assigned.length > 0 ? assigned : element.children.filter(isElement);
    for (const child of projected) {
      if (isElement(child)) {
        adopt(child);
        continue;
      }
      if (!context.options.includeTextNodes || !isText(child)) continue;
      const text = readText(child);
      if (text) children.push(makeTextNode(text));
    }
    return finish();
  }

  // Case 3: ordinary element.
  for (const child of element.children) {
    if (isElement(child)) {
      if (!context.options.includeSelectOptions && element.name === "select") continue;
      adopt(child);
      continue;
    }
    if (!context.options.includeTextNodes || !isText(child)) continue;
    const text = readText(child);
    if (!text) continue;
    // The text node (and its id) is created before the cap check, as before.
    const textNode = makeTextNode(text);
    if (shouldSummarizeMoreChildren(element, children, context)) {
      omitted += 1;
    } else {
      children.push(textNode);
    }
  }
  applyLinkFarmSummary();
  return finish();
}
307
/**
 * Groups a shadow host's usable light-DOM children by slot name.
 * Elements use their `slot` attribute ("" when absent); other accepted nodes
 * go to the default slot "".
 *
 * @param {object} host
 * @returns {Map<string, Array>}
 */
function collectSlotAssignments(host) {
  const assignments = new Map();
  for (const child of host.children) {
    if (!isUsefulSlotAssignment(child)) continue;
    let slotName = "";
    if (isElement(child)) slotName = attr(child, "slot") ?? "";
    const bucket = assignments.get(slotName);
    if (bucket) {
      bucket.push(child);
    } else {
      assignments.set(slotName, [child]);
    }
  }
  return assignments;
}
318
/**
 * Decides whether a host child can be assigned to a slot: text nodes need
 * non-empty normalised text, and elements must not be the declarative shadow
 * template itself. Anything else is rejected.
 *
 * @param {object} node
 * @returns {boolean}
 */
function isUsefulSlotAssignment(node) {
  if (isText(node)) {
    const normalized = normalizeText(node.data, 120);
    return normalized !== "";
  }
  if (isElement(node)) {
    return !isDeclarativeShadowTemplate(node);
  }
  return false;
}
323
/**
 * Appends a walked child to `children` unless a summarising rule drops it.
 *
 * @param {object} parent - Source element owning the children.
 * @param {object|null} child - Semantic node produced for a child, or null.
 * @param {object[]} children - Output list, mutated in place.
 * @param {Map<string, number>} repeatedSignatures - Per-parent signature counts.
 * @param {object} context
 * @returns {number} Number of semantic nodes omitted (0 when appended or null).
 */
function appendSemanticChild(parent, child, children, repeatedSignatures, context) {
  if (!child) return 0;
  // The repeat check runs first because it also records the child's signature.
  const dropped =
    shouldSummarizeRepeatedChild(parent, child, repeatedSignatures, context) ||
    shouldSummarizeMoreChildren(parent, children, context);
  if (dropped) return countSemanticNodes(child);
  children.push(child);
  return 0;
}
334
/**
 * Reports whether an element is dropped outright as non-semantic.
 * Nothing is skipped in "full" mode, and declarative shadow templates are
 * always kept so their content can be walked.
 *
 * @param {object} element
 * @param {object} context
 * @returns {boolean}
 */
function shouldSkipElement(element, context) {
  const exempt = context.options.mode === "full" || isDeclarativeShadowTemplate(element);
  if (exempt) return false;
  return nonSemanticTags.has(element.name) || element.name === "noscript";
}
341
/**
 * True for a <template> whose `shadowrootmode` (or, failing that,
 * `shadowroot`) attribute is "open" or "closed".
 *
 * @param {object} element
 * @returns {boolean}
 */
function isDeclarativeShadowTemplate(element) {
  const { name } = element;
  if (name !== "template") return false;
  const mode = attr(element, "shadowrootmode") ?? attr(element, "shadowroot");
  return ["open", "closed"].includes(mode);
}
346
/**
 * True once a candidate container already holds `maxChildrenPerNode` children,
 * provided large-subtree summarising is enabled and the mode is not "full".
 *
 * @param {object} element - Parent element.
 * @param {object[]} children - Children collected so far.
 * @param {object} context
 * @returns {boolean}
 */
function shouldSummarizeMoreChildren(element, children, context) {
  const { summarizeLargeSubtrees, mode, maxChildrenPerNode } = context.options;
  if (!summarizeLargeSubtrees || mode === "full") return false;
  if (!isLargeSubtreeCandidate(element)) return false;
  return children.length >= maxChildrenPerNode;
}
351
/**
 * Container tags whose child lists may be capped by shouldSummarizeMoreChildren.
 *
 * @param {object} element
 * @returns {boolean}
 */
function isLargeSubtreeCandidate(element) {
  switch (element.name) {
    case "nav":
    case "ul":
    case "ol":
    case "div":
    case "section":
    case "footer":
    case "header":
    case "main":
      return true;
    default:
      return false;
  }
}
354
/**
 * Trims the link-like children of a container that looks like a link farm.
 *
 * Trimming only applies when the feature is enabled, the mode is not "full",
 * the container has more than `maxLinkFarmChildren` children, it passes
 * isLikelyLinkFarmContainer, at least max(8, 65%) of its children are
 * link-like, and at most max(2, 20%) are content-rich. The first
 * `maxLinkFarmChildren` link-like children and all other children are kept.
 *
 * @param {object} element - Source container element.
 * @param {object[]} children - Its semantic children.
 * @param {object} context
 * @returns {{children: object[], omitted: number}} Kept children and the
 *   number of semantic nodes dropped (0 with the original list when untouched).
 */
function summarizeLikelyLinkFarmChildren(element, children, context) {
  const untouched = () => ({ children, omitted: 0 });
  const { summarizeLikelyLinkFarms, mode } = context.options;
  const limit = context.options.maxLinkFarmChildren;

  if (!summarizeLikelyLinkFarms || mode === "full") return untouched();
  if (children.length <= limit) return untouched();
  if (!isLikelyLinkFarmContainer(element)) return untouched();

  const { linkishChildren, contentRichChildren } = childLinkFarmStats(children);
  const minimumLinkish = Math.max(8, Math.floor(children.length * 0.65));
  if (linkishChildren < minimumLinkish) return untouched();
  const maximumContentRich = Math.max(2, Math.floor(children.length * 0.2));
  if (contentRichChildren > maximumContentRich) return untouched();

  const kept = [];
  let omitted = 0;
  let keptLinkish = 0;
  for (const child of children) {
    if (!isLinkishSummaryChild(child)) {
      kept.push(child);
    } else if (keptLinkish < limit) {
      kept.push(child);
      keptLinkish += 1;
    } else {
      omitted += countSemanticNodes(child);
    }
  }
  if (omitted > 0) return { children: kept, omitted };
  return untouched();
}
378
/**
 * Heuristic: can this element be treated as a navigation/link-list container?
 * nav/ul/ol/aside/footer/header always qualify. div/section qualify only when
 * their id/class/role/aria-label/title text contains a list-like keyword and
 * no content-like keyword. Every other tag is rejected.
 *
 * @param {object} element
 * @returns {boolean}
 */
function isLikelyLinkFarmContainer(element) {
  const tagName = element.name;
  if (["nav", "ul", "ol", "aside", "footer", "header"].includes(tagName)) return true;
  if (tagName !== "div" && tagName !== "section") return false;

  const hintText = ["id", "class", "role", "aria-label", "title"]
    .map((attributeName) => attr(element, attributeName))
    .filter(Boolean)
    .join(" ")
    .toLowerCase();

  // Content-like keywords veto the match before list-like keywords are tried.
  const looksLikeContent = /\b(article|body|content|contents|entry|main|post|story|text|view)\b/.test(hintText);
  if (looksLikeContent) return false;
  return /\b(board|category|comment|footer|gallery|gnb|header|issue|list|menu|nav|popular|recent|recommend|related|reply|sidebar|tab)\b/.test(hintText) || /갤러리|댓글|개념글|관련|목록|베스트|인기|최근|추천|카테고리/.test(hintText);
}
391
/**
 * Counts how many children are link-like and how many are content-rich.
 *
 * @param {object[]} children - Semantic nodes.
 * @returns {{linkishChildren: number, contentRichChildren: number}}
 */
function childLinkFarmStats(children) {
  const linkishChildren = children.filter((child) => isLinkishSummaryChild(child)).length;
  const contentRichChildren = children.filter((child) => isContentRichSummaryChild(child)).length;
  return { linkishChildren, contentRichChildren };
}
400
/**
 * A subtree is "link-like" when it contains at least one link, no form
 * controls, no table parts, no content containers and at most one paragraph.
 *
 * @param {object} node - Semantic node.
 * @returns {boolean}
 */
function isLinkishSummaryChild(node) {
  const { links, formControls, tables, paragraphs, contentContainers } = semanticRoleStats(node);
  if (links <= 0) return false;
  const hasHeavyContent = formControls !== 0 || tables !== 0 || contentContainers !== 0;
  return !hasHeavyContent && paragraphs <= 1;
}
404
/**
 * A subtree is "content-rich" when it holds more than one paragraph, or any
 * table part, content container or form control.
 *
 * @param {object} node - Semantic node.
 * @returns {boolean}
 */
function isContentRichSummaryChild(node) {
  const { paragraphs, tables, contentContainers, formControls } = semanticRoleStats(node);
  if (paragraphs > 1) return true;
  return tables > 0 || contentContainers > 0 || formControls > 0;
}
408
/**
 * Recursively tallies role categories over a semantic subtree. Each node is
 * classified by its role, or by its tag when the role is null/undefined.
 *
 * @param {object} node - Semantic node with a `children` array.
 * @returns {{links: number, paragraphs: number, tables: number,
 *   formControls: number, contentContainers: number}}
 */
function semanticRoleStats(node) {
  const kind = node.role ?? node.tag;
  const oneIfAny = (...candidates) => (candidates.includes(kind) ? 1 : 0);

  const totals = {
    links: oneIfAny("link"),
    paragraphs: oneIfAny("p", "text"),
    tables: oneIfAny("table", "row", "cell"),
    formControls: oneIfAny(
      "textbox", "searchbox", "combobox", "listbox", "checkbox",
      "radio", "slider", "spinbutton", "switch"
    ),
    contentContainers: oneIfAny("article", "main")
  };

  for (const child of node.children) {
    const nested = semanticRoleStats(child);
    for (const key of Object.keys(totals)) {
      totals[key] += nested[key];
    }
  }
  return totals;
}
427
/**
 * Records the child's signature for this parent and reports whether that
 * signature had already been seen `maxRepeatedSubtreeInstances` times or more.
 * Nothing is recorded when the feature is off, the mode is "full", or the
 * parent is not a repeat-summarising candidate.
 *
 * @param {object} parent - Source parent element.
 * @param {object} child - Semantic child node.
 * @param {Map<string, number>} signatures - Counts per signature; updated here.
 * @param {object} context
 * @returns {boolean}
 */
function shouldSummarizeRepeatedChild(parent, child, signatures, context) {
  const { summarizeRepeatedSubtrees, mode } = context.options;
  if (!summarizeRepeatedSubtrees || mode === "full") return false;
  if (!isRepeatedSubtreeCandidate(parent)) return false;

  const key = semanticSignature(child);
  const seenBefore = signatures.get(key) ?? 0;
  signatures.set(key, seenBefore + 1);
  return seenBefore >= context.options.maxRepeatedSubtreeInstances;
}
435
/**
 * Container tags whose repeated identical children may be summarised.
 *
 * @param {object} element
 * @returns {boolean}
 */
function isRepeatedSubtreeCandidate(element) {
  switch (element.name) {
    case "body":
    case "main":
    case "nav":
    case "ul":
    case "ol":
    case "div":
    case "section":
    case "footer":
    case "header":
    case "aside":
      return true;
    default:
      return false;
  }
}
438
/**
 * Builds a string fingerprint of a semantic subtree from each node's tag,
 * role, name, text, value and interactive flag, with the child fingerprints
 * nested in brackets.
 *
 * @param {object} node - Semantic node with a `children` array.
 * @returns {string}
 */
function semanticSignature(node) {
  const nested = node.children.map((child) => semanticSignature(child)).join(",");
  const identity = `${node.tag}|${node.role ?? ""}|${node.name}`;
  const detail = `${node.text ?? ""}|${node.value ?? ""}|${node.interactive ? "i" : ""}`;
  return `${identity}|${detail}[${nested}]`;
}
442
/**
 * Counts the nodes in a semantic subtree, the root included.
 *
 * @param {object} node - Semantic node with a `children` array.
 * @returns {number}
 */
function countSemanticNodes(node) {
  return node.children.reduce((total, child) => total + countSemanticNodes(child), 1);
}
447
/**
 * Decides whether an element's children are left out because the element is
 * collapsed: aria-expanded="false", or a <details>, <dialog> or `popover`
 * element without an `open` attribute. Always false when collapsed-subtree
 * pruning is off or hidden content is requested.
 *
 * @param {object} element
 * @param {object} context
 * @returns {boolean}
 */
function shouldSkipChildrenForCollapsedElement(element, context) {
  const { pruneCollapsedSubtrees, includeHidden } = context.options;
  if (!pruneCollapsedSubtrees || includeHidden) return false;
  if (attr(element, "aria-expanded") === "false") return true;

  const lacksOpen = attr(element, "open") === null;
  const isDisclosureTag = element.name === "details" || element.name === "dialog";
  return lacksOpen && (isDisclosureTag || attr(element, "popover") !== null);
}
455
/**
 * True when collapsed-subtree pruning is on and the element's id was indexed
 * as controlled by an element with aria-expanded="false".
 *
 * @param {object} element
 * @param {object} context
 * @returns {boolean}
 */
function isCollapsedControlledElement(element, context) {
  const id = attr(element, "id");
  if (!id || !context.options.pruneCollapsedSubtrees) return false;
  return context.collapsedControlledIds.has(id);
}
459
/**
 * Heuristic for overlays (drawers, modals, menus, ...) that appear closed.
 * The element must carry an overlay keyword, show no "open" signal, have no
 * non-negative tabindex of its own, and show at least one closed signal
 * (inline style, class name, or inert/aria-hidden).
 *
 * @param {object} element
 * @param {object} context
 * @returns {boolean}
 */
function isLikelyClosedOverlay(element, context) {
  const { pruneLikelyClosedOverlays, mode } = context.options;
  if (!pruneLikelyClosedOverlays || mode === "full") return false;

  const isCandidate =
    !hasUsefulOpenSignal(element) &&
    hasOverlaySignal(element) &&
    !hasDirectFocusableIntent(element);
  if (!isCandidate) return false;

  return hasOffscreenOrClosedStyle(element) || hasClosedClassSignal(element) || hasInertSignal(element);
}
466
/**
 * True when the element advertises an open/active state: an `open` attribute,
 * aria-expanded="true", aria-modal="true", data-open="true" or data-state="open".
 *
 * @param {object} element
 * @returns {boolean}
 */
function hasUsefulOpenSignal(element) {
  if (attr(element, "open") !== null) return true;
  const openValues = [
    ["aria-expanded", "true"],
    ["aria-modal", "true"],
    ["data-open", "true"],
    ["data-state", "open"]
  ];
  return openValues.some(([attributeName, expected]) => attr(element, attributeName) === expected);
}
469
/**
 * True when the tag name, id, class, role or aria-label contains an
 * overlay-style keyword (drawer, modal, dialog, menu, ...) as a whole word.
 *
 * @param {object} element
 * @returns {boolean}
 */
function hasOverlaySignal(element) {
  const parts = [element.name];
  for (const attributeName of ["id", "class", "role", "aria-label"]) {
    parts.push(attr(element, attributeName));
  }
  const haystack = parts.filter(Boolean).join(" ").toLowerCase();
  return /\b(drawer|modal|dialog|popover|overlay|hamburger|menu|sidebar|sheet|flyout|dropdown)\b/.test(haystack);
}
473
/**
 * True when the element itself carries a non-negative `tabindex`.
 *
 * The value is parsed as an integer (leading whitespace, optional sign,
 * digits), so an empty or non-numeric tabindex no longer counts. The previous
 * `Number("")` conversion yielded 0 and treated `tabindex=""` as focusable.
 *
 * @param {object} element
 * @returns {boolean}
 */
function hasDirectFocusableIntent(element) {
  const tabindex = attr(element, "tabindex");
  if (tabindex === null) return false;
  return Number.parseInt(tabindex, 10) >= 0;
}
477
/**
 * True when the element has an `inert` attribute or aria-hidden="true".
 *
 * @param {object} element
 * @returns {boolean}
 */
function hasInertSignal(element) {
  const isInert = attr(element, "inert") !== null;
  if (isInert) return true;
  return attr(element, "aria-hidden") === "true";
}
480
/**
 * True when the class attribute contains a closed-state keyword
 * (closed, collapsed, hidden, inactive, is-closed, is-hidden) as a whole
 * word, ignoring case.
 *
 * @param {object} element
 * @returns {boolean}
 */
function hasClosedClassSignal(element) {
  const classValue = attr(element, "class");
  const closedKeyword = /\b(closed|collapsed|hidden|inactive|is-closed|is-hidden)\b/i;
  return closedKeyword.test(classValue ?? "");
}
484
/**
 * Checks the inline style for signs that the element is parked off-screen or
 * collapsed: a large negative offset, a negative percentage translate, a zero
 * height/max-height, or pointer-events:none.
 *
 * The zero-height test requires the value to be exactly zero (optionally with
 * a unit and `!important`). It was previously unanchored, so a declaration
 * such as `height:0.5em` was mistaken for zero height.
 *
 * @param {object} element
 * @returns {boolean}
 */
function hasOffscreenOrClosedStyle(element) {
  const style = attr(element, "style") ?? "";
  if (!style) return false;
  // Whitespace is removed first, so the patterns can assume compact declarations.
  const normalized = style.replace(/\s+/g, "").toLowerCase();
  const patterns = [
    /(?:^|;)(?:left|right|top|bottom):-\d{2,}(?:px|rem|em|vw|vh|%)/,
    /(?:^|;)transform:translate[xy]?\(-[1-9]\d*%/,
    /(?:^|;)(?:max-height|height):0(?:\.0+)?(?:[a-z]+|%)?(?=;|!|$)/,
    /(?:^|;)pointer-events:none/
  ];
  return patterns.some((pattern) => pattern.test(normalized));
}
490
/**
 * Decides whether a generic wrapper element is dropped from the output (its
 * children, if any, are hoisted by the caller).
 *
 * Never prunes in "full" mode. Always prunes role none/presentation. Otherwise
 * keeps anything interactive, with a non-generic role, with a name, or whose
 * id is referenced elsewhere. Remaining elements are pruned when empty, or
 * when they carry no id / aria-label / aria-labelledby.
 *
 * @param {object} element
 * @param {string|null} role
 * @param {string} name
 * @param {boolean} interactive
 * @param {object[]} children
 * @param {object} context
 * @returns {boolean}
 */
function shouldPrune(element, role, name, interactive, children, context) {
  if (context.options.mode === "full") return false;
  if (role === "none" || role === "presentation") return true;

  const carriesSemantics = interactive || (role && role !== "generic") || name;
  if (carriesSemantics) return false;
  if (isReferencedIdTarget(element, context)) return false;
  if (children.length === 0) return true;

  const hasIdentity = attr(element, "id") || attr(element, "aria-label") || attr(element, "aria-labelledby");
  return !hasIdentity;
}
501
/**
 * True when the element has an id that some ARIA id-reference attribute in
 * the document points at (as indexed in context.referencedIds).
 *
 * @param {object} element
 * @param {object} context
 * @returns {boolean}
 */
function isReferencedIdTarget(element, context) {
  const id = attr(element, "id");
  if (!id) return false;
  return Boolean(context.referencedIds.has(id));
}
505
/**
 * True for a list item (outside "full" mode) that directly contains a link or
 * button child; the caller then replaces the list item by its children.
 *
 * @param {string|null} role - Role of the element being considered.
 * @param {object[]} children - Its semantic children.
 * @param {object} context
 * @returns {boolean}
 */
function shouldPruneListItemWrapper(role, children, context) {
  const applies = context.options.mode !== "full" && role === "listitem";
  if (!applies) return false;
  for (const child of children) {
    if (child.role === "link" || child.role === "button") return true;
  }
  return false;
}
510
/**
 * True for a custom element that only wraps children: not interactive, no
 * non-generic role, no name, at least one child, and none of the identifying
 * host attributes checked by hasUsefulHostSignal. Never applies in "full" mode.
 *
 * @param {object} element
 * @param {string|null} role
 * @param {string} name
 * @param {boolean} interactive
 * @param {object[]} children
 * @param {object} context
 * @returns {boolean}
 */
function shouldPruneCustomElementWrapper(element, role, name, interactive, children, context) {
  if (context.options.mode === "full" || !isCustomElement(element)) return false;
  const carriesSemantics = interactive || (role && role !== "generic") || name;
  if (carriesSemantics || children.length === 0) return false;
  return !hasUsefulHostSignal(element);
}
520
/**
 * Treats any tag name containing a hyphen as a custom element.
 *
 * @param {object} element
 * @returns {boolean}
 */
function isCustomElement(element) {
  const { name: tagName } = element;
  return tagName.includes("-");
}
523
/**
 * True when the element has a non-empty value for any attribute that makes it
 * worth keeping as its own node (id, ARIA naming/relationship/state
 * attributes, or tabindex).
 *
 * @param {object} element
 * @returns {boolean}
 */
function hasUsefulHostSignal(element) {
  const signalAttributes = [
    "id",
    "aria-label",
    "aria-labelledby",
    "aria-describedby",
    "aria-controls",
    "aria-expanded",
    "aria-selected",
    "aria-current",
    "tabindex"
  ];
  return signalAttributes.some((attributeName) => Boolean(attr(element, attributeName)));
}
528
/**
 * Resolves the role reported for an element: the first token of an explicit
 * `role` attribute, otherwise an implicit role derived from the tag.
 *
 * Fixes over the previous version:
 *  - the landmark lookup only consults landmarkTags' own keys, so a tag named
 *    like an inherited Object property (e.g. <constructor>) no longer gets a
 *    function as its role;
 *  - <a>/<area> count as links whenever the `href` attribute is present, even
 *    when empty, which matches the `href !== null` test in isFocusable.
 *
 * @param {object} element
 * @returns {string|null} Role name, or null when the element has none.
 */
function getRole(element) {
  const explicit = firstToken(attr(element, "role"));
  if (explicit) return explicit;
  const tag = element.name;
  // <section> and <form> only expose a role when they have an explicit name source.
  if (tag === "section" && !hasExplicitNameSource(element)) return null;
  if (tag === "form" && !hasExplicitNameSource(element)) return null;
  if (Object.prototype.hasOwnProperty.call(landmarkTags, tag)) return landmarkTags[tag] ?? null;
  if (/^h[1-6]$/.test(tag)) return "heading";
  if (tag === "a" || tag === "area") return attr(element, "href") !== null ? "link" : null;
  if (tag === "button") return "button";
  if (tag === "details" || tag === "fieldset") return "group";
  if (tag === "dialog") return "dialog";
  if (tag === "figure") return "figure";
  if (tag === "form") return "form";
  if (tag === "iframe") return "iframe";
  if (tag === "img") return "img";
  if (tag === "input") return inputRole(element);
  if (tag === "li") return "listitem";
  if (tag === "ol" || tag === "ul") return "list";
  if (tag === "option") return "option";
  if (tag === "p") return "p";
  if (tag === "progress") return "progressbar";
  if (tag === "select") return attr(element, "multiple") !== null ? "listbox" : "combobox";
  if (tag === "summary") return "button";
  if (tag === "table") return "table";
  if (tag === "td") return "cell";
  if (tag === "textarea") return "textbox";
  if (tag === "th") return attr(element, "scope") === "row" ? "rowheader" : "columnheader";
  if (tag === "tr") return "row";
  return null;
}
559
/**
 * Maps an <input> to a role by its lower-cased `type` (default "text").
 * Hidden inputs have no role; unlisted types are reported as "textbox".
 *
 * @param {object} element
 * @returns {string|null}
 */
function inputRole(element) {
  const type = (attr(element, "type") ?? "text").toLowerCase();
  switch (type) {
    case "hidden":
      return null;
    case "button":
    case "submit":
    case "reset":
    case "image":
      return "button";
    case "checkbox":
      return "checkbox";
    case "radio":
      return "radio";
    case "range":
      return "slider";
    case "search":
      return "searchbox";
    case "number":
      return "spinbutton";
    default:
      return "textbox";
  }
}
571
/**
 * Computes the name reported for an element. Sources are tried in this order
 * and the first non-empty one wins: aria-labelledby targets, aria-label, an
 * associated <label>, the button-like input value, `alt` (img role only),
 * descendant text (roles named from contents), and finally `title`.
 *
 * @param {object} element
 * @param {string} role - Role already resolved for the element.
 * @param {object} context
 * @returns {string} The name, or "" when no source yields text.
 */
function computeName(element, role, context) {
  const limit = context.options.maxTextLength;
  const clip = (value) => normalizeText(value, limit);

  const labelledBy = attr(element, "aria-labelledby");
  if (labelledBy) {
    const pieces = [];
    for (const id of labelledBy.split(/\s+/)) {
      const target = context.ids.get(id);
      if (target) pieces.push(descendantText(target, context));
    }
    const fromReferences = clip(pieces.join(" "));
    if (fromReferences) return fromReferences;
  }

  const ariaLabel = clip(attr(element, "aria-label") ?? "");
  if (ariaLabel) return ariaLabel;

  const fromLabel = labelName(element, context);
  if (fromLabel) return fromLabel;

  const valueName = elementValueName(element);
  if (valueName) return clip(valueName);

  if (role === "img") {
    const alt = clip(attr(element, "alt") ?? "");
    if (alt) return alt;
  }

  if (rolesNamedFromContents.has(role)) {
    // Text inside nodes referenced as descriptions is kept out of the name.
    const excludeIds = descriptionReferenceIds(element);
    const contents = clip(descendantText(element, context, { excludeIds }));
    if (contents) return contents;
  }

  const title = clip(attr(element, "title") ?? "");
  return title ? title : "";
}
596
/**
 * Returns the text of the <label> associated with an element: first the label
 * indexed under the element's id (via `for`), and if that yields no text, the
 * closest enclosing <label>.
 *
 * @param {object} element
 * @param {object} context
 * @returns {string} Normalised label text, or "" when there is no label.
 */
function labelName(element, context) {
  const textOf = (label) => normalizeText(descendantText(label, context), context.options.maxTextLength);

  const id = attr(element, "id");
  const explicitLabel = id ? context.labelsByFor.get(id) : void 0;
  if (explicitLabel) {
    const explicitText = textOf(explicitLabel);
    if (explicitText) return explicitText;
  }

  const enclosingLabel = findClosestLabel(element);
  if (!enclosingLabel) return "";
  return textOf(enclosingLabel);
}
608
/**
 * Walks up the parent chain and returns the nearest <label> ancestor.
 *
 * @param {object} element
 * @returns {object|null} The label element, or null when there is none.
 */
function findClosestLabel(element) {
  for (let ancestor = element.parent; ancestor; ancestor = ancestor.parent) {
    if (isElement(ancestor) && ancestor.name === "label") return ancestor;
  }
  return null;
}
616
/**
 * Returns the `value` attribute of a button-like <input> (type button, submit
 * or reset), which serves as its name; "" for everything else.
 *
 * @param {object} element
 * @returns {string}
 */
function elementValueName(element) {
  if (element.name !== "input") return "";
  const type = (attr(element, "type") ?? "text").toLowerCase();
  const isPushButton = type === "button" || type === "submit" || type === "reset";
  if (!isPushButton) return "";
  return attr(element, "value") ?? "";
}
623
/**
 * Collects state and property information from an element's native and ARIA
 * attributes into a plain object. A key is present only when its attribute
 * supplies a usable value; an element with none of them yields {}.
 *
 * @param {object} element
 * @returns {object} State object; keys are added in a fixed order.
 */
function getState(element) {
  const state = {};
  const read = (attributeName) => attr(element, attributeName);
  const isPresent = (attributeName) => read(attributeName) !== null;
  const isTrue = (attributeName) => read(attributeName) === "true";

  // "true"/"false" -> boolean; anything else leaves the key unset.
  const setBoolean = (key, attributeName) => {
    const raw = read(attributeName);
    if (raw === "true") state[key] = true;
    else if (raw === "false") state[key] = false;
  };
  // "true"/"false"/"mixed" -> true/false/"mixed".
  const setTristate = (key, raw) => {
    if (raw === "true") state[key] = true;
    else if (raw === "false") state[key] = false;
    else if (raw === "mixed") state[key] = "mixed";
  };
  // Non-empty values other than "false": "true" -> true, otherwise the raw token.
  const setToken = (key, attributeName) => {
    const raw = read(attributeName);
    if (raw && raw !== "false") state[key] = raw === "true" ? true : raw;
  };
  const setText = (key, attributeName, limit) => {
    const raw = read(attributeName);
    if (raw) state[key] = normalizeText(raw, limit);
  };
  const setNumber = (key, attributeName) => {
    const parsed = ariaNumber(read(attributeName));
    if (typeof parsed === "number") state[key] = parsed;
  };

  if (isPresent("disabled") || isTrue("aria-disabled")) state.disabled = true;
  setBoolean("busy", "aria-busy");
  setBoolean("multiselectable", "aria-multiselectable");
  setText("sort", "aria-sort", 40);
  setBoolean("grabbed", "aria-grabbed");
  setText("dropEffect", "aria-dropeffect", 80);
  if (isPresent("required") || isTrue("aria-required")) state.required = true;
  if (isPresent("readonly") || isTrue("aria-readonly")) state.readonly = true;
  // aria-checked wins; a native `checked` attribute counts as "true".
  setTristate("checked", read("aria-checked") ?? (isPresent("checked") ? "true" : null));
  if (isPresent("selected") || isTrue("aria-selected")) state.selected = true;
  setBoolean("expanded", "aria-expanded");
  setTristate("pressed", read("aria-pressed"));
  setToken("invalid", "aria-invalid");
  setToken("current", "aria-current");
  setToken("haspopup", "aria-haspopup");
  setText("controls", "aria-controls", 120);
  setText("live", "aria-live", 120);
  if (isTrue("aria-modal")) state.modal = true;
  setText("orientation", "aria-orientation", 40);
  setNumber("valueMin", "aria-valuemin");
  setNumber("valueMax", "aria-valuemax");
  setNumber("valueNow", "aria-valuenow");
  setText("valueText", "aria-valuetext", 120);
  return state;
}
676
/**
 * Parses an ARIA numeric attribute value.
 *
 * Non-string input (null, undefined, ...) and blank strings yield undefined
 * instead of throwing; so do values that do not convert to a finite number.
 *
 * @param {string|null|undefined} value - Raw attribute value.
 * @returns {number|undefined}
 */
function ariaNumber(value) {
  if (typeof value !== "string" || value.trim() === "") return void 0;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : void 0;
}
681
/**
 * Decides whether an element counts as interactive.
 *
 * An element is interactive when its role is in `interactiveRoles`, when the
 * caller already determined it is focusable, or when it is a native form
 * control tag (button, input, select, textarea).
 *
 * @param {object} element - DOM element; only `element.name` is read here.
 * @param {string | null} role - Role resolved for the element, if any.
 * @param {boolean} focusable - Focusability computed by the caller.
 * @returns {boolean}
 */
function isInteractive(element, role, focusable) {
  const hasInteractiveRole = Boolean(role) && interactiveRoles.has(role);
  if (hasInteractiveRole || focusable) return true;
  switch (element.name) {
    case "button":
    case "input":
    case "select":
    case "textarea":
      return true;
    default:
      return false;
  }
}
686
/**
 * Decides whether an element should be treated as focusable.
 *
 * Rules, in order:
 *  1. Any element carrying a `disabled` attribute is not focusable.
 *  2. A `tabindex` that parses to an integer >= 0 makes it focusable.
 *  3. A role listed in `interactiveRoles` makes it focusable.
 *  4. Otherwise only `<a href>` is focusable.
 *
 * @param {object} element - DOM element with `name` and `attribs`.
 * @param {string | null} role - Role resolved for the element, if any.
 * @returns {boolean}
 */
function isFocusable(element, role) {
  if (attr(element, "disabled") !== null) return false;
  const tabindex = attr(element, "tabindex");
  if (tabindex !== null) {
    // Parse like the HTML "rules for parsing integers": leading whitespace and
    // trailing garbage are tolerated, but an empty or non-numeric value is an
    // error and the attribute is ignored. `Number("")` would yield 0 and
    // wrongly mark `<div tabindex>` as focusable; parseInt yields NaN instead.
    const parsedTabindex = Number.parseInt(tabindex, 10);
    if (parsedTabindex >= 0) return true;
  }
  if (role && interactiveRoles.has(role)) return true;
  return element.name === "a" && attr(element, "href") !== null;
}
693
/**
 * Reports whether an element is hidden according to its own attributes.
 *
 * Checks, in order: presence of the `hidden` attribute, `aria-hidden="true"`,
 * and an inline `style` matching `hiddenStylePattern`.
 *
 * @param {object} element - DOM element whose attributes are inspected.
 * @returns {boolean}
 */
function isHidden(element) {
  const hasHiddenAttribute = attr(element, "hidden") !== null;
  if (hasHiddenAttribute || attr(element, "aria-hidden") === "true") return true;
  const inlineStyle = attr(element, "style");
  if (!inlineStyle) return false;
  return hiddenStylePattern.test(inlineStyle);
}
699
/**
 * Heuristic: does the element's identifying text look like advertising?
 *
 * The non-empty values of id, class, aria-label, title and data-testid are
 * joined with spaces, lower-cased, and matched against English ad keywords
 * (as whole words) and a set of Korean ad-related terms.
 *
 * @param {object} element - DOM element whose attributes are inspected.
 * @returns {boolean}
 */
function isLikelyAd(element) {
  const fragments = [];
  for (const attributeName of ["id", "class", "aria-label", "title", "data-testid"]) {
    const attributeValue = attr(element, attributeName);
    if (attributeValue) fragments.push(attributeValue);
  }
  const haystack = fragments.join(" ").toLowerCase();
  if (/\b(ad|ads|advert|advertisement|banner|sponsor|sponsored|promotion|promoted|powerlink)\b/.test(haystack)) {
    return true;
  }
  return /파워링크|광고|직접홍보|홍보/.test(haystack);
}
709
/**
 * Heuristic: does the element look like page boilerplate (footer/sidebar)?
 *
 * `<footer>` is always boilerplate. `<main>`/`<article>` tags and elements
 * whose first role token is main/article never are. Otherwise the joined,
 * lower-cased id/class/aria-label/title text decides: content-like keywords
 * veto, footer/sidebar keywords (or the Korean word for footer) confirm.
 *
 * @param {object} element - DOM element to classify.
 * @returns {boolean}
 */
function isLikelyBoilerplate(element) {
  switch (element.name) {
    case "footer":
      return true;
    case "main":
    case "article":
      return false;
    default:
      break;
  }
  const roleToken = firstToken(attr(element, "role"));
  if (roleToken === "main" || roleToken === "article") return false;
  const fragments = [];
  for (const attributeName of ["id", "class", "aria-label", "title"]) {
    const attributeValue = attr(element, attributeName);
    if (attributeValue) fragments.push(attributeValue);
  }
  const haystack = fragments.join(" ").toLowerCase();
  if (!haystack) return false;
  // Content-like markers win over boilerplate markers.
  const looksLikeContent = /\b(content|contents|entry|post-body|article-body|story-body|view-content)\b/.test(haystack);
  if (looksLikeContent) return false;
  if (/\b(footer|sidebar)\b/.test(haystack)) return true;
  return /푸터/.test(haystack);
}
724
/**
 * Heuristic: is this a `<table>` whose id/class/aria-label/title text marks
 * it as a "gall list" or "bottom list" style listing?
 *
 * @param {object} element - DOM element to classify.
 * @returns {boolean} False for any non-table element.
 */
function isLikelyBoilerplateTable(element) {
  if (element.name !== "table") return false;
  const fragments = [];
  for (const attributeName of ["id", "class", "aria-label", "title"]) {
    const attributeValue = attr(element, attributeName);
    if (attributeValue) fragments.push(attributeValue);
  }
  const haystack = fragments.join(" ").toLowerCase();
  if (/\bgall[_-]?list\b/.test(haystack)) return true;
  return /\bbottom[_-]?list\w*\b/.test(haystack);
}
734
/**
 * Replaces a boilerplate table with a flat container of its notable items.
 *
 * @param {object} element - The table element to flatten.
 * @param {object} context - Extraction context passed through to helpers.
 * @returns {object | null} A container node tagged with the element's name,
 *   or null when no items were collected.
 */
function flattenBoilerplateTable(element, context) {
  const items = collectFlattenedBoilerplateItems(element, context);
  return items.length > 0 ? containerNode(context, element.name, items) : null;
}
739
/**
 * Walks the element's subtree and returns a flat list of leaf nodes for the
 * descendants worth reporting.
 *
 * A child is reported when it has a role, a non-empty computed name, and is
 * either interactive or has role "heading" or "img". Reported children are
 * not descended into; all other element children are searched recursively.
 * Children are skipped when `shouldSkipElement` says so, when hidden (unless
 * `options.includeHidden`), or when ad-like (if `options.excludeLikelyAds`).
 *
 * @param {object} element - Subtree root whose children are scanned.
 * @param {object} context - Extraction context (`options`, id counter, ...).
 * @returns {object[]} Flat list of node records in document order.
 */
function collectFlattenedBoilerplateItems(element, context) {
  const collected = [];
  for (const candidate of element.children) {
    const skip =
      !isElement(candidate) ||
      shouldSkipElement(candidate, context) ||
      (!context.options.includeHidden && isHidden(candidate)) ||
      (context.options.excludeLikelyAds && isLikelyAd(candidate));
    if (skip) continue;

    const role = getRole(candidate);
    const focusable = isFocusable(candidate, role);
    const interactive = isInteractive(candidate, role, focusable);
    const name = role ? computeName(candidate, role, context) : "";
    const reportable = role && name && (interactive || role === "heading" || role === "img");
    if (!reportable) {
      // Not notable itself: look for notable descendants instead.
      collected.push(...collectFlattenedBoilerplateItems(candidate, context));
      continue;
    }

    // Key order is kept stable because it shapes the serialized output.
    const node = {
      id: nextId(context),
      tag: candidate.name,
      role,
      name,
      interactive,
      focusable,
      selector: getSelector(candidate),
      xpath: getXPath(candidate),
      children: []
    };
    const description = computeDescription(candidate, context);
    if (description) node.description = description;
    const value = getValue(candidate);
    if (value) node.value = value;
    const state = getState(candidate);
    if (Object.keys(state).length > 0) node.state = state;
    if (context.options.includeAttributes) node.attributes = { ...candidate.attribs };
    collected.push(node);
  }
  return collected;
}
776
/**
 * Reports whether the element carries any attribute that explicitly supplies
 * a name: aria-label, aria-labelledby, or title (present, even if empty).
 *
 * @param {object} element - DOM element whose attributes are inspected.
 * @returns {boolean}
 */
function hasExplicitNameSource(element) {
  return ["aria-label", "aria-labelledby", "title"].some(
    (attributeName) => attr(element, attributeName) !== null
  );
}
779
/**
 * Returns the normalized text of the element's immediate text-node children
 * (descendant elements are ignored), joined with single spaces.
 *
 * @param {object} element - DOM element whose direct children are read.
 * @param {number} maxLength - Length limit forwarded to `normalizeText`.
 * @returns {string}
 */
function directText(element, maxLength) {
  const pieces = [];
  for (const child of element.children) {
    if (isText(child)) pieces.push(child.data);
  }
  return normalizeText(pieces.join(" "), maxLength);
}
785
/**
 * Collects the raw (un-normalized) text of all descendants of an element,
 * joined with single spaces.
 *
 * When the element has a declarative shadow template child and a context is
 * supplied, text is gathered from the template's content instead, with the
 * element's slot assignments temporarily installed on the context so that
 * `<slot>` elements resolve against this host.
 *
 * @param {object} element - Root element whose text is gathered.
 * @param {object} [context] - Extraction context; `slotAssignments` is
 *   temporarily replaced during shadow-template traversal.
 * @param {object} [options] - Forwarded to `collectDescendantText`
 *   (e.g. `excludeIds`).
 * @returns {string}
 */
function descendantText(element, context, options = {}) {
  const parts = [];
  const shadowTemplate = element.children.find((child) => isElement(child) && isDeclarativeShadowTemplate(child));
  if (shadowTemplate && context) {
    const previousAssignments = context.slotAssignments;
    context.slotAssignments = collectSlotAssignments(element);
    try {
      collectDescendantText(shadowTemplate.children, parts, context, options);
    } finally {
      // Always restore the caller's assignments, even if traversal throws,
      // so the shared context is never left pointing at this host's slots.
      context.slotAssignments = previousAssignments;
    }
    return parts.join(" ");
  }
  collectDescendantText(element.children, parts, context, options);
  return parts.join(" ");
}
798
/**
 * Appends the text of `nodes` and their descendants to `parts`, depth-first.
 *
 * - Text nodes contribute their `data`.
 * - Elements whose id is in `options.excludeIds` are skipped entirely.
 * - `<slot>` elements (when the context has slot assignments) are replaced
 *   by their assigned nodes, falling back to the slot's own children.
 * - Elements in `nonSemanticTags` and `<noscript>` are skipped.
 *
 * @param {Iterable<object>} nodes - Nodes to traverse.
 * @param {string[]} parts - Output accumulator, mutated in place.
 * @param {object} [context] - Extraction context providing `slotAssignments`.
 * @param {object} [options] - May carry `excludeIds`, a set of ids to skip.
 * @returns {void}
 */
function collectDescendantText(nodes, parts, context, options = {}) {
  for (const current of nodes) {
    if (isText(current)) {
      parts.push(current.data);
    } else if (isElement(current)) {
      const currentId = attr(current, "id");
      if (currentId && options.excludeIds?.has(currentId)) continue;

      if (current.name === "slot" && context?.slotAssignments) {
        const slotKey = attr(current, "name") ?? "";
        const assignedNodes = context.slotAssignments.get(slotKey) ?? [];
        const source = assignedNodes.length > 0 ? assignedNodes : current.children;
        collectDescendantText(source, parts, context, options);
        continue;
      }

      const isSkippedTag = nonSemanticTags.has(current.name) || current.name === "noscript";
      if (isSkippedTag) continue;
      collectDescendantText(current.children, parts, context, options);
    }
  }
}
817
/**
 * Collapses runs of whitespace to single spaces, trims, and truncates.
 *
 * The returned string never exceeds `maxLength` UTF-16 code units. When
 * truncation happens and there is room, the text ends with "..." and the
 * marker is counted against the limit. When `maxLength` is too small to fit
 * the marker plus at least one character, the text is hard-cut instead.
 * Truncation never leaves a dangling high surrogate at the cut point.
 *
 * @param {string} value - Text to normalize.
 * @param {number} maxLength - Maximum length of the result.
 * @returns {string}
 */
function normalizeText(value, maxLength) {
  const normalized = value.replace(/\s+/g, " ").trim();
  // Written as a negated ">" so a NaN/undefined limit means "no truncation".
  if (!(normalized.length > maxLength)) return normalized;

  const marker = "...";
  const room = Math.max(0, Math.floor(maxLength));
  const useMarker = room > marker.length;
  let end = useMarker ? room - marker.length : room;
  if (end > 0) {
    // Do not cut between the two halves of a surrogate pair.
    const lastUnit = normalized.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  const head = normalized.slice(0, end);
  return useMarker ? `${head}${marker}` : head;
}
821
/**
 * Computes an element's accessible description.
 *
 * Prefers the text of elements referenced by `aria-describedby` (looked up
 * in `context.ids`, unknown ids ignored), joined with spaces. Falls back to
 * the `title` attribute. The result is normalized and limited to
 * `context.options.maxTextLength`.
 *
 * @param {object} element - Element being described.
 * @param {object} context - Extraction context with `ids` and `options`.
 * @returns {string} Description text, or an empty string when none exists.
 */
function computeDescription(element, context) {
  const describedBy = attr(element, "aria-describedby");
  if (describedBy) {
    const fragments = [];
    for (const referencedId of describedBy.split(/\s+/)) {
      const target = context.ids.get(referencedId);
      if (!target) continue;
      const targetText = descendantText(target, context);
      if (targetText) fragments.push(targetText);
    }
    const combined = fragments.join(" ");
    if (combined) return normalizeText(combined, context.options.maxTextLength);
  }
  const title = attr(element, "title") ?? "";
  return normalizeText(title, context.options.maxTextLength);
}
829
/**
 * Returns the element's value text: the first attribute present among
 * `value`, `aria-valuetext`, `aria-valuenow` (even if empty), normalized
 * and capped at 240 characters. Empty string when none is present.
 *
 * @param {object} element - DOM element whose attributes are inspected.
 * @returns {string}
 */
function getValue(element) {
  for (const attributeName of ["value", "aria-valuetext", "aria-valuenow"]) {
    const raw = attr(element, attributeName);
    if (raw !== null) return normalizeText(raw, 240);
  }
  return normalizeText("", 240);
}
832
/**
 * Builds a short CSS selector hint for an element.
 *
 * Uses `#id` (escaped) when the element has a non-empty id. Otherwise uses
 * the tag name, adding `:nth-of-type(n)` when the element is not the first
 * of its tag among its parent's children. The selector is relative to the
 * parent; no ancestor path is included.
 *
 * @param {object} element - DOM element to describe.
 * @returns {string}
 */
function getSelector(element) {
  const id = attr(element, "id");
  if (id) return `#${cssEscape(id)}`;

  const container = element.parent;
  if (!container || !("children" in container)) return element.name;

  const sameTagSiblings = [];
  for (const node of container.children) {
    if (isElement(node) && node.name === element.name) sameTagSiblings.push(node);
  }
  const position = sameTagSiblings.indexOf(element);
  if (position <= 0) return element.name;
  return `${element.name}:nth-of-type(${position + 1})`;
}
841
/**
 * Builds an absolute XPath-like location for an element.
 *
 * Walks up through element parents, emitting `tag[n]` for each step, where
 * n is the 1-based position among same-tag siblings. A node without a
 * children-bearing parent contributes its bare tag name and ends the walk;
 * the walk also ends once the parent is not itself an element.
 *
 * @param {object} element - DOM element to locate.
 * @returns {string} Path beginning with "/".
 */
function getXPath(element) {
  // Collected leaf-to-root, reversed at the end.
  const segments = [];
  let node = element;
  while (node) {
    const container = node.parent;
    if (!container || !("children" in container)) {
      segments.push(node.name);
      break;
    }
    const tagName = node.name;
    const peers = container.children.filter(
      (candidate) => isElement(candidate) && candidate.name === tagName
    );
    segments.push(`${tagName}[${peers.indexOf(node) + 1}]`);
    node = isElement(container) ? container : null;
  }
  return `/${segments.reverse().join("/")}`;
}
857
/**
 * Creates a role-less, unnamed, non-interactive node that only groups
 * children.
 *
 * @param {object} context - Extraction context; supplies the next node id.
 * @param {string} tag - Tag name recorded on the node.
 * @param {object[]} children - Child nodes to attach.
 * @returns {object}
 */
function containerNode(context, tag, children) {
  const id = nextId(context);
  const node = {
    id,
    tag,
    role: null,
    name: "",
    interactive: false,
    focusable: false,
    children
  };
  return node;
}
868
/**
 * Creates a childless placeholder node carrying the reason its content
 * could not be provided.
 *
 * @param {object} context - Extraction context; supplies the next node id.
 * @param {string} tag - Tag name recorded on the node.
 * @param {string} unavailableReason - Stored verbatim on the node.
 * @returns {object}
 */
function unavailableNode(context, tag, unavailableReason) {
  const id = nextId(context);
  const node = {
    id,
    tag,
    role: null,
    name: "",
    interactive: false,
    focusable: false,
    children: [],
    unavailableReason
  };
  return node;
}
880
/**
 * Creates a "note" node recording how many static nodes were left out.
 *
 * @param {object} context - Extraction context; supplies the next node id.
 * @param {number} omitted - Count interpolated into the node's name.
 * @returns {object}
 */
function omittedNode(context, omitted) {
  const id = nextId(context);
  const label = `${omitted} static nodes omitted`;
  return {
    id,
    tag: "omitted",
    role: "note",
    name: label,
    interactive: false,
    focusable: false,
    children: []
  };
}
891
/**
 * Issues the next node id ("static-<n>") and advances `context.nextId`.
 *
 * @param {object} context - Object holding the numeric `nextId` counter.
 * @returns {string}
 */
function nextId(context) {
  const sequence = context.nextId++;
  return `static-${sequence}`;
}
894
/**
 * Reads an attribute from an element.
 *
 * @param {object} element - Object with an `attribs` dictionary.
 * @param {string} name - Attribute name.
 * @returns {string | null} The attribute value ("" when the stored value is
 *   null/undefined), or null when the attribute is not an own property.
 */
function attr(element, name) {
  // Own-property check so inherited keys like "toString" read as absent.
  const isPresent = Object.prototype.hasOwnProperty.call(element.attribs, name);
  if (!isPresent) return null;
  const stored = element.attribs[name];
  return stored ?? "";
}
897
/**
 * Returns the first whitespace-separated token of a string.
 *
 * @param {string | null | undefined} value - Text such as a `role` value.
 * @returns {string | null} The first token, or null when the input is
 *   nullish, empty, or only whitespace.
 */
function firstToken(value) {
  if (value === null || value === void 0) return null;
  const [head] = value.trim().split(/\s+/);
  return head || null;
}
900
/**
 * Escapes a string for use as a CSS identifier (e.g. after `#`), following
 * the CSSOM "serialize an identifier" algorithm used by `CSS.escape`.
 *
 * Compared with backslash-escaping every non-word character, this also:
 *  - escapes a leading digit, or a digit after a leading "-", as a code
 *    point (`\31 `), since such identifiers are otherwise invalid;
 *  - escapes a lone "-";
 *  - escapes control characters by code point (a backslash followed by a
 *    literal newline is not a valid escape);
 *  - leaves non-ASCII characters untouched, so astral characters are not
 *    split into separately escaped surrogate halves.
 *
 * @param {string} value - Raw identifier text, such as an element id.
 * @returns {string} Text safe to embed in a selector.
 */
function cssEscape(value) {
  if (value === "-") return "\\-";
  const startsWithHyphen = value.startsWith("-");
  let escaped = "";
  let position = 0;
  // Iterate by code point so surrogate pairs stay together.
  for (const char of value) {
    const code = char.codePointAt(0);
    const isDigit = code >= 0x30 && code <= 0x39;
    const isControl = (code >= 0x01 && code <= 0x1f) || code === 0x7f;
    const isLeadingDigit = isDigit && (position === 0 || (position === 1 && startsWithHyphen));
    if (code === 0) {
      escaped += "\uFFFD";
    } else if (isControl || isLeadingDigit) {
      // Code point escape; the trailing space terminates the hex sequence.
      escaped += `\\${code.toString(16)} `;
    } else if (code >= 0x80 || /[a-zA-Z0-9_-]/.test(char)) {
      escaped += char;
    } else {
      escaped += `\\${char}`;
    }
    position += 1;
  }
  return escaped;
}
903
/**
 * Reports whether a DOM node is an element, i.e. its `type` is "tag",
 * "script", or "style".
 *
 * @param {object} node - DOM node with a `type` field.
 * @returns {boolean}
 */
function isElement(node) {
  switch (node.type) {
    case "tag":
    case "script":
    case "style":
      return true;
    default:
      return false;
  }
}
906
/**
 * Reports whether a DOM node is a text node (`type === "text"`).
 *
 * @param {object} node - DOM node with a `type` field.
 * @returns {boolean}
 */
function isText(node) {
  const { type } = node;
  return type === "text";
}
909
/**
 * Finds the first element with the given tag name, searching depth-first in
 * document order through `nodes` and their descendants.
 *
 * @param {Iterable<object>} nodes - Nodes to search.
 * @param {string} name - Tag name to match against `node.name`.
 * @returns {object | undefined} The matching element, or undefined.
 */
function findElement(nodes, name) {
  for (const candidate of nodes) {
    if (isElement(candidate)) {
      const match = candidate.name === name ? candidate : findElement(candidate.children, name);
      if (match) return match;
    }
  }
  return void 0;
}
918
/**
 * Wraps a list of nodes in a synthetic `fragment` element with no
 * attributes, so the list can be handled like a single element.
 *
 * @param {object[]} children - Nodes to place under the synthetic root.
 * @returns {DomElement}
 */
function fragmentRoot(children) {
  const noAttributes = {};
  return new DomElement("fragment", noAttributes, children);
}
921
+
922
+ export {
923
+ extractStaticSemanticTree
924
+ };
925
+ //# sourceMappingURL=chunk-ZXTURCRT.js.map