@mz1999/defuddle 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. package/LICENSE +21 -0
  2. package/README.md +371 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +145 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/constants.d.ts +24 -0
  7. package/dist/constants.js +950 -0
  8. package/dist/constants.js.map +1 -0
  9. package/dist/defuddle.d.ts +136 -0
  10. package/dist/defuddle.js +1816 -0
  11. package/dist/defuddle.js.map +1 -0
  12. package/dist/elements/callouts.d.ts +6 -0
  13. package/dist/elements/callouts.js +74 -0
  14. package/dist/elements/callouts.js.map +1 -0
  15. package/dist/elements/code.d.ts +5 -0
  16. package/dist/elements/code.js +346 -0
  17. package/dist/elements/code.js.map +1 -0
  18. package/dist/elements/footnotes.d.ts +5 -0
  19. package/dist/elements/footnotes.js +619 -0
  20. package/dist/elements/footnotes.js.map +1 -0
  21. package/dist/elements/headings.d.ts +11 -0
  22. package/dist/elements/headings.js +100 -0
  23. package/dist/elements/headings.js.map +1 -0
  24. package/dist/elements/images.d.ts +8 -0
  25. package/dist/elements/images.js +877 -0
  26. package/dist/elements/images.js.map +1 -0
  27. package/dist/elements/math.base.d.ts +9 -0
  28. package/dist/elements/math.base.js +195 -0
  29. package/dist/elements/math.base.js.map +1 -0
  30. package/dist/elements/math.core.d.ts +7 -0
  31. package/dist/elements/math.core.js +52 -0
  32. package/dist/elements/math.core.js.map +1 -0
  33. package/dist/elements/math.d.ts +2 -0
  34. package/dist/elements/math.full.d.ts +8 -0
  35. package/dist/elements/math.js +7 -0
  36. package/dist/elements/math.js.map +1 -0
  37. package/dist/extractor-registry.d.ts +16 -0
  38. package/dist/extractor-registry.js +140 -0
  39. package/dist/extractor-registry.js.map +1 -0
  40. package/dist/extractors/_base.d.ts +22 -0
  41. package/dist/extractors/_base.js +27 -0
  42. package/dist/extractors/_base.js.map +1 -0
  43. package/dist/extractors/_conversation.d.ts +9 -0
  44. package/dist/extractors/_conversation.js +78 -0
  45. package/dist/extractors/_conversation.js.map +1 -0
  46. package/dist/extractors/chatgpt.d.ts +14 -0
  47. package/dist/extractors/chatgpt.js +138 -0
  48. package/dist/extractors/chatgpt.js.map +1 -0
  49. package/dist/extractors/claude.d.ts +10 -0
  50. package/dist/extractors/claude.js +91 -0
  51. package/dist/extractors/claude.js.map +1 -0
  52. package/dist/extractors/gemini.d.ts +14 -0
  53. package/dist/extractors/gemini.js +111 -0
  54. package/dist/extractors/gemini.js.map +1 -0
  55. package/dist/extractors/github.d.ts +20 -0
  56. package/dist/extractors/github.js +251 -0
  57. package/dist/extractors/github.js.map +1 -0
  58. package/dist/extractors/grok.d.ts +15 -0
  59. package/dist/extractors/grok.js +142 -0
  60. package/dist/extractors/grok.js.map +1 -0
  61. package/dist/extractors/hackernews.d.ts +21 -0
  62. package/dist/extractors/hackernews.js +155 -0
  63. package/dist/extractors/hackernews.js.map +1 -0
  64. package/dist/extractors/reddit.d.ts +22 -0
  65. package/dist/extractors/reddit.js +197 -0
  66. package/dist/extractors/reddit.js.map +1 -0
  67. package/dist/extractors/twitter.d.ts +16 -0
  68. package/dist/extractors/twitter.js +204 -0
  69. package/dist/extractors/twitter.js.map +1 -0
  70. package/dist/extractors/x-article.d.ts +24 -0
  71. package/dist/extractors/x-article.js +267 -0
  72. package/dist/extractors/x-article.js.map +1 -0
  73. package/dist/extractors/x-oembed.d.ts +20 -0
  74. package/dist/extractors/x-oembed.js +350 -0
  75. package/dist/extractors/x-oembed.js.map +1 -0
  76. package/dist/extractors/youtube.d.ts +87 -0
  77. package/dist/extractors/youtube.js +869 -0
  78. package/dist/extractors/youtube.js.map +1 -0
  79. package/dist/fetch.d.ts +18 -0
  80. package/dist/fetch.js +265 -0
  81. package/dist/fetch.js.map +1 -0
  82. package/dist/index.d.ts +3 -0
  83. package/dist/index.full.d.ts +12 -0
  84. package/dist/index.full.js +1 -0
  85. package/dist/index.js +1 -0
  86. package/dist/index.js.map +1 -0
  87. package/dist/markdown.d.ts +30 -0
  88. package/dist/markdown.js +661 -0
  89. package/dist/markdown.js.map +1 -0
  90. package/dist/metadata.d.ts +25 -0
  91. package/dist/metadata.js +426 -0
  92. package/dist/metadata.js.map +1 -0
  93. package/dist/node.d.ts +19 -0
  94. package/dist/node.js +78 -0
  95. package/dist/node.js.map +1 -0
  96. package/dist/scoring.d.ts +31 -0
  97. package/dist/scoring.js +472 -0
  98. package/dist/scoring.js.map +1 -0
  99. package/dist/standardize.d.ts +2 -0
  100. package/dist/standardize.js +1101 -0
  101. package/dist/standardize.js.map +1 -0
  102. package/dist/types/extractors.d.ts +41 -0
  103. package/dist/types/extractors.js +3 -0
  104. package/dist/types/extractors.js.map +1 -0
  105. package/dist/types.d.ts +135 -0
  106. package/dist/types.js +3 -0
  107. package/dist/types.js.map +1 -0
  108. package/dist/utils/comments.d.ts +44 -0
  109. package/dist/utils/comments.js +103 -0
  110. package/dist/utils/comments.js.map +1 -0
  111. package/dist/utils/dom.d.ts +42 -0
  112. package/dist/utils/dom.js +104 -0
  113. package/dist/utils/dom.js.map +1 -0
  114. package/dist/utils/linkedom-compat.d.ts +5 -0
  115. package/dist/utils/linkedom-compat.js +23 -0
  116. package/dist/utils/linkedom-compat.js.map +1 -0
  117. package/dist/utils/transcript.d.ts +37 -0
  118. package/dist/utils/transcript.js +61 -0
  119. package/dist/utils/transcript.js.map +1 -0
  120. package/dist/utils.d.ts +13 -0
  121. package/dist/utils.js +98 -0
  122. package/dist/utils.js.map +1 -0
  123. package/package.json +107 -0
@@ -0,0 +1,950 @@
"use strict";
// Tag the exports object with the `__esModule` interop flag.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every export as undefined. The chain assigns right-to-left,
// so the properties are created starting with ENTRY_POINT_ELEMENTS.
exports.ALLOWED_ATTRIBUTES_DEBUG =
    exports.ALLOWED_ATTRIBUTES =
    exports.ALLOWED_EMPTY_ELEMENTS =
    exports.FOOTNOTE_LIST_SELECTORS =
    exports.FOOTNOTE_INLINE_REFERENCES =
    exports.TEST_ATTRIBUTES_SELECTOR =
    exports.PARTIAL_SELECTORS_REGEX =
    exports.PARTIAL_SELECTORS =
    exports.TEST_ATTRIBUTES =
    exports.EXACT_SELECTORS_JOINED =
    exports.EXACT_SELECTORS =
    exports.HIDDEN_EXACT_SKIP_SELECTOR =
    exports.HIDDEN_EXACT_SELECTOR =
    exports.HIDDEN_EXACT_SELECTORS =
    exports.HIDDEN_EXACT_SKIP_SELECTORS =
    exports.CONTENT_ELEMENT_SELECTOR =
    exports.INLINE_ELEMENTS =
    exports.PRESERVE_ELEMENTS =
    exports.BLOCK_LEVEL_ELEMENTS =
    exports.BLOCK_ELEMENTS_SET =
    exports.BLOCK_ELEMENTS_SELECTOR =
    exports.BLOCK_ELEMENTS =
    exports.MOBILE_WIDTH =
    exports.ENTRY_POINT_ELEMENTS = void 0;
4
+ // Entry point elements
5
+ // These are the elements that will be used to find the main content
6
+ exports.ENTRY_POINT_ELEMENTS = [
7
+ '#post',
8
+ '.post-content',
9
+ '.post-body',
10
+ '.article-content',
11
+ '#article-content',
12
+ '.article_post',
13
+ '.article-wrapper',
14
+ '.entry-content',
15
+ '.content-article',
16
+ '.instapaper_body',
17
+ '.post',
18
+ '.markdown-body',
19
+ 'article',
20
+ '[role="article"]',
21
+ 'main',
22
+ '[role="main"]',
23
+ '#content',
24
+ 'body' // ensures there is always a match
25
+ ];
26
+ exports.MOBILE_WIDTH = 600;
27
+ exports.BLOCK_ELEMENTS = ['div', 'section', 'article', 'main', 'aside', 'header', 'footer', 'nav', 'content'];
28
+ exports.BLOCK_ELEMENTS_SELECTOR = exports.BLOCK_ELEMENTS.join(',');
29
+ exports.BLOCK_ELEMENTS_SET = new Set(exports.BLOCK_ELEMENTS);
30
+ // All block-level HTML elements (includes BLOCK_ELEMENTS + semantic content blocks)
31
+ exports.BLOCK_LEVEL_ELEMENTS = new Set([
32
+ ...exports.BLOCK_ELEMENTS,
33
+ 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
34
+ 'ul', 'ol', 'li', 'dl', 'dt', 'dd',
35
+ 'pre', 'blockquote', 'figure', 'figcaption',
36
+ 'table', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th',
37
+ 'details', 'summary', 'address', 'hr',
38
+ 'form', 'fieldset'
39
+ ]);
40
+ // Elements that should not be unwrapped
41
+ exports.PRESERVE_ELEMENTS = new Set([
42
+ 'pre', 'code', 'table', 'thead', 'tbody', 'tr', 'td', 'th',
43
+ 'ul', 'ol', 'li', 'dl', 'dt', 'dd',
44
+ 'figure', 'figcaption', 'picture',
45
+ 'details', 'summary',
46
+ 'blockquote',
47
+ 'form', 'fieldset'
48
+ ]);
49
+ // Inline elements that should not be unwrapped
50
+ exports.INLINE_ELEMENTS = new Set([
51
+ 'a', 'span', 'strong', 'em', 'i', 'b', 'u', 'code', 'br', 'small',
52
+ 'sub', 'sup', 'mark', 'date', 'del', 'ins', 'q', 'abbr', 'cite', 'relative-time', 'time',
53
+ 'font'
54
+ ]);
55
+ // Content element selectors — elements whose presence indicates real article
56
+ // content rather than navigation, promotional, or decorative material.
57
+ // Used to protect legitimate content from removal by scoring, content pattern
58
+ // detection, and other heuristics.
59
+ exports.CONTENT_ELEMENT_SELECTOR = [
60
+ 'math', '[data-mathml]',
61
+ '.katex', '.katex-mathml', '.katex-display',
62
+ '.MathJax', '.MathJax_Display', '.MathJax_SVG',
63
+ 'mjx-container',
64
+ 'pre', 'code',
65
+ 'table',
66
+ 'img', 'picture', 'video',
67
+ 'blockquote',
68
+ 'figure',
69
+ ].join(', ');
70
+ // Selectors to be removed
71
+ exports.HIDDEN_EXACT_SKIP_SELECTORS = [
72
+ '[hidden]',
73
+ '[aria-hidden="true"]',
74
+ '.hidden',
75
+ '.invisible',
76
+ ];
77
+ exports.HIDDEN_EXACT_SELECTORS = exports.HIDDEN_EXACT_SKIP_SELECTORS.map(s => s === '[aria-hidden="true"]' ? '[aria-hidden="true"]:not([class*="math"])' : s);
78
+ // Pre-joined selector strings (these arrays are constant)
79
+ exports.HIDDEN_EXACT_SELECTOR = exports.HIDDEN_EXACT_SELECTORS.join(',');
80
+ exports.HIDDEN_EXACT_SKIP_SELECTOR = exports.HIDDEN_EXACT_SKIP_SELECTORS.join(',');
81
+ exports.EXACT_SELECTORS = [
82
+ // scripts, styles
83
+ 'noscript',
84
+ 'script:not([type^="math/"])',
85
+ 'style',
86
+ 'meta',
87
+ 'link',
88
+ // ads
89
+ '.ad:not([class*="gradient"])',
90
+ '[class^="ad-" i]',
91
+ '[class$="-ad" i]',
92
+ '[id^="ad-" i]',
93
+ '[id$="-ad" i]',
94
+ '[role="banner" i]',
95
+ '[alt*="advert" i]',
96
+ '.promo',
97
+ '.Promo',
98
+ '#barrier-page', // ft.com
99
+ '.alert',
100
+ // comments
101
+ '[id="comments" i]',
102
+ '[id="comment" i]',
103
+ // cover images
104
+ 'div[class*="cover-"]',
105
+ 'div[id*="cover-"]',
106
+ // header, nav
107
+ 'header',
108
+ '.header:not(.banner)',
109
+ '#header',
110
+ '#Header',
111
+ '#banner',
112
+ '#Banner',
113
+ 'nav',
114
+ '.navigation',
115
+ '#navigation',
116
+ // '.hero', // see issue #132
117
+ '[role="navigation" i]',
118
+ '[role="dialog" i]',
119
+ '[role*="complementary" i]',
120
+ '[class*="pagination" i]',
121
+ '.menu',
122
+ // '#menu', // see issue #106
123
+ '#siteSub',
124
+ // '.fixed', see issue #44
125
+ '.previous',
126
+ // metadata
127
+ '.author',
128
+ '.Author',
129
+ '[class$="_bio"]',
130
+ '#categories',
131
+ '.contributor',
132
+ '.date',
133
+ '#date',
134
+ '[data-date]',
135
+ '.entry-meta',
136
+ '.meta',
137
+ '.tags',
138
+ '#tags',
139
+ '[rel="tag"]',
140
+ '.toc',
141
+ '.Toc',
142
+ '#toc',
143
+ '.headline',
144
+ '#headline',
145
+ '#title',
146
+ '#Title',
147
+ '#articleTag',
148
+ // '[href*="/category"]', // see issue #131
149
+ // '[href*="/categories"]', // see issue #131
150
+ '[href*="/tag/"]',
151
+ '[href*="/tags/"]',
152
+ // '[href*="/topics"]', // see issue #131
153
+ '[href*="/author/"]',
154
+ '[href*="/author?"]',
155
+ '[href$="/author"]',
156
+ 'a[href*="copyright.com"]',
157
+ 'a[href*="google.com/preferences"]',
158
+ '[href*="#toc"]',
159
+ '[href="#top"]',
160
+ '[href="#Top"]',
161
+ '[href="#page-header"]',
162
+ '[href="#content"]',
163
+ '[href="#site-content"]',
164
+ '[href="#main-content"]',
165
+ '[href^="#main"]',
166
+ '[src*="author"]',
167
+ // footer
168
+ 'footer',
169
+ // inputs, forms, elements
170
+ '.aside',
171
+ 'aside:not([class*="callout"])',
172
+ 'button',
173
+ // '[role="button"]', Medium images
174
+ 'canvas',
175
+ 'date',
176
+ 'dialog',
177
+ 'fieldset',
178
+ 'form',
179
+ 'input:not([type="checkbox"])',
180
+ 'label',
181
+ 'option',
182
+ 'select',
183
+ '[role="listbox"]',
184
+ '[role="option"]',
185
+ 'textarea',
186
+ // 'time', // see issue #136
187
+ // 'relative-time', // see issue #136
188
+ // hidden
189
+ ...exports.HIDDEN_EXACT_SELECTORS,
190
+ // Note: [style*="display: none"] removed — substring match causes false positives
191
+ // with CSS custom properties like --footer-display: none. The removeHiddenElements
192
+ // step handles inline style detection with a proper regex.
193
+ // iframes
194
+ 'instaread-player',
195
+ 'iframe:not([src*="youtube"]):not([src*="youtu.be"]):not([src*="vimeo"]):not([src*="twitter"]):not([src*="x.com"]):not([src*="datawrapper"])',
196
+ // logos
197
+ '[class="logo" i]',
198
+ '#logo',
199
+ '#Logo',
200
+ // newsletter
201
+ '#newsletter',
202
+ '#Newsletter',
203
+ '.subscribe',
204
+ // hidden for print
205
+ '.noprint',
206
+ '[data-print-layout="hide" i]',
207
+ '[data-block="donotprint" i]',
208
+ // footnotes, citations
209
+ '[class*="clickable-icon" i]',
210
+ 'li span[class*="ltx_tag" i][class*="ltx_tag_item" i]',
211
+ 'a[href^="#"][class*="anchor" i]',
212
+ 'a[href^="#"][class*="ref" i]:not(.ltx_ref)',
213
+ // link lists
214
+ '[data-container*="most-viewed" i]',
215
+ // sidebar
216
+ '.sidebar',
217
+ '.Sidebar',
218
+ '#sidebar',
219
+ '#Sidebar',
220
+ '#side-bar',
221
+ '#sitesub',
222
+ // skip links
223
+ '[data-link-name*="skip" i]',
224
+ '[aria-label*="skip" i]',
225
+ // other
226
+ '.copyright',
227
+ '#copyright',
228
+ '.licensebox',
229
+ '#page-info',
230
+ '#rss',
231
+ '#feed',
232
+ '.gutter',
233
+ '#primaryaudio', // NPR
234
+ '#NYT_ABOVE_MAIN_CONTENT_REGION',
235
+ '[data-testid="photoviewer-children-figure"] > span', // New York Times
236
+ 'table.infobox',
237
+ '[data-optimizely="related-articles-section" i]', // The Economist
238
+ '[data-orientation="vertical"]',
239
+ '.gh-header-sticky', // GitHub
240
+ '[data-testid="issue-metadata-sticky"]', // GitHub
241
+ ];
242
+ exports.EXACT_SELECTORS_JOINED = exports.EXACT_SELECTORS.join(',');
243
+ // Attributes to test against for partial matches
244
+ exports.TEST_ATTRIBUTES = [
245
+ 'class',
246
+ 'id',
247
+ 'data-test',
248
+ 'data-testid',
249
+ 'data-test-id',
250
+ 'data-qa',
251
+ 'data-cy'
252
+ ];
253
+ // Removal patterns tested against attributes above
254
+ // Case insensitive, partial matches allowed
255
+ exports.PARTIAL_SELECTORS = [
256
+ 'a-statement',
257
+ 'access-wall',
258
+ 'activitypub',
259
+ 'actioncall',
260
+ 'addcomment',
261
+ 'addtoany',
262
+ 'advert',
263
+ // '-ad-', howtogeek.com
264
+ 'adlayout',
265
+ 'ad-tldr',
266
+ 'ad-placement',
267
+ 'ads-container',
268
+ '_ad_',
269
+ 'AdBlock_',
270
+ 'AdUnit',
271
+ 'after_content',
272
+ 'after_main_article',
273
+ 'afterpost',
274
+ 'allterms',
275
+ '-alert-',
276
+ 'alert-box',
277
+ // 'appendix',
278
+ '_archive',
279
+ 'around-the-web',
280
+ 'aroundpages',
281
+ 'article-author',
282
+ 'article-badges',
283
+ 'article-banner',
284
+ 'article-bottom-section',
285
+ 'article-bottom',
286
+ 'article-category',
287
+ 'article-card',
288
+ 'article-citation',
289
+ 'article__copy',
290
+ 'article_date',
291
+ 'article-date',
292
+ 'article-end ',
293
+ 'article_header',
294
+ 'article-header',
295
+ 'article__header',
296
+ 'article__hero',
297
+ 'article__info',
298
+ 'article-info',
299
+ 'article-meta',
300
+ 'article_meta',
301
+ 'article__meta',
302
+ 'articlename',
303
+ 'article-subject',
304
+ 'article_subject',
305
+ 'article-snippet',
306
+ 'article-separator',
307
+ 'article--share',
308
+ 'article--topics',
309
+ 'articletags',
310
+ 'article-tags',
311
+ 'article_tags',
312
+ 'articletitle',
313
+ 'article-title',
314
+ 'article_title',
315
+ 'articletopics',
316
+ 'article-topics',
317
+ // 'article-type',
318
+ 'article-actions',
319
+ 'article--lede', // The Verge
320
+ 'articlewell',
321
+ 'associated-people',
322
+ 'audio-card',
323
+ // 'author', Gwern
324
+ // '-author',
325
+ 'author-bio',
326
+ 'author-box',
327
+ 'author-info',
328
+ 'author_info',
329
+ 'authorm',
330
+ 'author-mini-bio',
331
+ 'author-name',
332
+ 'author-publish-info',
333
+ 'authored-by',
334
+ 'avatar',
335
+ 'back-to-top',
336
+ 'backlink_container',
337
+ 'backlinks-section',
338
+ // 'banner',
339
+ 'bio-block',
340
+ 'biobox',
341
+ 'blog-pager',
342
+ 'bookmark-',
343
+ '-bookmark',
344
+ 'bottominfo',
345
+ 'bottomnav',
346
+ 'bottom-of-article',
347
+ 'bottom-wrapper',
348
+ 'brand-bar',
349
+ 'bcrumb',
350
+ 'breadcrumb',
351
+ 'brdcrumb',
352
+ 'button-wrapper',
353
+ 'buttons-container',
354
+ 'btn-',
355
+ '-btn',
356
+ 'byline',
357
+ 'captcha',
358
+ 'card-text',
359
+ 'card-media',
360
+ 'card-post',
361
+ // 'carousel',
362
+ 'carouselcontainer',
363
+ 'carousel-container',
364
+ 'cat_header',
365
+ 'catlinks',
366
+ '_categories',
367
+ 'card-author',
368
+ 'card-content',
369
+ 'chapter-list', // The Economist
370
+ 'collections',
371
+ 'comments',
372
+ '-comment', // comments in code blocks are skipped in removeBySelector
373
+ 'commentbox',
374
+ 'comment-button',
375
+ 'commentcomp',
376
+ 'comment-content',
377
+ 'comment-count',
378
+ 'comment-form',
379
+ 'comment-number',
380
+ 'comment-respond',
381
+ 'comment-thread',
382
+ 'comment-wrap',
383
+ 'complementary',
384
+ 'consent',
385
+ 'contact-',
386
+ 'content-card', // The Verge
387
+ 'content-topics',
388
+ 'contentpromo',
389
+ 'context-bar',
390
+ 'context-widget', // Reuters
391
+ 'core-collateral',
392
+ 'cover-image',
393
+ 'cover-photo',
394
+ 'cover-wrap',
395
+ 'created-date',
396
+ 'creative-commons_',
397
+ 'c-subscribe',
398
+ '_cta',
399
+ '-cta',
400
+ 'cta-',
401
+ 'cta_',
402
+ 'current-issue', // The Nation
403
+ 'custom-list-number',
404
+ 'dateline',
405
+ 'dateheader',
406
+ 'date-header',
407
+ 'date-pub',
408
+ // 'dialog',
409
+ 'disclaimer',
410
+ 'disclosure',
411
+ 'discussion',
412
+ 'discuss_',
413
+ '-dismiss',
414
+ 'disqus',
415
+ 'donate',
416
+ 'donation',
417
+ 'dropdown', // Ars Technica
418
+ 'element-invisible',
419
+ 'eletters',
420
+ 'emailsignup',
421
+ 'emoji-bar',
422
+ 'engagement-widget',
423
+ 'enhancement-',
424
+ 'entry-author-info',
425
+ 'entry-categories',
426
+ 'entry-date',
427
+ // 'entry-meta',
428
+ 'entry-title',
429
+ 'entry-utility',
430
+ '-error',
431
+ 'error-',
432
+ 'eyebrow',
433
+ 'expand-reduce',
434
+ 'external-anchor',
435
+ 'externallinkembedwrapper', // The New Yorker
436
+ 'extra-services',
437
+ 'extra-title',
438
+ 'facebook',
439
+ 'fancy-box',
440
+ 'favorite',
441
+ 'featured-content',
442
+ 'feature_feed',
443
+ 'feedback',
444
+ 'feed-links',
445
+ 'field-site-sections',
446
+ 'fixheader',
447
+ 'floating-vid',
448
+ // 'follow',
449
+ 'follower',
450
+ 'footer',
451
+ 'footnote-back',
452
+ 'footnoteback',
453
+ 'form-group',
454
+ 'for-you',
455
+ 'frontmatter',
456
+ 'further-reading',
457
+ 'fullbleedheader',
458
+ 'gallery-count',
459
+ 'gated-',
460
+ 'gh-feed',
461
+ 'gist-meta',
462
+ // 'global',
463
+ // 'google',
464
+ 'goog-',
465
+ 'graph-view',
466
+ 'hamburger',
467
+ 'header_logo',
468
+ 'header-logo',
469
+ 'header-pattern', // The Verge
470
+ // 'headlines', Mercurynews
471
+ 'hero-list',
472
+ // '-hidden',
473
+ 'hide-for-print',
474
+ 'hide-print',
475
+ 'hide-when-no-script',
476
+ 'hidden-print',
477
+ 'hidden-sidenote',
478
+ 'hidden-accessibility',
479
+ 'infoline',
480
+ 'inline-topic',
481
+ 'instacartIntegration',
482
+ 'interlude',
483
+ 'interaction',
484
+ 'itemendrow',
485
+ 'intro-date',
486
+ 'invisible',
487
+ 'jp-no-solution',
488
+ 'jp-relatedposts',
489
+ 'jswarning',
490
+ 'js-warning',
491
+ 'jumplink',
492
+ 'jumpto',
493
+ 'jump-to-',
494
+ 'js-skip-to-content',
495
+ 'keepreading',
496
+ 'keep-reading',
497
+ 'keep_reading',
498
+ // 'keyword', // used in syntax highlighting
499
+ 'keyword_wrap',
500
+ 'kicker',
501
+ 'labstab', // Arxiv
502
+ '-labels',
503
+ 'language-name',
504
+ 'lastupdated',
505
+ 'latest-content',
506
+ '-ledes-', // The Verge
507
+ '-license',
508
+ 'license-',
509
+ 'lightbox-popup',
510
+ 'like-button',
511
+ 'link-box',
512
+ 'links-grid', // BBC
513
+ 'links-title', // BBC
514
+ 'listing-dynamic-terms', // Boston Review
515
+ 'list-tags',
516
+ 'listinks',
517
+ 'loading',
518
+ 'loa-info',
519
+ 'logo_container',
520
+ 'ltx_role_refnum', // Arxiv
521
+ 'ltx_tag_bibitem',
522
+ 'ltx_error',
523
+ 'masthead',
524
+ 'marketing',
525
+ 'media-inquiry',
526
+ '-menu',
527
+ 'menu-',
528
+ // 'meta-', syntax highlighting
529
+ 'metadata',
530
+ 'meta-date',
531
+ 'meta-row',
532
+ 'might-like',
533
+ 'minibio',
534
+ 'more-about',
535
+ 'mod-paywall',
536
+ '_modal',
537
+ '-modal',
538
+ 'more-',
539
+ 'morenews',
540
+ 'morestories',
541
+ 'more_wrapper',
542
+ 'most-read',
543
+ 'move-helper',
544
+ 'mw-editsection',
545
+ 'mw-cite-backlink',
546
+ 'mw-indicators',
547
+ 'mw-jump-link',
548
+ 'nav-',
549
+ 'nav_',
550
+ // 'navbar',
551
+ // 'navigation',
552
+ 'navigation-post',
553
+ 'next-',
554
+ 'newsgallery',
555
+ 'news-story-title',
556
+ // 'newsletter', used on Substack
557
+ 'newsletter_',
558
+ 'newsletterbanner',
559
+ 'newslettercontainer',
560
+ 'newsletter-form',
561
+ 'newsletter-signup',
562
+ 'newslettersignup',
563
+ 'newsletterwidget',
564
+ 'newsletterwrapper',
565
+ 'not-found',
566
+ 'notessection',
567
+ 'nomobile',
568
+ 'noprint',
569
+ 'open-slideshow',
570
+ 'originally-published', // Mercury News
571
+ 'other-blogs',
572
+ 'outline-view',
573
+ // 'overlay',
574
+ 'pagehead',
575
+ 'page-header',
576
+ 'page-title',
577
+ 'paywall_message',
578
+ '-partners',
579
+ 'permission-',
580
+ 'plea',
581
+ 'popular',
582
+ // 'popup', Gwern
583
+ 'popup_links',
584
+ // 'popover',
585
+ 'pop_stories',
586
+ 'pop-up',
587
+ 'post-author',
588
+ 'post-bottom',
589
+ 'post__category',
590
+ 'postcomment',
591
+ 'postdate',
592
+ 'post-date',
593
+ 'post_date',
594
+ 'post-details',
595
+ 'post-feeds',
596
+ 'postinfo',
597
+ 'post-info',
598
+ 'post_info',
599
+ 'post-inline-date',
600
+ 'post-links',
601
+ 'postlist',
602
+ 'post_list',
603
+ 'post_meta',
604
+ 'post-meta',
605
+ 'postmeta',
606
+ 'post_more',
607
+ 'postnavi',
608
+ 'post-navigation',
609
+ 'postpath',
610
+ 'post-preview',
611
+ 'postsnippet',
612
+ 'post_snippet',
613
+ 'post-snippet',
614
+ 'post-subject',
615
+ 'posttax',
616
+ 'post-tax',
617
+ 'post_tax',
618
+ 'posttag',
619
+ 'post_tag',
620
+ 'post-tag',
621
+ 'post_time',
622
+ 'posttitle',
623
+ 'post-title',
624
+ 'post_title',
625
+ 'post__title',
626
+ 'post-ufi-button',
627
+ // 'preview', used on Obsidian Publish
628
+ 'prev-post',
629
+ 'prevnext',
630
+ 'prev_next',
631
+ 'prev-next',
632
+ 'previousnext',
633
+ 'press-inquiries',
634
+ 'print-none',
635
+ 'print-header',
636
+ 'print:hidden',
637
+ 'privacy-notice',
638
+ 'privacy-settings',
639
+ 'profile',
640
+ // 'promo',
641
+ 'promo_article',
642
+ 'promo-bar',
643
+ 'promo-box',
644
+ 'pubdate',
645
+ 'pub_date',
646
+ 'pub-date',
647
+ 'publish_date',
648
+ 'publish-date',
649
+ 'publication-date',
650
+ 'publicationName', // Medium
651
+ 'qr-code',
652
+ 'qr_code',
653
+ 'quick_up',
654
+ '_rail',
655
+ 'ratingssection',
656
+ 'read_also',
657
+ 'readmore',
658
+ 'read-next',
659
+ 'read_next',
660
+ 'read_time',
661
+ 'read-time',
662
+ 'reading_time',
663
+ 'reading-time',
664
+ 'reading-list',
665
+ 'recent-',
666
+ 'recent-articles',
667
+ 'recentpost',
668
+ 'recent_post',
669
+ 'recent-post',
670
+ 'recommend',
671
+ 'redirectedfrom',
672
+ 'recirc',
673
+ 'register',
674
+ 'related',
675
+ 'relevant',
676
+ 'reversefootnote',
677
+ 'robots-nocontent',
678
+ '_rss',
679
+ 'rss-link',
680
+ 'screen-reader-text',
681
+ 'scroll_to',
682
+ 'scroll-to',
683
+ '_search',
684
+ '-search',
685
+ 'section-nav',
686
+ 'series-banner',
687
+ // 'share',
688
+ // '-share', scitechdaily.com
689
+ 'share-box',
690
+ 'sharedaddy',
691
+ 'share-icons',
692
+ 'sharelinks',
693
+ 'share-post',
694
+ 'share-print',
695
+ 'share-section',
696
+ 'shariff-',
697
+ 'show-for-print',
698
+ 'sidebartitle',
699
+ // 'sidebar_',
700
+ 'sidebar-content',
701
+ 'sidebar-wrapper',
702
+ 'sideitems',
703
+ 'sidebar-author',
704
+ 'sidebar-item',
705
+ 'side-box',
706
+ 'side-logo',
707
+ 'sign-in-gate',
708
+ 'similar-',
709
+ 'similar_',
710
+ 'similars-',
711
+ 'site-index',
712
+ 'site-header',
713
+ 'siteheader',
714
+ 'site-logo',
715
+ 'site-name',
716
+ 'site-wordpress',
717
+ // 'skip-',
718
+ 'skip-content',
719
+ 'skip-to-content',
720
+ 'skip-link',
721
+ 'c-skip-link',
722
+ '_skip-link',
723
+ '-slider',
724
+ 'slug-wrap',
725
+ // 'social',
726
+ 'social-author',
727
+ 'social-shar',
728
+ 'social-date',
729
+ 'speechify-ignore',
730
+ 'speedbump',
731
+ 'sponsor',
732
+ 'springercitation',
733
+ 'sr-only',
734
+ // '-stats',
735
+ '_stats',
736
+ // 'sticky',
737
+ 'story-date',
738
+ 'story-navigation',
739
+ 'storyreadtime', // Medium
740
+ 'storysmall',
741
+ 'storypublishdate', // Medium
742
+ 'subject-label',
743
+ 'subhead',
744
+ 'submenu',
745
+ // 'subscribe',
746
+ '-subscribe-',
747
+ 'subscriber-drive',
748
+ 'subscription-',
749
+ '_tags',
750
+ 'tags__item',
751
+ 'tag_list',
752
+ 'taxonomy',
753
+ // 'table-content',
754
+ 'table-of-contents',
755
+ 'tabs-',
756
+ // 'teaser', Nature
757
+ 'terminaltout',
758
+ 'time-rubric',
759
+ 'timestamp',
760
+ 'time-read',
761
+ 'time-to-read',
762
+ 'tip_off',
763
+ 'tiptout',
764
+ '-tout-',
765
+ // '-toc',
766
+ 'toc-container',
767
+ 'toggle-caption',
768
+ // 'toolbar', prism.js
769
+ 'tooltip-content',
770
+ 'topbar',
771
+ 'topic-authors',
772
+ 'topic-footer',
773
+ 'topic-list',
774
+ 'topic-subnav',
775
+ // 'top-section',
776
+ 'top-wrapper',
777
+ 'tree-item',
778
+ 'trending',
779
+ 'trust-feat',
780
+ 'trust-badge',
781
+ 'trust-project',
782
+ 'twitter',
783
+ 'u-hide',
784
+ 'upsell',
785
+ 'viewbottom',
786
+ 'yarpp-related',
787
+ 'visually-hidden',
788
+ 'welcomebox',
789
+ 'widget_pages',
790
+ // 'widget-'
791
+ ];
792
+ // Pre-compiled combined regex for PARTIAL_SELECTORS — avoids rebuilding on every parse
793
+ exports.PARTIAL_SELECTORS_REGEX = new RegExp(exports.PARTIAL_SELECTORS.join('|'), 'i');
794
+ // Attribute selector for elements we test partial matches against
795
+ exports.TEST_ATTRIBUTES_SELECTOR = exports.TEST_ATTRIBUTES.map(attr => `[${attr}]`).join(',');
796
+ // Selectors for footnotes and citations
797
+ exports.FOOTNOTE_INLINE_REFERENCES = [
798
+ 'sup.reference',
799
+ 'cite.ltx_cite',
800
+ 'sup[id^="fnr"]',
801
+ 'span[id^="fnr"]',
802
+ 'span[class*="footnote_ref"]',
803
+ 'span[class*="footnote-ref"]',
804
+ 'span.footnote-link',
805
+ 'a.citation',
806
+ 'a[id^="ref-link"]',
807
+ 'a[href^="#fn"]',
808
+ 'a[href^="#cite"]',
809
+ 'a[href^="#reference"]',
810
+ 'a[href^="#footnote"]',
811
+ 'a[href^="#r"]', // Common in academic papers
812
+ 'a[href^="#b"]', // Common for bibliography references
813
+ 'a[href*="cite_note"]',
814
+ 'a[href*="cite_ref"]',
815
+ 'a.footnote-anchor', // Substack
816
+ 'span.footnote-hovercard-target a', // Substack
817
+ 'a[role="doc-biblioref"]', // Science.org
818
+ 'a[id^="fnref"]',
819
+ 'a[id^="ref-link"]', // Nature.com
820
+ 'sup.footnoteref', // Wikidot
821
+ ].join(',');
822
+ exports.FOOTNOTE_LIST_SELECTORS = [
823
+ 'div.footnote ol',
824
+ 'div.footnotes ol',
825
+ 'div[role="doc-endnotes"]',
826
+ 'div[role="doc-footnotes"]',
827
+ 'ol.footnotes-list',
828
+ 'ol.footnotes',
829
+ 'ol.references',
830
+ 'ol[class*="article-references"]',
831
+ 'section.footnotes ol',
832
+ 'section[role="doc-endnotes"]',
833
+ 'section[role="doc-footnotes"]',
834
+ 'section[role="doc-bibliography"]',
835
+ 'ul.footnotes-list',
836
+ 'ul.ltx_biblist',
837
+ 'div.footnote[data-component-name="FootnoteToDOM"]', // Substack
838
+ 'div.footnotes-footer' // Wikidot
839
+ ].join(',');
840
+ // Elements that are allowed to be empty
841
+ // These are not removed even if they have no content
842
+ exports.ALLOWED_EMPTY_ELEMENTS = new Set([
843
+ 'area',
844
+ 'audio',
845
+ 'base',
846
+ 'br',
847
+ 'circle',
848
+ 'col',
849
+ 'defs',
850
+ 'ellipse',
851
+ 'embed',
852
+ 'figure',
853
+ 'g',
854
+ 'hr',
855
+ 'iframe',
856
+ 'img',
857
+ 'input',
858
+ 'line',
859
+ 'link',
860
+ 'mask',
861
+ 'meta',
862
+ 'object',
863
+ 'param',
864
+ 'path',
865
+ 'pattern',
866
+ 'picture',
867
+ 'polygon',
868
+ 'polyline',
869
+ 'rect',
870
+ 'source',
871
+ 'stop',
872
+ 'svg',
873
+ 'td',
874
+ 'th',
875
+ 'track',
876
+ 'use',
877
+ 'video',
878
+ 'wbr'
879
+ ]);
880
+ // Attributes to keep
881
+ exports.ALLOWED_ATTRIBUTES = new Set([
882
+ 'alt',
883
+ 'allow',
884
+ 'allowfullscreen',
885
+ 'aria-label',
886
+ 'checked',
887
+ 'colspan',
888
+ 'controls',
889
+ 'data-latex',
890
+ 'data-src',
891
+ 'data-srcset',
892
+ 'data-callout',
893
+ 'data-callout-title',
894
+ 'data-lang',
895
+ 'dir',
896
+ 'display',
897
+ 'frameborder',
898
+ 'headers',
899
+ 'height',
900
+ 'href',
901
+ 'kind',
902
+ 'label',
903
+ 'lang',
904
+ 'role',
905
+ 'rowspan',
906
+ 'src',
907
+ 'srclang',
908
+ 'srcset',
909
+ 'title',
910
+ 'type',
911
+ 'width',
912
+ // MathML attributes
913
+ 'accent',
914
+ 'accentunder',
915
+ 'align',
916
+ 'columnalign',
917
+ 'columnlines',
918
+ 'columnspacing',
919
+ 'columnspan',
920
+ 'data-mjx-texclass',
921
+ 'depth',
922
+ 'displaystyle',
923
+ 'fence',
924
+ 'frame',
925
+ 'framespacing',
926
+ 'linethickness',
927
+ 'lspace',
928
+ 'mathsize',
929
+ 'mathvariant',
930
+ 'maxsize',
931
+ 'minsize',
932
+ 'movablelimits',
933
+ 'notation',
934
+ 'rowalign',
935
+ 'rowlines',
936
+ 'rowspacing',
937
+ 'rowspan',
938
+ 'rspace',
939
+ 'scriptlevel',
940
+ 'separator',
941
+ 'stretchy',
942
+ 'symmetric',
943
+ 'voffset',
944
+ 'xmlns'
945
+ ]);
// NOTE(review): presumably extra attributes kept only in a debug mode — confirm against consumers.
exports.ALLOWED_ATTRIBUTES_DEBUG = new Set(['class', 'id']);
//# sourceMappingURL=constants.js.map