@mz1999/defuddle 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +371 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +145 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/constants.d.ts +24 -0
  7. package/dist/constants.js +950 -0
  8. package/dist/constants.js.map +1 -0
  9. package/dist/defuddle.d.ts +136 -0
  10. package/dist/defuddle.js +1816 -0
  11. package/dist/defuddle.js.map +1 -0
  12. package/dist/elements/callouts.d.ts +6 -0
  13. package/dist/elements/callouts.js +74 -0
  14. package/dist/elements/callouts.js.map +1 -0
  15. package/dist/elements/code.d.ts +5 -0
  16. package/dist/elements/code.js +346 -0
  17. package/dist/elements/code.js.map +1 -0
  18. package/dist/elements/footnotes.d.ts +5 -0
  19. package/dist/elements/footnotes.js +619 -0
  20. package/dist/elements/footnotes.js.map +1 -0
  21. package/dist/elements/headings.d.ts +11 -0
  22. package/dist/elements/headings.js +100 -0
  23. package/dist/elements/headings.js.map +1 -0
  24. package/dist/elements/images.d.ts +8 -0
  25. package/dist/elements/images.js +877 -0
  26. package/dist/elements/images.js.map +1 -0
  27. package/dist/elements/math.base.d.ts +9 -0
  28. package/dist/elements/math.base.js +195 -0
  29. package/dist/elements/math.base.js.map +1 -0
  30. package/dist/elements/math.core.d.ts +7 -0
  31. package/dist/elements/math.core.js +52 -0
  32. package/dist/elements/math.core.js.map +1 -0
  33. package/dist/elements/math.d.ts +2 -0
  34. package/dist/elements/math.full.d.ts +8 -0
  35. package/dist/elements/math.js +7 -0
  36. package/dist/elements/math.js.map +1 -0
  37. package/dist/extractor-registry.d.ts +16 -0
  38. package/dist/extractor-registry.js +140 -0
  39. package/dist/extractor-registry.js.map +1 -0
  40. package/dist/extractors/_base.d.ts +22 -0
  41. package/dist/extractors/_base.js +27 -0
  42. package/dist/extractors/_base.js.map +1 -0
  43. package/dist/extractors/_conversation.d.ts +9 -0
  44. package/dist/extractors/_conversation.js +78 -0
  45. package/dist/extractors/_conversation.js.map +1 -0
  46. package/dist/extractors/chatgpt.d.ts +14 -0
  47. package/dist/extractors/chatgpt.js +138 -0
  48. package/dist/extractors/chatgpt.js.map +1 -0
  49. package/dist/extractors/claude.d.ts +10 -0
  50. package/dist/extractors/claude.js +91 -0
  51. package/dist/extractors/claude.js.map +1 -0
  52. package/dist/extractors/gemini.d.ts +14 -0
  53. package/dist/extractors/gemini.js +111 -0
  54. package/dist/extractors/gemini.js.map +1 -0
  55. package/dist/extractors/github.d.ts +20 -0
  56. package/dist/extractors/github.js +251 -0
  57. package/dist/extractors/github.js.map +1 -0
  58. package/dist/extractors/grok.d.ts +15 -0
  59. package/dist/extractors/grok.js +142 -0
  60. package/dist/extractors/grok.js.map +1 -0
  61. package/dist/extractors/hackernews.d.ts +21 -0
  62. package/dist/extractors/hackernews.js +155 -0
  63. package/dist/extractors/hackernews.js.map +1 -0
  64. package/dist/extractors/reddit.d.ts +22 -0
  65. package/dist/extractors/reddit.js +197 -0
  66. package/dist/extractors/reddit.js.map +1 -0
  67. package/dist/extractors/twitter.d.ts +16 -0
  68. package/dist/extractors/twitter.js +204 -0
  69. package/dist/extractors/twitter.js.map +1 -0
  70. package/dist/extractors/x-article.d.ts +24 -0
  71. package/dist/extractors/x-article.js +267 -0
  72. package/dist/extractors/x-article.js.map +1 -0
  73. package/dist/extractors/x-oembed.d.ts +20 -0
  74. package/dist/extractors/x-oembed.js +350 -0
  75. package/dist/extractors/x-oembed.js.map +1 -0
  76. package/dist/extractors/youtube.d.ts +87 -0
  77. package/dist/extractors/youtube.js +869 -0
  78. package/dist/extractors/youtube.js.map +1 -0
  79. package/dist/fetch.d.ts +18 -0
  80. package/dist/fetch.js +265 -0
  81. package/dist/fetch.js.map +1 -0
  82. package/dist/index.d.ts +3 -0
  83. package/dist/index.full.d.ts +12 -0
  84. package/dist/index.full.js +1 -0
  85. package/dist/index.js +1 -0
  86. package/dist/index.js.map +1 -0
  87. package/dist/markdown.d.ts +30 -0
  88. package/dist/markdown.js +661 -0
  89. package/dist/markdown.js.map +1 -0
  90. package/dist/metadata.d.ts +25 -0
  91. package/dist/metadata.js +426 -0
  92. package/dist/metadata.js.map +1 -0
  93. package/dist/node.d.ts +19 -0
  94. package/dist/node.js +78 -0
  95. package/dist/node.js.map +1 -0
  96. package/dist/scoring.d.ts +31 -0
  97. package/dist/scoring.js +472 -0
  98. package/dist/scoring.js.map +1 -0
  99. package/dist/standardize.d.ts +2 -0
  100. package/dist/standardize.js +1101 -0
  101. package/dist/standardize.js.map +1 -0
  102. package/dist/types/extractors.d.ts +41 -0
  103. package/dist/types/extractors.js +3 -0
  104. package/dist/types/extractors.js.map +1 -0
  105. package/dist/types.d.ts +135 -0
  106. package/dist/types.js +3 -0
  107. package/dist/types.js.map +1 -0
  108. package/dist/utils/comments.d.ts +44 -0
  109. package/dist/utils/comments.js +103 -0
  110. package/dist/utils/comments.js.map +1 -0
  111. package/dist/utils/dom.d.ts +42 -0
  112. package/dist/utils/dom.js +104 -0
  113. package/dist/utils/dom.js.map +1 -0
  114. package/dist/utils/linkedom-compat.d.ts +5 -0
  115. package/dist/utils/linkedom-compat.js +23 -0
  116. package/dist/utils/linkedom-compat.js.map +1 -0
  117. package/dist/utils/transcript.d.ts +37 -0
  118. package/dist/utils/transcript.js +61 -0
  119. package/dist/utils/transcript.js.map +1 -0
  120. package/dist/utils.d.ts +13 -0
  121. package/dist/utils.js +98 -0
  122. package/dist/utils.js.map +1 -0
  123. package/package.json +107 -0
@@ -0,0 +1,31 @@
1
+ import { DebugRemoval } from './types';
/**
 * Pairs a DOM element with a numeric score.
 */
export interface ContentScore {
    /** Numeric score associated with `element`. */
    score: number;
    /** The DOM element the score refers to. */
    element: Element;
}
/**
 * Heuristic scoring utilities used to tell main content apart from
 * navigation, boilerplate and other non-content blocks.
 */
export declare class ContentScorer {
    private doc;
    private debug;
    /**
     * @param doc - Document stored on the instance.
     * @param debug - Optional debug flag stored on the instance.
     */
    constructor(doc: Document, debug?: boolean);
    /**
     * Computes a heuristic content score for a single element.
     *
     * @param element - Element to score.
     * @returns The numeric score; higher means more content-like.
     */
    static scoreElement(element: Element): number;
    /**
     * Picks the highest-scoring element among the candidates.
     *
     * @param elements - Candidate elements.
     * @param minScore - Score the winner must exceed to be returned.
     * @returns The best element, or `null` when no candidate scores above `minScore`.
     */
    static findBestElement(elements: Element[], minScore?: number): Element | null;
    /**
     * Scores blocks based on their content and structure
     * and removes those that are likely not content.
     *
     * @param doc - Document whose block elements are examined.
     * @param debug - When true, a summary is logged to the console.
     * @param debugRemovals - Optional list that receives one entry per removed element when `debug` is true.
     * @param mainContent - Optional element whose ancestors are never removed.
     */
    static scoreAndRemove(doc: Document, debug?: boolean, debugRemovals?: DebugRemoval[], mainContent?: Element | null): void;
    /**
     * Determines if an element is likely to be content based on its structure and attributes.
     */
    private static isLikelyContent;
    /**
     * Scores a block element based on various criteria to determine if it's likely not content.
     * Returns a negative score if the element is likely not content, a positive score if it is.
     */
    private static scoreNonContentBlock;
    /**
     * Detects article card grids: blocks with 3+ headings and 2+ images
     * but very little prose per heading.
     */
    private static isCardGrid;
}
@@ -0,0 +1,472 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ContentScorer = void 0;
4
+ const constants_1 = require("./constants");
5
+ const utils_1 = require("./utils");
6
+ const dom_1 = require("./utils/dom");
// Substrings that, when found in an element's class name or id, mark the
// element as likely content (see ContentScorer.isLikelyContent).
const contentIndicators = [
    'admonition',
    'article',
    'content',
    'entry',
    'image',
    'img',
    'font',
    'figure',
    'figcaption',
    'pre',
    'main',
    'post',
    'story',
    'table'
];
// Words and phrases that signal navigation or boilerplate. They are matched
// against an element's lowercased text (scoreNonContentBlock) and against
// heading text (isLikelyContent).
const navigationIndicators = [
    'advertisement',
    'all rights reserved',
    'banner',
    'cookie',
    'comments',
    'copyright',
    'follow me',
    'follow us',
    'footer',
    'header',
    'homepage',
    'login',
    'menu',
    'more articles',
    'more like this',
    'most read',
    'nav',
    'navigation',
    'newsletter',
    'popular',
    'privacy',
    'recommended',
    'register',
    'related',
    'responses',
    'share',
    'sidebar',
    'sign in',
    'sign up',
    'signup',
    'social',
    'sponsored',
    'subscribe',
    'terms',
    'trending'
];
// Social media profile URL pattern — used to detect author bios
const socialProfilePattern = /\b(linkedin\.com\/(in|company)\/|twitter\.com\/(?!intent\b)\w|x\.com\/(?!intent\b)\w|facebook\.com\/(?!share\b)\w|instagram\.com\/\w|threads\.net\/\w|mastodon\.\w)/i;
// Date pattern for detecting standalone bylines — no leading \b because
// textContent can concatenate adjacent elements without whitespace
const datePattern = /(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2}/i;
// Author attribution pattern — case-sensitive "By" + capitalized name
const bylinePattern = /\bBy\s+[A-Z]/;
/**
 * Converts an indicator phrase into a regex source string.
 * Regex metacharacters are escaped so an indicator is always matched
 * literally, and every run of whitespace in the phrase matches any run of
 * whitespace in the tested text.
 */
const indicatorToPatternSource = (indicator) => indicator
    .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\s+/g, '\\s+');
// Pre-compiled navigation indicator regexes for scoreNonContentBlock.
// Word-bounded and case-sensitive: callers test against lowercased text.
const navigationIndicatorRegexes = navigationIndicators.map(indicator => new RegExp(`\\b${indicatorToPatternSource(indicator)}\\b`));
// Single combined regex for heading text matching in isLikelyContent.
// Deliberately has no word boundaries, so indicators also match as substrings.
const navigationHeadingPattern = new RegExp(navigationIndicators.map(indicatorToPatternSource).join('|'), 'i');
// Date pattern for content scoring (extended with year)
const contentDatePattern = /\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\s+\d{1,2},?\s+\d{4}\b/i;
// Author attribution pattern for content scoring
const contentAuthorPattern = /\b(?:by|written by|author:)\s+[A-Za-z\s]+\b/i;
// Class/id substrings that indicate non-content. A match does not remove the
// element by itself; each match only lowers its score in scoreNonContentBlock.
const nonContentPatterns = [
    'advert',
    'ad-',
    'ads',
    'banner',
    'cookie',
    'copyright',
    'footer',
    'header',
    'homepage',
    'menu',
    'nav',
    'newsletter',
    'popular',
    'privacy',
    'recommended',
    'related',
    'rights',
    'share',
    'sidebar',
    'social',
    'sponsored',
    'subscribe',
    'terms',
    'trending',
    'widget'
];
105
+ class ContentScorer {
106
+ constructor(doc, debug = false) {
107
+ this.doc = doc;
108
+ this.debug = debug;
109
+ }
110
+ static scoreElement(element) {
111
+ let score = 0;
112
+ // Text density
113
+ const text = element.textContent || '';
114
+ const words = (0, utils_1.countWords)(text);
115
+ score += words;
116
+ // Paragraph ratio
117
+ const paragraphs = element.getElementsByTagName('p').length;
118
+ score += paragraphs * 10;
119
+ // Comma counting — prose text has commas, navigation doesn't
120
+ const commas = text.split(/,/).length - 1;
121
+ score += commas;
122
+ // Image ratio (penalize high image density)
123
+ const images = element.getElementsByTagName('img').length;
124
+ const imageDensity = images / (words || 1);
125
+ score -= imageDensity * 3;
126
+ // Position bonus (center/right elements)
127
+ try {
128
+ const style = element.getAttribute('style') || '';
129
+ const align = element.getAttribute('align') || '';
130
+ const isRightSide = style.includes('float: right') ||
131
+ style.includes('text-align: right') ||
132
+ align === 'right';
133
+ if (isRightSide)
134
+ score += 5;
135
+ }
136
+ catch (e) {
137
+ // Ignore position if we can't get style
138
+ }
139
+ // Content indicators
140
+ const hasDate = contentDatePattern.test(text);
141
+ if (hasDate)
142
+ score += 10;
143
+ const hasAuthor = contentAuthorPattern.test(text);
144
+ if (hasAuthor)
145
+ score += 10;
146
+ // Check for common content classes/attributes
147
+ const className = (0, dom_1.getClassName)(element).toLowerCase();
148
+ if (className.includes('content') || className.includes('article') || className.includes('post')) {
149
+ score += 15;
150
+ }
151
+ // Check for footnotes/references
152
+ const hasFootnotes = element.querySelector(constants_1.FOOTNOTE_INLINE_REFERENCES);
153
+ if (hasFootnotes)
154
+ score += 10;
155
+ const hasFootnotesList = element.querySelector(constants_1.FOOTNOTE_LIST_SELECTORS);
156
+ if (hasFootnotesList)
157
+ score += 10;
158
+ // Check for nested tables (penalize)
159
+ const nestedTables = element.getElementsByTagName('table').length;
160
+ score -= nestedTables * 5;
161
+ // Additional scoring for table cells
162
+ if (element.tagName.toLowerCase() === 'td') {
163
+ // Table cells get a bonus for being in the main content area
164
+ const parentTable = element.closest('table');
165
+ if (parentTable) {
166
+ // Only favor cells in tables that look like old-style content layouts
167
+ const tableWidth = parseInt(parentTable.getAttribute('width') || '0');
168
+ const tableAlign = parentTable.getAttribute('align') || '';
169
+ const tableClass = (0, dom_1.getClassName)(parentTable).toLowerCase();
170
+ const isTableLayout = tableWidth > 400 || // Common width for main content tables
171
+ tableAlign === 'center' ||
172
+ tableClass.includes('content') ||
173
+ tableClass.includes('article');
174
+ if (isTableLayout) {
175
+ // Additional checks to ensure this is likely the main content cell
176
+ const allCells = Array.from(parentTable.getElementsByTagName('td'));
177
+ const cellIndex = allCells.indexOf(element);
178
+ const isCenterCell = cellIndex > 0 && cellIndex < allCells.length - 1;
179
+ if (isCenterCell) {
180
+ score += 10;
181
+ }
182
+ }
183
+ }
184
+ }
185
+ // Link density as a multiplier — scales the score down proportionally
186
+ // rather than applying a fixed penalty. Capped at 0.5 reduction to
187
+ // avoid over-penalizing link-heavy content like blog index pages.
188
+ const linkElements = element.getElementsByTagName('a');
189
+ let linkTextLength = 0;
190
+ for (let i = 0; i < linkElements.length; i++) {
191
+ linkTextLength += (linkElements[i].textContent || '').length;
192
+ }
193
+ const textLength = text.length || 1;
194
+ const linkDensity = Math.min(linkTextLength / textLength, 0.5);
195
+ score *= (1 - linkDensity);
196
+ return score;
197
+ }
198
+ static findBestElement(elements, minScore = 50) {
199
+ let bestElement = null;
200
+ let bestScore = 0;
201
+ elements.forEach(element => {
202
+ const score = this.scoreElement(element);
203
+ if (score > bestScore) {
204
+ bestScore = score;
205
+ bestElement = element;
206
+ }
207
+ });
208
+ return bestScore > minScore ? bestElement : null;
209
+ }
210
+ /**
211
+ * Scores blocks based on their content and structure
212
+ * and removes those that are likely not content.
213
+ */
214
+ static scoreAndRemove(doc, debug = false, debugRemovals, mainContent) {
215
+ const startTime = Date.now();
216
+ // Track all elements to be removed
217
+ const elementsToRemove = new Map();
218
+ // Get all block elements
219
+ const blockElements = Array.from(doc.querySelectorAll(constants_1.BLOCK_ELEMENTS_SELECTOR));
220
+ // Process each block element
221
+ blockElements.forEach(element => {
222
+ // Skip elements that are already marked for removal
223
+ if (elementsToRemove.has(element)) {
224
+ return;
225
+ }
226
+ // Skip ancestors of mainContent to avoid disconnecting it
227
+ if (mainContent && element.contains(mainContent)) {
228
+ return;
229
+ }
230
+ // Skip elements inside code blocks — they are code structure, not page navigation
231
+ if (element.closest('pre')) {
232
+ return;
233
+ }
234
+ // Skip elements that are likely to be content
235
+ if (ContentScorer.isLikelyContent(element)) {
236
+ return;
237
+ }
238
+ // Score the element based on various criteria
239
+ const score = ContentScorer.scoreNonContentBlock(element);
240
+ // If the score is below the threshold, mark for removal
241
+ if (score < 0) {
242
+ elementsToRemove.set(element, score);
243
+ }
244
+ });
245
+ // Remove all collected elements in a single pass
246
+ elementsToRemove.forEach((score, el) => {
247
+ if (debug && debugRemovals) {
248
+ debugRemovals.push({
249
+ step: 'scoreAndRemove',
250
+ reason: `score: ${score}`,
251
+ text: (0, utils_1.textPreview)(el)
252
+ });
253
+ }
254
+ el.remove();
255
+ });
256
+ const endTime = Date.now();
257
+ if (debug) {
258
+ console.log('Defuddle', 'Removed non-content blocks:', {
259
+ count: elementsToRemove.size,
260
+ processingTime: `${(endTime - startTime).toFixed(2)}ms`
261
+ });
262
+ }
263
+ }
264
+ /**
265
+ * Determines if an element is likely to be content based on its structure and attributes.
266
+ */
267
+ static isLikelyContent(element) {
268
+ // Check if the element has a role that indicates content
269
+ const role = element.getAttribute('role');
270
+ if (role && ['article', 'main', 'contentinfo'].includes(role)) {
271
+ return true;
272
+ }
273
+ // Check if the element has a class or id that indicates content
274
+ const className = (0, dom_1.getClassName)(element).toLowerCase();
275
+ const id = element.id.toLowerCase();
276
+ for (const indicator of contentIndicators) {
277
+ if (className.includes(indicator) || id.includes(indicator)) {
278
+ return true;
279
+ }
280
+ }
281
+ // Elements containing code blocks or tables are likely content
282
+ if (element.querySelector('pre, table')) {
283
+ return true;
284
+ }
285
+ const text = element.textContent || '';
286
+ const words = (0, utils_1.countWords)(text);
287
+ // Check for headings that signal non-content sections (e.g. "Related articles")
288
+ // even if the element has enough text/paragraphs to otherwise look like content.
289
+ // Skip very large elements (1000+ words) as they are likely page-level wrappers.
290
+ if (words < 1000) {
291
+ const headings = element.querySelectorAll('h1, h2, h3, h4, h5, h6');
292
+ let hasNavigationHeading = false;
293
+ for (let i = 0; i < headings.length; i++) {
294
+ const headingText = (headings[i].textContent || '').toLowerCase().trim();
295
+ if (navigationHeadingPattern.test(headingText)) {
296
+ hasNavigationHeading = true;
297
+ break;
298
+ }
299
+ }
300
+ if (hasNavigationHeading) {
301
+ if (words < 200) {
302
+ return false;
303
+ }
304
+ // Larger sections (e.g. card grids) are also non-content
305
+ // if they have high link density
306
+ const linkCount = element.getElementsByTagName('a').length;
307
+ const linkDensity = linkCount / (words || 1);
308
+ if (linkDensity > 0.2) {
309
+ return false;
310
+ }
311
+ }
312
+ }
313
+ // Article card listing detection: blocks with many headings and images
314
+ // but very little prose per heading are likely article card grids
315
+ // (e.g. "related articles", "more stories"), not single-article content.
316
+ // Also checked in scoreNonContentBlock as a score penalty for elements
317
+ // that pass the content checks above but still look like card grids.
318
+ if (ContentScorer.isCardGrid(element, words)) {
319
+ return false;
320
+ }
321
+ // Small elements containing social media profile links are likely
322
+ // author bios or social widgets, not article content.
323
+ if (words < 80) {
324
+ const links = element.getElementsByTagName('a');
325
+ for (let i = 0; i < links.length; i++) {
326
+ const href = (links[i].getAttribute('href') || '').toLowerCase();
327
+ if (socialProfilePattern.test(href)) {
328
+ return false;
329
+ }
330
+ }
331
+ }
332
+ const paragraphs = element.getElementsByTagName('p').length;
333
+ const listItems = element.getElementsByTagName('li').length;
334
+ const contentBlocks = paragraphs + listItems;
335
+ // If the element has a significant amount of text and paragraphs/list items, it's likely content
336
+ if (words > 50 && contentBlocks > 1) {
337
+ return true;
338
+ }
339
+ // Check for elements with significant text content, even if they don't have many paragraphs
340
+ if (words > 100) {
341
+ return true;
342
+ }
343
+ // Check for elements with text content and some paragraphs/list items
344
+ if (words > 30 && contentBlocks > 0) {
345
+ return true;
346
+ }
347
+ // Prose text with sentence-ending punctuation and low link density is
348
+ // likely content even without <p> tags (e.g. transcript segments using divs/spans)
349
+ if (words >= 10 && /[.?!]/.test(text)) {
350
+ const linkCount = element.getElementsByTagName('a').length;
351
+ const linkDensity = linkCount / words;
352
+ if (linkDensity < 0.1) {
353
+ return true;
354
+ }
355
+ }
356
+ return false;
357
+ }
358
+ /**
359
+ * Scores a block element based on various criteria to determine if it's likely not content.
360
+ * Returns a negative score if the element is likely not content, a positive score if it is.
361
+ */
362
+ static scoreNonContentBlock(element) {
363
+ // Skip footnote list elements and their descendants
364
+ try {
365
+ if (element.matches(constants_1.FOOTNOTE_LIST_SELECTORS) ||
366
+ element.querySelector(constants_1.FOOTNOTE_LIST_SELECTORS) ||
367
+ element.closest(constants_1.FOOTNOTE_LIST_SELECTORS)) {
368
+ return 0;
369
+ }
370
+ }
371
+ catch (e) { }
372
+ let score = 0;
373
+ // Get text content
374
+ const text = element.textContent || '';
375
+ const words = (0, utils_1.countWords)(text);
376
+ // Skip very small elements
377
+ if (words < 3) {
378
+ return 0;
379
+ }
380
+ // Comma counting — prose has commas, navigation/boilerplate doesn't.
381
+ // This counterbalances negative signals from navigation indicators.
382
+ const commas = text.split(/,/).length - 1;
383
+ score += commas;
384
+ const textLower = text.toLowerCase();
385
+ let indicatorMatches = 0;
386
+ for (const regex of navigationIndicatorRegexes) {
387
+ if (regex.test(textLower)) {
388
+ indicatorMatches++;
389
+ }
390
+ }
391
+ score -= indicatorMatches * 10;
392
+ // Check for high link density (navigation)
393
+ const linkElements = element.getElementsByTagName('a');
394
+ const links = linkElements.length;
395
+ const linkDensity = links / (words || 1);
396
+ if (linkDensity > 0.5) {
397
+ score -= 15;
398
+ }
399
+ // Check for high link text ratio (e.g. card groups, nav sections)
400
+ // Requires multiple links to avoid penalizing content paragraphs
401
+ // that happen to be wrapped in a single link
402
+ if (links > 1 && words < 80) {
403
+ let linkTextLength = 0;
404
+ for (let i = 0; i < linkElements.length; i++) {
405
+ linkTextLength += (linkElements[i].textContent || '').length;
406
+ }
407
+ const totalTextLength = text.length;
408
+ if (totalTextLength > 0 && linkTextLength / totalTextLength > 0.8) {
409
+ score -= 15;
410
+ }
411
+ }
412
+ // Check for list structure (navigation)
413
+ const lists = element.getElementsByTagName('ul').length + element.getElementsByTagName('ol').length;
414
+ if (lists > 0 && links > lists * 3) {
415
+ score -= 10;
416
+ }
417
+ // Check for social media profile links (author bios, social widgets)
418
+ if (words < 80) {
419
+ const elLinks = element.getElementsByTagName('a');
420
+ for (let i = 0; i < elLinks.length; i++) {
421
+ const href = (elLinks[i].getAttribute('href') || '').toLowerCase();
422
+ if (socialProfilePattern.test(href)) {
423
+ score -= 15;
424
+ break;
425
+ }
426
+ }
427
+ }
428
+ // Penalize very small blocks that look like standalone author bylines with dates
429
+ // e.g. "By Author Name · March 4, 2026". Requires both an author attribution
430
+ // and a date to avoid false positives.
431
+ if (words < 15) {
432
+ if (bylinePattern.test(text) && datePattern.test(text)) {
433
+ score -= 10;
434
+ }
435
+ }
436
+ // Penalize blocks that look like article card grids
437
+ if (ContentScorer.isCardGrid(element, words)) {
438
+ score -= 15;
439
+ }
440
+ // Check for specific class patterns that indicate non-content
441
+ const className = (0, dom_1.getClassName)(element).toLowerCase();
442
+ const id = element.id.toLowerCase();
443
+ for (const pattern of nonContentPatterns) {
444
+ if (className.includes(pattern) || id.includes(pattern)) {
445
+ score -= 8;
446
+ }
447
+ }
448
+ return score;
449
+ }
450
+ /**
451
+ * Detects article card grids: blocks with 3+ headings and 2+ images
452
+ * but very little prose per heading.
453
+ */
454
+ static isCardGrid(element, words) {
455
+ if (words < 3 || words >= 500)
456
+ return false;
457
+ const headings = element.querySelectorAll('h2, h3, h4');
458
+ if (headings.length < 3)
459
+ return false;
460
+ const images = element.querySelectorAll('img');
461
+ if (images.length < 2)
462
+ return false;
463
+ let headingWordCount = 0;
464
+ for (let i = 0; i < headings.length; i++) {
465
+ headingWordCount += (0, utils_1.countWords)(headings[i].textContent || '');
466
+ }
467
+ const prosePerHeading = (words - headingWordCount) / headings.length;
468
+ return prosePerHeading < 20;
469
+ }
470
+ }
471
+ exports.ContentScorer = ContentScorer;
472
+ //# sourceMappingURL=scoring.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scoring.js","sourceRoot":"","sources":["../src/scoring.ts"],"names":[],"mappings":";;;AAAA,2CAA2G;AAE3G,mCAAkD;AAClD,qCAA2C;AAE3C,MAAM,iBAAiB,GAAG;IACzB,YAAY;IACZ,SAAS;IACT,SAAS;IACT,OAAO;IACP,OAAO;IACP,KAAK;IACL,MAAM;IACN,QAAQ;IACR,YAAY;IACZ,KAAK;IACL,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;CACP,CAAC;AAEF,+BAA+B;AAC/B,MAAM,oBAAoB,GAAG;IAC5B,eAAe;IACf,qBAAqB;IACrB,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;IACX,WAAW;IACX,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,OAAO;IACP,MAAM;IACN,eAAe;IACf,gBAAgB;IAChB,WAAW;IACX,KAAK;IACL,YAAY;IACZ,YAAY;IACZ,SAAS;IACT,SAAS;IACT,aAAa;IACb,UAAU;IACV,SAAS;IACT,WAAW;IACX,OAAO;IACP,SAAS;IACT,SAAS;IACT,SAAS;IACT,QAAQ;IACR,QAAQ;IACR,WAAW;IACX,WAAW;IACX,OAAO;IACP,UAAU;CACV,CAAC;AAEF,gEAAgE;AAChE,MAAM,oBAAoB,GAAG,sKAAsK,CAAC;AAEpM,wEAAwE;AACxE,mEAAmE;AACnE,MAAM,WAAW,GAAG,sEAAsE,CAAC;AAE3F,sEAAsE;AACtE,MAAM,aAAa,GAAG,cAAc,CAAC;AAErC,qEAAqE;AACrE,MAAM,0BAA0B,GAAG,oBAAoB,CAAC,GAAG,CAC1D,SAAS,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,MAAM,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CACrE,CAAC;AAEF,qEAAqE;AACrE,MAAM,wBAAwB,GAAG,IAAI,MAAM,CAC1C,oBAAoB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,CACvE,CAAC;AAEF,wDAAwD;AACxD,MAAM,kBAAkB,GAAG,oFAAoF,CAAC;AAEhH,iDAAiD;AACjD,MAAM,oBAAoB,GAAG,8CAA8C,CAAC;AAE5E,2DAA2D;AAC3D,mCAAmC;AACnC,MAAM,kBAAkB,GAAG;IAC1B,QAAQ;IACR,KAAK;IACL,KAAK;IACL,QAAQ;IACR,QAAQ;IACR,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,MAAM;IACN,KAAK;IACL,YAAY;IACZ,SAAS;IACT,SAAS;IACT,aAAa;IACb,SAAS;IACT,QAAQ;IACR,OAAO;IACP,SAAS;IACT,QAAQ;IACR,WAAW;IACX,WAAW;IACX,OAAO;IACP,UAAU;IACV,QAAQ;CACR,CAAC;AAOF,MAAa,aAAa;IAIzB,YAAY,GAAa,EAAE,QAAiB,KAAK;QAChD,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC;QACf,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACpB,CAAC;IAED,MAAM,CAAC,YAAY,CAAC,OAAgB;QACnC,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,eAAe;QACf,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,IAAI,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,IAAA,kBAAU,EAAC,IAAI,CAAC,CAAC;QAC/B,KAAK,IAAI,KAAK,CAAC;QAEf,kBAAkB;QAClB,MAAM,UAAU,GAAG,OAAO,CAAC,oBAAoB
,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;QAC5D,KAAK,IAAI,UAAU,GAAG,EAAE,CAAC;QAEzB,6DAA6D;QAC7D,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QAC1C,KAAK,IAAI,MAAM,CAAC;QAEhB,4CAA4C;QAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QAC1D,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;QAC3C,KAAK,IAAI,YAAY,GAAG,CAAC,CAAC;QAE1B,yCAAyC;QACzC,IAAI,CAAC;YACJ,MAAM,KAAK,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAClD,MAAM,KAAK,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAClD,MAAM,WAAW,GAAG,KAAK,CAAC,QAAQ,CAAC,cAAc,CAAC;gBAC3C,KAAK,CAAC,QAAQ,CAAC,mBAAmB,CAAC;gBACnC,KAAK,KAAK,OAAO,CAAC;YACzB,IAAI,WAAW;gBAAE,KAAK,IAAI,CAAC,CAAC;QAC7B,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACZ,wCAAwC;QACzC,CAAC;QAED,qBAAqB;QACrB,MAAM,OAAO,GAAG,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9C,IAAI,OAAO;YAAE,KAAK,IAAI,EAAE,CAAC;QAEzB,MAAM,SAAS,GAAG,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,IAAI,SAAS;YAAE,KAAK,IAAI,EAAE,CAAC;QAE3B,8CAA8C;QAC9C,MAAM,SAAS,GAAG,IAAA,kBAAY,EAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACtD,IAAI,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAClG,KAAK,IAAI,EAAE,CAAC;QACb,CAAC;QAED,iCAAiC;QACjC,MAAM,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,sCAA0B,CAAC,CAAC;QACvE,IAAI,YAAY;YAAE,KAAK,IAAI,EAAE,CAAC;QAE9B,MAAM,gBAAgB,GAAG,OAAO,CAAC,aAAa,CAAC,mCAAuB,CAAC,CAAC;QACxE,IAAI,gBAAgB;YAAE,KAAK,IAAI,EAAE,CAAC;QAElC,qCAAqC;QACrC,MAAM,YAAY,GAAG,OAAO,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAClE,KAAK,IAAI,YAAY,GAAG,CAAC,CAAC;QAE1B,qCAAqC;QACrC,IAAI,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,IAAI,EAAE,CAAC;YAC5C,6DAA6D;YAC7D,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAC7C,IAAI,WAAW,EAAE,CAAC;gBACjB,sEAAsE;gBACtE,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC;gBACtE,MAAM,UAAU,GAAG,WAAW,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC3D,MAAM,UAAU,GAAG,IAAA,kBAAY,EAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC;gBAC3D,MAAM,aAAa,GAClB,UAAU,GAAG,GAAG,IAAI,uCAAuC;oBAC3D,UAAU,KAAK,QAAQ;oBACvB,UAAU,
CAAC,QAAQ,CAAC,SAAS,CAAC;oBAC9B,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;gBAEhC,IAAI,aAAa,EAAE,CAAC;oBACnB,mEAAmE;oBACnE,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,CAAC;oBACpE,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,OAA+B,CAAC,CAAC;oBACpE,MAAM,YAAY,GAAG,SAAS,GAAG,CAAC,IAAI,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;oBAEtE,IAAI,YAAY,EAAE,CAAC;wBAClB,KAAK,IAAI,EAAE,CAAC;oBACb,CAAC;gBACF,CAAC;YACF,CAAC;QACF,CAAC;QAED,sEAAsE;QACtE,mEAAmE;QACnE,kEAAkE;QAClE,MAAM,YAAY,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC;QACvD,IAAI,cAAc,GAAG,CAAC,CAAC;QACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,cAAc,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;QAC9D,CAAC;QACD,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,GAAG,UAAU,EAAE,GAAG,CAAC,CAAC;QAC/D,KAAK,IAAI,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC;QAE3B,OAAO,KAAK,CAAC;IACd,CAAC;IAED,MAAM,CAAC,eAAe,CAAC,QAAmB,EAAE,WAAmB,EAAE;QAChE,IAAI,WAAW,GAAmB,IAAI,CAAC;QACvC,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;YAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;YACzC,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;gBACvB,SAAS,GAAG,KAAK,CAAC;gBAClB,WAAW,GAAG,OAAO,CAAC;YACvB,CAAC;QACF,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC;IAClD,CAAC;IAED;;;OAGG;IACI,MAAM,CAAC,cAAc,CAAC,GAAa,EAAE,QAAiB,KAAK,EAAE,aAA8B,EAAE,WAA4B;QAC/H,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,mCAAmC;QACnC,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAmB,CAAC;QAEpD,yBAAyB;QACzB,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,mCAAuB,CAAC,CAAC,CAAC;QAEhF,6BAA6B;QAC7B,aAAa,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;YAC/B,oDAAoD;YACpD,IAAI,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;gBACnC,OAAO;YACR,CAAC;YAED,0DAA0D;YAC1D,IAAI,WAAW,IAAI,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBAClD,OAAO;YACR,CAAC;YAED,kFAAkF;YAClF,IAAI,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC5B,OAAO;YACR,CAAC;YAED,8CAA8C;YAC9C,IAAI,aAAa,CAAC,eAAe,CAAC,OAAO,CAAC
,EAAE,CAAC;gBAC5C,OAAO;YACR,CAAC;YAED,8CAA8C;YAC9C,MAAM,KAAK,GAAG,aAAa,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;YAE1D,wDAAwD;YACxD,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACf,gBAAgB,CAAC,GAAG,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACtC,CAAC;QACF,CAAC,CAAC,CAAC;QAEH,iDAAiD;QACjD,gBAAgB,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YACtC,IAAI,KAAK,IAAI,aAAa,EAAE,CAAC;gBAC5B,aAAa,CAAC,IAAI,CAAC;oBAClB,IAAI,EAAE,gBAAgB;oBACtB,MAAM,EAAE,UAAU,KAAK,EAAE;oBACzB,IAAI,EAAE,IAAA,mBAAW,EAAC,EAAE,CAAC;iBACrB,CAAC,CAAC;YACJ,CAAC;YACD,EAAE,CAAC,MAAM,EAAE,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,IAAI,KAAK,EAAE,CAAC;YACX,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,6BAA6B,EAAE;gBACtD,KAAK,EAAE,gBAAgB,CAAC,IAAI;gBAC5B,cAAc,EAAE,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;aACvD,CAAC,CAAC;QACJ,CAAC;IACF,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,eAAe,CAAC,OAAgB;QAC9C,yDAAyD;QACzD,MAAM,IAAI,GAAG,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QAC1C,IAAI,IAAI,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/D,OAAO,IAAI,CAAC;QACb,CAAC;QAED,gEAAgE;QAChE,MAAM,SAAS,GAAG,IAAA,kBAAY,EAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACtD,MAAM,EAAE,GAAG,OAAO,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC;QAEpC,KAAK,MAAM,SAAS,IAAI,iBAAiB,EAAE,CAAC;YAC3C,IAAI,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC7D,OAAO,IAAI,CAAC;YACb,CAAC;QACF,CAAC;QAED,+DAA+D;QAC/D,IAAI,OAAO,CAAC,aAAa,CAAC,YAAY,CAAC,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC;QACb,CAAC;QAED,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,IAAI,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,IAAA,kBAAU,EAAC,IAAI,CAAC,CAAC;QAE/B,gFAAgF;QAChF,iFAAiF;QACjF,iFAAiF;QACjF,IAAI,KAAK,GAAG,IAAI,EAAE,CAAC;YAClB,MAAM,QAAQ,GAAG,OAAO,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC;YACpE,IAAI,oBAAoB,GAAG,KAAK,CAAC;YACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC1C,MAAM,WAAW,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;gBACzE,IAAI,wBAAwB,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC;oBAChD,oBAAoB,GAAG,IAAI,CAAC;oBAC5B,MAAM
;gBACP,CAAC;YACF,CAAC;YAED,IAAI,oBAAoB,EAAE,CAAC;gBAC1B,IAAI,KAAK,GAAG,GAAG,EAAE,CAAC;oBACjB,OAAO,KAAK,CAAC;gBACd,CAAC;gBACD,yDAAyD;gBACzD,iCAAiC;gBACjC,MAAM,SAAS,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;gBAC3D,MAAM,WAAW,GAAG,SAAS,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;gBAC7C,IAAI,WAAW,GAAG,GAAG,EAAE,CAAC;oBACvB,OAAO,KAAK,CAAC;gBACd,CAAC;YACF,CAAC;QACF,CAAC;QAED,uEAAuE;QACvE,kEAAkE;QAClE,yEAAyE;QACzE,uEAAuE;QACvE,qEAAqE;QACrE,IAAI,aAAa,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC;YAC9C,OAAO,KAAK,CAAC;QACd,CAAC;QAED,kEAAkE;QAClE,sDAAsD;QACtD,IAAI,KAAK,GAAG,EAAE,EAAE,CAAC;YAChB,MAAM,KAAK,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC;YAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACjE,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBACrC,OAAO,KAAK,CAAC;gBACd,CAAC;YACF,CAAC;QACF,CAAC;QAED,MAAM,UAAU,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;QAC5D,MAAM,SAAS,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;QAC5D,MAAM,aAAa,GAAG,UAAU,GAAG,SAAS,CAAC;QAE7C,iGAAiG;QACjG,IAAI,KAAK,GAAG,EAAE,IAAI,aAAa,GAAG,CAAC,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC;QACb,CAAC;QAED,4FAA4F;QAC5F,IAAI,KAAK,GAAG,GAAG,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC;QACb,CAAC;QAED,sEAAsE;QACtE,IAAI,KAAK,GAAG,EAAE,IAAI,aAAa,GAAG,CAAC,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC;QACb,CAAC;QAED,sEAAsE;QACtE,mFAAmF;QACnF,IAAI,KAAK,IAAI,EAAE,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACvC,MAAM,SAAS,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;YAC3D,MAAM,WAAW,GAAG,SAAS,GAAG,KAAK,CAAC;YACtC,IAAI,WAAW,GAAG,GAAG,EAAE,CAAC;gBACvB,OAAO,IAAI,CAAC;YACb,CAAC;QACF,CAAC;QAED,OAAO,KAAK,CAAC;IACd,CAAC;IAED;;;OAGG;IACK,MAAM,CAAC,oBAAoB,CAAC,OAAgB;QACnD,oDAAoD;QACpD,IAAI,CAAC;YACJ,IAAI,OAAO,CAAC,OAAO,CAAC,mCAAuB,CAAC;gBAC3C,OAAO,CAAC,aAAa,CAAC,mCAAuB,CAAC;gBAC9C,OAAO,CAAC,OAAO,CAAC,mCAAuB,CAAC,EAAE,CAAC;gBAC3C,OAAO,CAAC,CAAC;YACV,CAAC;QACF,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC,CAAA,CAAC;QAEd,IAAI,
KAAK,GAAG,CAAC,CAAC;QAEd,mBAAmB;QACnB,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,IAAI,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,IAAA,kBAAU,EAAC,IAAI,CAAC,CAAC;QAE/B,2BAA2B;QAC3B,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACf,OAAO,CAAC,CAAC;QACV,CAAC;QAED,qEAAqE;QACrE,oEAAoE;QACpE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QAC1C,KAAK,IAAI,MAAM,CAAC;QAEhB,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACrC,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,0BAA0B,EAAE,CAAC;YAChD,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC3B,gBAAgB,EAAE,CAAC;YACpB,CAAC;QACF,CAAC;QACD,KAAK,IAAI,gBAAgB,GAAG,EAAE,CAAC;QAE/B,2CAA2C;QAC3C,MAAM,YAAY,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC;QAClC,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;QACzC,IAAI,WAAW,GAAG,GAAG,EAAE,CAAC;YACvB,KAAK,IAAI,EAAE,CAAC;QACb,CAAC;QAED,kEAAkE;QAClE,iEAAiE;QACjE,6CAA6C;QAC7C,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,GAAG,EAAE,EAAE,CAAC;YAC7B,IAAI,cAAc,GAAG,CAAC,CAAC;YACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,cAAc,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YAC9D,CAAC;YACD,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC;YACpC,IAAI,eAAe,GAAG,CAAC,IAAI,cAAc,GAAG,eAAe,GAAG,GAAG,EAAE,CAAC;gBACnE,KAAK,IAAI,EAAE,CAAC;YACb,CAAC;QACF,CAAC;QAED,wCAAwC;QACxC,MAAM,KAAK,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,OAAO,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;QACpG,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,GAAG,KAAK,GAAG,CAAC,EAAE,CAAC;YACpC,KAAK,IAAI,EAAE,CAAC;QACb,CAAC;QAED,qEAAqE;QACrE,IAAI,KAAK,GAAG,EAAE,EAAE,CAAC;YAChB,MAAM,OAAO,GAAG,OAAO,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC;YAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACzC,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACnE,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBACrC,KAAK,IAAI,EAAE,CAAC;oBACZ,MAAM;gBACP,CAAC;YACF,CAAC;QACF,CAAC;QAED,iFAAiF;QACjF,6EAA6E;QAC7E,uCAAuC;QACvC,IAAI,KA
AK,GAAG,EAAE,EAAE,CAAC;YAChB,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACxD,KAAK,IAAI,EAAE,CAAC;YACb,CAAC;QACF,CAAC;QAED,oDAAoD;QACpD,IAAI,aAAa,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC;YAC9C,KAAK,IAAI,EAAE,CAAC;QACb,CAAC;QAED,8DAA8D;QAC9D,MAAM,SAAS,GAAG,IAAA,kBAAY,EAAC,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC;QACtD,MAAM,EAAE,GAAG,OAAO,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC;QAEpC,KAAK,MAAM,OAAO,IAAI,kBAAkB,EAAE,CAAC;YAC1C,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzD,KAAK,IAAI,CAAC,CAAC;YACZ,CAAC;QACF,CAAC;QAED,OAAO,KAAK,CAAC;IACd,CAAC;IAED;;;OAGG;IACK,MAAM,CAAC,UAAU,CAAC,OAAgB,EAAE,KAAa;QACxD,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,IAAI,GAAG;YAAE,OAAO,KAAK,CAAC;QAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,gBAAgB,CAAC,YAAY,CAAC,CAAC;QACxD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QACtC,MAAM,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;QAC/C,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QACpC,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,gBAAgB,IAAI,IAAA,kBAAU,EAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;QAC/D,CAAC;QACD,MAAM,eAAe,GAAG,CAAC,KAAK,GAAG,gBAAgB,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC;QACrE,OAAO,eAAe,GAAG,EAAE,CAAC;IAC7B,CAAC;CACD;AAraD,sCAqaC"}
@@ -0,0 +1,2 @@
1
+ import { DefuddleMetadata } from './types';
2
+ export declare function standardizeContent(element: Element, metadata: DefuddleMetadata, doc: Document, debug?: boolean): void;