@adobe/spacecat-shared-html-analyzer 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-html-analyzer-v1.2.3](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.2...@adobe/spacecat-shared-html-analyzer-v1.2.3) (2026-02-04)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * updated selectors for nav-footer to not exclude the breadcrumbs ([#1309](https://github.com/adobe/spacecat-shared/issues/1309)) ([2c9246c](https://github.com/adobe/spacecat-shared/commit/2c9246c295ab90ab8e2bae39fd5d11d71bba6546))
7
+
8
+ # [@adobe/spacecat-shared-html-analyzer-v1.2.2](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.1...@adobe/spacecat-shared-html-analyzer-v1.2.2) (2026-01-22)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * added option to include noscript tags in server-side html ([#1274](https://github.com/adobe/spacecat-shared/issues/1274)) ([f26e320](https://github.com/adobe/spacecat-shared/commit/f26e3200cc2b129237073da5c7cae1cbfb3ae4b1))
14
+
1
15
  # [@adobe/spacecat-shared-html-analyzer-v1.2.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.0...@adobe/spacecat-shared-html-analyzer-v1.2.1) (2026-01-15)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-html-analyzer",
3
- "version": "1.2.1",
3
+ "version": "1.2.3",
4
4
  "description": "Analyze HTML content visibility for AI crawlers and citations - compare static HTML vs fully rendered content",
5
5
  "type": "module",
6
6
  "engines": {
package/src/analyzer.js CHANGED
@@ -25,12 +25,20 @@ import { hashDJB2, pct } from './utils.js';
25
25
  * @param {string} initHtml - Initial HTML content (what crawlers see)
26
26
  * @param {string} finHtml - Final HTML content (what users see)
27
27
  * @param {boolean} [ignoreNavFooter=true] - Whether to ignore navigation/footer elements
28
+ * @param {boolean} [includeNoscriptInFinal=false] -
29
+ * Whether to include noscript content in final HTML
28
30
  * @returns {Promise<Object>} Comprehensive analysis results
29
31
  */
30
- export async function analyzeTextComparison(initHtml, finHtml, ignoreNavFooter = true) {
31
- // Handle both sync (browser) and async (Node.js) stripTagsToText
32
- const initTextResult = stripTagsToText(initHtml, ignoreNavFooter);
33
- const finTextResult = stripTagsToText(finHtml, ignoreNavFooter);
32
+ export async function analyzeTextComparison(
33
+ initHtml,
34
+ finHtml,
35
+ ignoreNavFooter = true,
36
+ includeNoscriptInFinal = false,
37
+ ) {
38
+ // Server-side (initial): Always includes noscript (true) - what crawlers see
39
+ const initTextResult = stripTagsToText(initHtml, ignoreNavFooter, true);
40
+ // Client-side (final): Configurable noscript inclusion - what users see
41
+ const finTextResult = stripTagsToText(finHtml, ignoreNavFooter, includeNoscriptInFinal);
34
42
 
35
43
  const initText = await Promise.resolve(initTextResult);
36
44
  const finText = await Promise.resolve(finTextResult);
@@ -61,12 +69,20 @@ export async function analyzeTextComparison(initHtml, finHtml, ignoreNavFooter =
61
69
  * @param {string} originalHTML - Initial HTML content
62
70
  * @param {string} currentHTML - Final HTML content
63
71
  * @param {boolean} [ignoreNavFooter=true] - Whether to ignore navigation/footer elements
72
+ * @param {boolean} [includeNoscriptInCurrent=false] -
73
+ * Whether to include noscript content in current HTML
64
74
  * @returns {Promise<Object>} Basic statistics
65
75
  */
66
- export async function calculateStats(originalHTML, currentHTML, ignoreNavFooter = true) {
67
- // Handle both sync (browser) and async (Node.js) stripTagsToText
68
- const originalTextResult = stripTagsToText(originalHTML, ignoreNavFooter);
69
- const currentTextResult = stripTagsToText(currentHTML, ignoreNavFooter);
76
+ export async function calculateStats(
77
+ originalHTML,
78
+ currentHTML,
79
+ ignoreNavFooter = true,
80
+ includeNoscriptInCurrent = false,
81
+ ) {
82
+ // Server-side (original): Always includes noscript (true) - what crawlers see
83
+ const originalTextResult = stripTagsToText(originalHTML, ignoreNavFooter, true);
84
+ // Client-side (current): Configurable noscript inclusion - what users see
85
+ const currentTextResult = stripTagsToText(currentHTML, ignoreNavFooter, includeNoscriptInCurrent);
70
86
 
71
87
  const originalText = await Promise.resolve(originalTextResult);
72
88
  const currentText = await Promise.resolve(currentTextResult);
@@ -103,14 +119,30 @@ export async function calculateStats(originalHTML, currentHTML, ignoreNavFooter
103
119
  * Calculate stats for both nav/footer scenarios
104
120
  * @param {string} originalHTML - Initial HTML content
105
121
  * @param {string} currentHTML - Final HTML content
122
+ * @param {boolean} [includeNoscriptInCurrent=false] -
123
+ * Whether to include noscript content in current HTML
106
124
  * @returns {Promise<Object>} Analysis results for both scenarios
107
125
  */
108
- export async function calculateBothScenarioStats(originalHTML, currentHTML) {
126
+ export async function calculateBothScenarioStats(
127
+ originalHTML,
128
+ currentHTML,
129
+ includeNoscriptInCurrent = false,
130
+ ) {
109
131
  // Calculate stats with nav/footer ignored
110
- const statsIgnored = await calculateStats(originalHTML, currentHTML, true);
132
+ const statsIgnored = await calculateStats(
133
+ originalHTML,
134
+ currentHTML,
135
+ true,
136
+ includeNoscriptInCurrent,
137
+ );
111
138
 
112
139
  // Calculate stats without nav/footer ignored
113
- const statsNotIgnored = await calculateStats(originalHTML, currentHTML, false);
140
+ const statsNotIgnored = await calculateStats(
141
+ originalHTML,
142
+ currentHTML,
143
+ false,
144
+ includeNoscriptInCurrent,
145
+ );
114
146
  return {
115
147
  withNavFooterIgnored: {
116
148
  wordCountBefore: statsIgnored.wordCountBefore,
@@ -30,8 +30,6 @@ const NAVIGATION_FOOTER_SELECTOR = [
30
30
  // Header/footer classes
31
31
  '.header', '.site-header', '.page-header', '.top-header', '.header-wrapper',
32
32
  '.footer', '.site-footer', '.page-footer', '.bottom-footer', '.footer-wrapper',
33
- // Breadcrumb navigation
34
- '.breadcrumb', '.breadcrumbs',
35
33
  // Common ID selectors
36
34
  '#nav', '#navigation', '#navbar', '#header', '#footer', '#menu', '#main-menu',
37
35
  '#site-header', '#site-footer', '#page-header', '#page-footer',
@@ -182,16 +180,17 @@ function filterNavigationAndFooterCheerio($) {
182
180
  * @param {string} htmlContent - Raw HTML content
183
181
  * @param {boolean} ignoreNavFooter - Whether to remove navigation/footer elements
184
182
  * @param {boolean} returnText - Whether to return text only
183
+ * @param {boolean} includeNoscript - Whether to include noscript elements (false excludes them)
185
184
  * @returns {string} Filtered content
186
185
  */
187
- function filterHtmlBrowser(htmlContent, ignoreNavFooter, returnText) {
186
+ function filterHtmlBrowser(htmlContent, ignoreNavFooter, returnText, includeNoscript) {
188
187
  const parser = new DOMParser(); // eslint-disable-line no-undef
189
188
  const doc = parser.parseFromString(htmlContent, 'text/html');
190
189
 
191
190
  // Process the entire document to capture JSON-LD in both head and body
192
191
  const documentElement = doc.documentElement || doc;
193
192
 
194
- // Remove script elements except JSON-LD, also remove style, noscript, template
193
+ // Remove script elements except JSON-LD, also remove style, template
195
194
  documentElement.querySelectorAll('script').forEach((n) => {
196
195
  // Preserve JSON-LD structured data scripts by converting them to code blocks
197
196
  if (n.type === 'application/ld+json') {
@@ -234,7 +233,12 @@ function filterHtmlBrowser(htmlContent, ignoreNavFooter, returnText) {
234
233
  }
235
234
  n.remove();
236
235
  });
237
- documentElement.querySelectorAll('style,noscript,template').forEach((n) => n.remove());
236
+
237
+ if (includeNoscript) {
238
+ documentElement.querySelectorAll('style,template').forEach((n) => n.remove());
239
+ } else {
240
+ documentElement.querySelectorAll('noscript,style,template').forEach((n) => n.remove());
241
+ }
238
242
 
239
243
  // Remove all media elements (images, videos, audio, etc.) to keep only text
240
244
  const mediaSelector = 'img,video,audio,picture,svg,canvas,embed,object,iframe';
@@ -259,9 +263,10 @@ function filterHtmlBrowser(htmlContent, ignoreNavFooter, returnText) {
259
263
  * @param {string} htmlContent - Raw HTML content
260
264
  * @param {boolean} ignoreNavFooter - Whether to remove navigation/footer elements
261
265
  * @param {boolean} returnText - Whether to return text only
266
+ * @param {boolean} includeNoscript - Whether to include noscript elements (false excludes them)
262
267
  * @returns {Promise<string>} Filtered content
263
268
  */
264
- async function filterHtmlNode(htmlContent, ignoreNavFooter, returnText) {
269
+ async function filterHtmlNode(htmlContent, ignoreNavFooter, returnText, includeNoscript) {
265
270
  let cheerio;
266
271
  try {
267
272
  cheerio = await import('cheerio');
@@ -305,7 +310,12 @@ async function filterHtmlNode(htmlContent, ignoreNavFooter, returnText) {
305
310
  $(this).remove();
306
311
  }
307
312
  });
308
- $('style, noscript, template').remove();
313
+
314
+ if (includeNoscript) {
315
+ $('style, template').remove();
316
+ } else {
317
+ $('style, noscript, template').remove();
318
+ }
309
319
 
310
320
  // Remove all media elements (images, videos, audio, etc.) to keep only text
311
321
  $('img, video, audio, picture, svg, canvas, embed, object, iframe').remove();
@@ -330,45 +340,54 @@ async function filterHtmlNode(htmlContent, ignoreNavFooter, returnText) {
330
340
  /**
331
341
  * Filter HTML content by removing unwanted elements
332
342
  * @param {string} htmlContent - Raw HTML content
333
- * @param {boolean} ignoreNavFooter - Whether to remove navigation/footer elements
334
- * @param {boolean} returnText - Whether to return text only (true) or filtered HTML (false)
343
+ * @param {boolean} [ignoreNavFooter=true] - Whether to remove navigation/footer elements
344
+ * @param {boolean} [returnText=true] - Whether to return text only (true) or filtered HTML (false)
345
+ * @param {boolean} [includeNoscript=false] - Whether to include noscript elements
335
346
  * @returns {string|Promise<string>} Filtered content (sync in browser, async in Node.js)
336
347
  */
337
- export function filterHtmlContent(htmlContent, ignoreNavFooter = true, returnText = true) {
348
+ export function filterHtmlContent(
349
+ htmlContent,
350
+ ignoreNavFooter = true,
351
+ returnText = true,
352
+ includeNoscript = false,
353
+ ) {
338
354
  if (!htmlContent) return '';
339
355
 
340
356
  // Browser environment (DOMParser) - works in Chrome extensions too - SYNCHRONOUS
341
357
  if (isBrowser()) {
342
- return filterHtmlBrowser(htmlContent, ignoreNavFooter, returnText);
358
+ return filterHtmlBrowser(htmlContent, ignoreNavFooter, returnText, includeNoscript);
343
359
  }
344
360
 
345
361
  // Node.js environment (cheerio) - dynamic import to avoid bundling issues - ASYNCHRONOUS
346
- return filterHtmlNode(htmlContent, ignoreNavFooter, returnText);
362
+ return filterHtmlNode(htmlContent, ignoreNavFooter, returnText, includeNoscript);
347
363
  }
348
364
 
349
365
  /**
350
366
  * Strip HTML tags and return plain text
367
+ *
351
368
  * @param {string} htmlContent - Raw HTML content
352
- * @param {boolean} ignoreNavFooter - Whether to remove navigation/footer elements
369
+ * @param {boolean} [ignoreNavFooter=true] - Whether to remove navigation/footer elements
370
+ * @param {boolean} [includeNoscript=false] - Whether to include noscript elements
353
371
  * @returns {string|Promise<string>} Plain text content (sync in browser, async in Node.js)
354
372
  */
355
- export function stripTagsToText(htmlContent, ignoreNavFooter = true) {
356
- return filterHtmlContent(htmlContent, ignoreNavFooter, true);
373
+ export function stripTagsToText(htmlContent, ignoreNavFooter = true, includeNoscript = false) {
374
+ return filterHtmlContent(htmlContent, ignoreNavFooter, true, includeNoscript);
357
375
  }
358
376
 
359
377
  /**
360
378
  * Extract word count from HTML content
361
379
  * @param {string} htmlContent - Raw HTML content
362
- * @param {boolean} ignoreNavFooter - Whether to ignore navigation/footer
380
+ * @param {boolean} [ignoreNavFooter=true] - Whether to ignore navigation/footer
381
+ * @param {boolean} [includeNoscript=false] - Whether to include noscript elements
363
382
  * @returns {Object|Promise<Object>} Object with word_count property
364
383
  * (sync in browser, async in Node.js)
365
384
  */
366
- export function extractWordCount(htmlContent, ignoreNavFooter = true) {
385
+ export function extractWordCount(htmlContent, ignoreNavFooter = true, includeNoscript = false) {
367
386
  if (!htmlContent) {
368
387
  return { word_count: 0 };
369
388
  }
370
389
 
371
- const textContent = stripTagsToText(htmlContent, ignoreNavFooter);
390
+ const textContent = stripTagsToText(htmlContent, ignoreNavFooter, includeNoscript);
372
391
 
373
392
  // Handle both sync (browser) and async (Node.js) cases
374
393
  if (textContent && typeof textContent.then === 'function') {
package/src/index.d.ts CHANGED
@@ -90,17 +90,30 @@ export function generateDiffReport(initText: string, finText: string, mode?: "wo
90
90
  /**
91
91
  * Filter HTML content by removing unwanted elements
92
92
  */
93
- export function filterHtmlContent(htmlContent: string, ignoreNavFooter?: boolean, returnText?: boolean): Promise<string>;
93
+ export function filterHtmlContent(
94
+ htmlContent: string,
95
+ ignoreNavFooter?: boolean,
96
+ returnText?: boolean,
97
+ includeNoscript?: boolean
98
+ ): Promise<string>;
94
99
 
95
100
  /**
96
101
  * Extract plain text from HTML content
97
102
  */
98
- export function stripTagsToText(htmlContent: string, ignoreNavFooter?: boolean): Promise<string>;
103
+ export function stripTagsToText(
104
+ htmlContent: string,
105
+ ignoreNavFooter?: boolean,
106
+ includeNoscript?: boolean
107
+ ): Promise<string>;
99
108
 
100
109
  /**
101
110
  * Extract word count from HTML content
102
111
  */
103
- export function extractWordCount(htmlContent: string, ignoreNavFooter?: boolean): Promise<{ word_count: number }>;
112
+ export function extractWordCount(
113
+ htmlContent: string,
114
+ ignoreNavFooter?: boolean,
115
+ includeNoscript?: boolean
116
+ ): Promise<{ word_count: number }>;
104
117
 
105
118
  /**
106
119
  * Remove navigation and footer elements from DOM element (browser environment)
@@ -150,28 +163,42 @@ interface BothScenariosStats {
150
163
 
151
164
  /**
152
165
  * Comprehensive text-only analysis between initial and final HTML (original chrome extension logic)
166
+ * @param initHtml - Initial HTML content (what crawlers/bots see - server-side rendered)
167
+ * @param finHtml - Final HTML content (what users see - client-side rendered)
168
+ * @param ignoreNavFooter - Whether to ignore navigation/footer elements
169
+ * @param includeNoscriptInFinal - Whether to include noscript content in final HTML (client-side)
153
170
  */
154
171
  export function analyzeTextComparison(
155
172
  initHtml: string,
156
173
  finHtml: string,
157
- ignoreNavFooter?: boolean
174
+ ignoreNavFooter?: boolean,
175
+ includeNoscriptInFinal?: boolean
158
176
  ): Promise<TextComparison>;
159
177
 
160
178
  /**
161
179
  * Calculate basic stats from HTML comparison (original chrome extension logic)
180
+ * @param originalHTML - Initial HTML content (server-side)
181
+ * @param currentHTML - Final HTML content (client-side)
182
+ * @param ignoreNavFooter - Whether to ignore navigation/footer elements
183
+ * @param includeNoscriptInCurrent - Whether to include noscript content in current HTML (client-side)
162
184
  */
163
185
  export function calculateStats(
164
186
  originalHTML: string,
165
187
  currentHTML: string,
166
- ignoreNavFooter?: boolean
188
+ ignoreNavFooter?: boolean,
189
+ includeNoscriptInCurrent?: boolean
167
190
  ): Promise<BasicStats>;
168
191
 
169
192
  /**
170
193
  * Calculate stats for both nav/footer scenarios (original chrome extension logic)
194
+ * @param originalHTML - Initial HTML content (server-side)
195
+ * @param currentHTML - Final HTML content (client-side)
196
+ * @param includeNoscriptInCurrent - Whether to include noscript content in current HTML (client-side)
171
197
  */
172
198
  export function calculateBothScenarioStats(
173
199
  originalHTML: string,
174
- currentHTML: string
200
+ currentHTML: string,
201
+ includeNoscriptInCurrent?: boolean
175
202
  ): Promise<BothScenariosStats>;
176
203
 
177
204
  /** MARKDOWN DIFF FUNCTIONS */
@@ -46,6 +46,35 @@ describe('HTML Visibility Analyzer', () => {
46
46
  expect(result.initialText).to.equal('');
47
47
  expect(result.finalText.length).to.be.greaterThan(0);
48
48
  });
49
+
50
+ it('should include noscript in initial HTML and exclude in final HTML by default', async () => {
51
+ const initHtml = '<html><body><h1>Title</h1><noscript>Enable JS</noscript><p>Content</p></body></html>';
52
+ const finHtml = '<html><body><h1>Title</h1><noscript>Enable JS</noscript><p>Content</p><div>Extra</div></body></html>';
53
+ const result = await analyzeTextComparison(initHtml, finHtml);
54
+
55
+ // Initial text should include noscript content
56
+ expect(result.initialText).to.include('Enable JS');
57
+ // Final text should NOT include noscript content by default
58
+ expect(result.finalText).to.not.include('Enable JS');
59
+ // Both should have the main content
60
+ expect(result.initialText).to.include('Title');
61
+ expect(result.finalText).to.include('Title');
62
+ });
63
+
64
+ it('should include noscript in final HTML when includeNoscriptInFinal is true', async () => {
65
+ const initHtml = '<html><body><h1>Title</h1><noscript>Enable JS</noscript><p>Content</p></body></html>';
66
+ const finHtml = '<html><body><h1>Title</h1><noscript>Enable JS</noscript><p>Content</p><div>Extra</div></body></html>';
67
+ const result = await analyzeTextComparison(initHtml, finHtml, true, true);
68
+
69
+ // Initial text should include noscript content
70
+ expect(result.initialText).to.include('Enable JS');
71
+ // Final text should ALSO include noscript content when flag is true
72
+ expect(result.finalText).to.include('Enable JS');
73
+ // Both should have the main content
74
+ expect(result.initialText).to.include('Title');
75
+ expect(result.finalText).to.include('Title');
76
+ expect(result.finalText).to.include('Extra');
77
+ });
49
78
  });
50
79
 
51
80
  describe('calculateStats', () => {
@@ -64,6 +93,41 @@ describe('HTML Visibility Analyzer', () => {
64
93
  expect(result.contentIncreaseRatio).to.be.a('number');
65
94
  expect(result.citationReadability).to.be.a('number');
66
95
  });
96
+
97
+ it('should handle noscript elements correctly in word counts by default', async () => {
98
+ const originalHtml = '<html><body><h1>Title</h1><noscript>Enable JavaScript</noscript><p>Original content</p></body></html>';
99
+ const currentHtml = '<html><body><h1>Title</h1><noscript>Enable JavaScript</noscript><p>Original content</p><p>New content</p></body></html>';
100
+ const result = await calculateStats(originalHtml, currentHtml);
101
+
102
+ // Word counts should reflect the includeNoscript behavior
103
+ // originalText includes noscript (includeNoscript=true):
104
+ // "TitleEnable JavaScriptOriginal content" (adjacent elements are joined without spaces)
105
+ // currentText excludes noscript (includeNoscript=false):
106
+ // "TitleOriginal contentNew content"
107
+ expect(result.wordCountBefore).to.be.greaterThan(0);
108
+ expect(result.wordCountAfter).to.be.greaterThan(0);
109
+ expect(result.contentIncreaseRatio).to.be.a('number');
110
+ });
111
+
112
+ it('should include noscript in current HTML when includeNoscriptInCurrent is true', async () => {
113
+ const originalHtml = '<html><body><h1>Title</h1><noscript>Enable JavaScript</noscript><p>Original content</p></body></html>';
114
+ const currentHtml = '<html><body><h1>Title</h1><noscript>Enable JavaScript</noscript><p>Original content</p><p>New content</p></body></html>';
115
+ const resultWithout = await calculateStats(originalHtml, currentHtml, true, false);
116
+ const resultWith = await calculateStats(originalHtml, currentHtml, true, true);
117
+
118
+ // When noscript is excluded from current, word count should be lower
119
+ expect(resultWithout.wordCountAfter).to.be.lessThan(resultWith.wordCountAfter);
120
+
121
+ // Note: Text extraction concatenates without spaces, so words merge
122
+ // originalHtml with noscript: "TitleEnable JavaScriptOriginal content" = 3 words
123
+ // originalHtml without noscript: "TitleOriginal content" = 2 words
124
+ // currentHtml without noscript: "TitleOriginal contentNew content" = 3 words
125
+ // currentHtml with noscript: "TitleEnable JavaScriptOriginal contentNew content" = 4 words
126
+ expect(resultWithout.wordCountBefore).to.equal(3);
127
+ expect(resultWithout.wordCountAfter).to.equal(3);
128
+ expect(resultWith.wordCountBefore).to.equal(3);
129
+ expect(resultWith.wordCountAfter).to.equal(4);
130
+ });
67
131
  });
68
132
 
69
133
  describe('calculateBothScenarioStats', () => {
@@ -118,5 +182,55 @@ describe('HTML Visibility Analyzer', () => {
118
182
  expect(text).to.include('Navigation');
119
183
  expect(text).to.include('Footer');
120
184
  });
185
+
186
+ it('should remove noscript elements by default', async () => {
187
+ const html = '<html><body><h1>Title</h1><noscript>Please enable JavaScript</noscript><p>Content</p></body></html>';
188
+ const text = await stripTagsToText(html);
189
+
190
+ expect(text).to.include('Title');
191
+ expect(text).to.include('Content');
192
+ expect(text).to.not.include('Please enable JavaScript');
193
+ expect(text).to.not.include('noscript');
194
+ });
195
+
196
+ it('should remove noscript elements when includeNoscript is false', async () => {
197
+ const html = '<html><body><h1>Title</h1><noscript>Noscript content</noscript><p>Regular content</p></body></html>';
198
+ const text = await stripTagsToText(html, true, false);
199
+
200
+ expect(text).to.include('Title');
201
+ expect(text).to.include('Regular content');
202
+ expect(text).to.not.include('Noscript content');
203
+ });
204
+
205
+ it('should keep noscript elements when includeNoscript is true', async () => {
206
+ const html = '<html><body><h1>Title</h1><noscript>Noscript fallback</noscript><p>Regular content</p></body></html>';
207
+ const text = await stripTagsToText(html, true, true);
208
+
209
+ expect(text).to.include('Title');
210
+ expect(text).to.include('Regular content');
211
+ expect(text).to.include('Noscript fallback');
212
+ });
213
+
214
+ it('should handle multiple noscript elements with includeNoscript', async () => {
215
+ const html = `<html><body>
216
+ <h1>Title</h1>
217
+ <noscript>First noscript</noscript>
218
+ <p>Content</p>
219
+ <noscript>Second noscript</noscript>
220
+ </body></html>`;
221
+
222
+ const textWithout = await stripTagsToText(html, true, false);
223
+ const textWith = await stripTagsToText(html, true, true);
224
+
225
+ expect(textWithout).to.include('Title');
226
+ expect(textWithout).to.include('Content');
227
+ expect(textWithout).to.not.include('First noscript');
228
+ expect(textWithout).to.not.include('Second noscript');
229
+
230
+ expect(textWith).to.include('Title');
231
+ expect(textWith).to.include('Content');
232
+ expect(textWith).to.include('First noscript');
233
+ expect(textWith).to.include('Second noscript');
234
+ });
121
235
  });
122
236
  });