@adobe/spacecat-shared-html-analyzer 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.releaserc.cjs CHANGED
@@ -1,8 +1,12 @@
1
1
  module.exports = {
2
2
  extends: "semantic-release-monorepo",
3
3
  plugins: [
4
- "@semantic-release/commit-analyzer",
5
- "@semantic-release/release-notes-generator",
4
+ ["@semantic-release/commit-analyzer", {
5
+ "preset": "conventionalcommits",
6
+ }],
7
+ ["@semantic-release/release-notes-generator", {
8
+ "preset": "conventionalcommits",
9
+ }],
6
10
  ["@semantic-release/changelog", {
7
11
  "changelogFile": "CHANGELOG.md",
8
12
  }],
package/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ## [@adobe/spacecat-shared-html-analyzer-v1.2.5](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.4...@adobe/spacecat-shared-html-analyzer-v1.2.5) (2026-02-23)
2
+
3
+ ### Bug Fixes
4
+
5
+ * filter out digiAccess-related accessibility widget divs ([#1366](https://github.com/adobe/spacecat-shared/issues/1366)) ([3ad20a5](https://github.com/adobe/spacecat-shared/commit/3ad20a582f6a9d7ec21050fd56afcbeb16a6f096))
6
+
7
+ # [@adobe/spacecat-shared-html-analyzer-v1.2.4](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.3...@adobe/spacecat-shared-html-analyzer-v1.2.4) (2026-02-05)
8
+
9
+
10
+ ### Bug Fixes
11
+
12
+ * rcv adding new cookie selectors ([#1323](https://github.com/adobe/spacecat-shared/issues/1323)) ([9c566e6](https://github.com/adobe/spacecat-shared/commit/9c566e6dccac85db2da4915dfa143c6bb7d43f90))
13
+
1
14
  # [@adobe/spacecat-shared-html-analyzer-v1.2.3](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.2...@adobe/spacecat-shared-html-analyzer-v1.2.3) (2026-02-04)
2
15
 
3
16
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-html-analyzer",
3
- "version": "1.2.3",
3
+ "version": "1.2.5",
4
4
  "description": "Analyze HTML content visibility for AI crawlers and citations - compare static HTML vs fully rendered content",
5
5
  "type": "module",
6
6
  "engines": {
@@ -59,6 +59,7 @@ const COOKIE_BANNER_CLASS_SELECTORS = [
59
59
  '.cookie-modal', '.privacy-modal', '.consent-modal', '.gdpr-modal',
60
60
  '.cookie-overlay', '.privacy-overlay', '.consent-overlay', '.gdpr-overlay',
61
61
  '[class*="syrenis-cookie"]',
62
+ '.tc-privacy-wrapper',
62
63
  ];
63
64
 
64
65
  const COOKIE_BANNER_ID_SELECTORS = [
@@ -67,6 +68,7 @@ const COOKIE_BANNER_ID_SELECTORS = [
67
68
  '#cookie-bar', '#privacy-bar', '#consent-bar', '#gdpr-bar', '#cookiemgmt',
68
69
  '#cookie-popup', '#privacy-popup', '#consent-popup', '#gdpr-popup',
69
70
  '#onetrust-consent-sdk', '#onetrust-banner-sdk',
71
+ '#tc-privacy-wrapper',
70
72
  ];
71
73
 
72
74
  const COOKIE_BANNER_ARIA_SELECTORS = [
@@ -80,6 +82,10 @@ const COOKIE_BANNER_ARIA_SELECTORS = [
80
82
  '[aria-describedby*="privacy" i]',
81
83
  ];
82
84
 
85
+ const ACCESSIBILITY_SELECTORS = [
86
+ '#digiAccess',
87
+ ];
88
+
83
89
  /**
84
90
  * Validates if an element is likely a cookie banner based on text content
85
91
  * Optimized: Set lookup + early exit for common keywords (3x faster)
@@ -119,6 +125,13 @@ function removeCookieBanners(element) {
119
125
  });
120
126
  }
121
127
 
128
+ function removeAccessibilityElements(element) {
129
+ const elements = element.querySelectorAll(ACCESSIBILITY_SELECTORS);
130
+ elements.forEach((el) => {
131
+ el.remove();
132
+ });
133
+ }
134
+
122
135
  /**
123
136
  * Remove navigation and footer elements from DOM element (browser environment)
124
137
  * For Chrome extension DOM manipulation use cases
@@ -165,6 +178,12 @@ function removeCookieBannersCheerio($) {
165
178
  });
166
179
  }
167
180
 
181
+ function removeAccessibilityElementsCheerio($) {
182
+ ACCESSIBILITY_SELECTORS.forEach((selector) => {
183
+ $(selector).remove();
184
+ });
185
+ }
186
+
168
187
  /**
169
188
  * Remove navigation and footer elements (Node.js environment)
170
189
  * Optimized: single cheerio query instead of 35 separate queries (35x performance improvement)
@@ -247,6 +266,9 @@ function filterHtmlBrowser(htmlContent, ignoreNavFooter, returnText, includeNosc
247
266
  // Remove consent banners with intelligent detection
248
267
  removeCookieBanners(documentElement);
249
268
 
269
+ // Remove accessibility widget containers (those matching ACCESSIBILITY_SELECTORS, e.g. #digiAccess)
270
+ removeAccessibilityElements(documentElement);
271
+
250
272
  // Conditionally remove navigation and footer elements
251
273
  if (ignoreNavFooter) {
252
274
  filterNavigationAndFooterBrowser(documentElement);
@@ -323,6 +345,9 @@ async function filterHtmlNode(htmlContent, ignoreNavFooter, returnText, includeN
323
345
  // Remove cookie banners with comprehensive detection
324
346
  removeCookieBannersCheerio($);
325
347
 
348
+ // Remove accessibility widget containers (those matching ACCESSIBILITY_SELECTORS, e.g. #digiAccess)
349
+ removeAccessibilityElementsCheerio($);
350
+
326
351
  // Conditionally remove navigation and footer elements
327
352
  if (ignoreNavFooter) {
328
353
  filterNavigationAndFooterCheerio($);
@@ -183,6 +183,51 @@ describe('HTML Visibility Analyzer', () => {
183
183
  expect(text).to.include('Footer');
184
184
  });
185
185
 
186
+ it('should remove accessibility widget elements', async () => {
187
+ const html = `<html><body>
188
+ <h1>Title</h1>
189
+ <div id="digiAccess">Accessibility Widget</div>
190
+ <div id="dAopener">Accessibility Opener</div>
191
+ <p>Content</p>
192
+ </body></html>`;
193
+
194
+ const text = await stripTagsToText(html, true);
195
+
196
+ expect(text).to.include('Title');
197
+ expect(text).to.include('Content');
198
+ expect(text).to.not.include('Accessibility Widget');
199
+ expect(text).to.include('Accessibility Opener');
200
+ });
201
+
202
+ it('should remove cookie banner when selector matches and content indicates consent', async () => {
203
+ const html = `<html><body>
204
+ <h1>Title</h1>
205
+ <div id="onetrust-consent-sdk">We use cookies. Manage consent preferences.</div>
206
+ <p>Content</p>
207
+ </body></html>`;
208
+
209
+ const text = await stripTagsToText(html, true);
210
+
211
+ expect(text).to.include('Title');
212
+ expect(text).to.include('Content');
213
+ expect(text).to.not.include('We use cookies');
214
+ expect(text).to.not.include('Manage consent preferences');
215
+ });
216
+
217
+ it('should not remove cookie-banner selectors when content does not indicate consent', async () => {
218
+ const html = `<html><body>
219
+ <h1>Title</h1>
220
+ <div id="onetrust-consent-sdk">Just a container with neutral text.</div>
221
+ <p>Content</p>
222
+ </body></html>`;
223
+
224
+ const text = await stripTagsToText(html, true);
225
+
226
+ expect(text).to.include('Title');
227
+ expect(text).to.include('Content');
228
+ expect(text).to.include('Just a container with neutral text.');
229
+ });
230
+
186
231
  it('should remove noscript elements by default', async () => {
187
232
  const html = '<html><body><h1>Title</h1><noscript>Please enable JavaScript</noscript><p>Content</p></body></html>';
188
233
  const text = await stripTagsToText(html);