@adobe/spacecat-shared-html-analyzer 1.2.4 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.releaserc.cjs +6 -2
- package/CHANGELOG.md +12 -0
- package/package.json +3 -3
- package/src/html-filter.js +23 -0
- package/test/index.test.js +45 -0
package/.releaserc.cjs
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
module.exports = {
|
|
2
2
|
extends: "semantic-release-monorepo",
|
|
3
3
|
plugins: [
|
|
4
|
-
"@semantic-release/commit-analyzer",
|
|
5
|
-
|
|
4
|
+
["@semantic-release/commit-analyzer", {
|
|
5
|
+
"preset": "conventionalcommits",
|
|
6
|
+
}],
|
|
7
|
+
["@semantic-release/release-notes-generator", {
|
|
8
|
+
"preset": "conventionalcommits",
|
|
9
|
+
}],
|
|
6
10
|
["@semantic-release/changelog", {
|
|
7
11
|
"changelogFile": "CHANGELOG.md",
|
|
8
12
|
}],
|
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
## [@adobe/spacecat-shared-html-analyzer-v1.2.6](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.5...@adobe/spacecat-shared-html-analyzer-v1.2.6) (2026-03-02)
|
|
2
|
+
|
|
3
|
+
### Bug Fixes
|
|
4
|
+
|
|
5
|
+
* **deps:** update external fixes ([#1223](https://github.com/adobe/spacecat-shared/issues/1223)) ([7ee8461](https://github.com/adobe/spacecat-shared/commit/7ee8461c99223d07a2f47bd6838b6942fcb30f28))
|
|
6
|
+
|
|
7
|
+
## [@adobe/spacecat-shared-html-analyzer-v1.2.5](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.4...@adobe/spacecat-shared-html-analyzer-v1.2.5) (2026-02-23)
|
|
8
|
+
|
|
9
|
+
### Bug Fixes
|
|
10
|
+
|
|
11
|
+
* filtering out digit access related accessibility divs ([#1366](https://github.com/adobe/spacecat-shared/issues/1366)) ([3ad20a5](https://github.com/adobe/spacecat-shared/commit/3ad20a582f6a9d7ec21050fd56afcbeb16a6f096))
|
|
12
|
+
|
|
1
13
|
# [@adobe/spacecat-shared-html-analyzer-v1.2.4](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.2.3...@adobe/spacecat-shared-html-analyzer-v1.2.4) (2026-02-05)
|
|
2
14
|
|
|
3
15
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/spacecat-shared-html-analyzer",
|
|
3
|
-
"version": "1.2.4",
|
|
3
|
+
"version": "1.2.6",
|
|
4
4
|
"description": "Analyze HTML content visibility for AI crawlers and citations - compare static HTML vs fully rendered content",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -44,10 +44,10 @@
|
|
|
44
44
|
"devDependencies": {
|
|
45
45
|
"@rollup/plugin-node-resolve": "^16.0.1",
|
|
46
46
|
"@rollup/plugin-terser": "^0.4.4",
|
|
47
|
-
"chai": "6.2.
|
|
47
|
+
"chai": "6.2.2",
|
|
48
48
|
"chai-as-promised": "8.0.2",
|
|
49
49
|
"rollup": "^4.52.2",
|
|
50
|
-
"sinon": "21.0.0",
|
|
50
|
+
"sinon": "21.0.1",
|
|
51
51
|
"sinon-chai": "4.0.1"
|
|
52
52
|
},
|
|
53
53
|
"directories": {
|
package/src/html-filter.js
CHANGED
|
@@ -82,6 +82,10 @@ const COOKIE_BANNER_ARIA_SELECTORS = [
|
|
|
82
82
|
'[aria-describedby*="privacy" i]',
|
|
83
83
|
];
|
|
84
84
|
|
|
85
|
+
const ACCESSIBILITY_SELECTORS = [
|
|
86
|
+
'#digiAccess',
|
|
87
|
+
];
|
|
88
|
+
|
|
85
89
|
/**
|
|
86
90
|
* Validates if an element is likely a cookie banner based on text content
|
|
87
91
|
* Optimized: Set lookup + early exit for common keywords (3x faster)
|
|
@@ -121,6 +125,13 @@ function removeCookieBanners(element) {
|
|
|
121
125
|
});
|
|
122
126
|
}
|
|
123
127
|
|
|
128
|
+
function removeAccessibilityElements(element) {
|
|
129
|
+
const elements = element.querySelectorAll(ACCESSIBILITY_SELECTORS);
|
|
130
|
+
elements.forEach((el) => {
|
|
131
|
+
el.remove();
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
|
|
124
135
|
/**
|
|
125
136
|
* Remove navigation and footer elements from DOM element (browser environment)
|
|
126
137
|
* For Chrome extension DOM manipulation use cases
|
|
@@ -167,6 +178,12 @@ function removeCookieBannersCheerio($) {
|
|
|
167
178
|
});
|
|
168
179
|
}
|
|
169
180
|
|
|
181
|
+
function removeAccessibilityElementsCheerio($) {
|
|
182
|
+
ACCESSIBILITY_SELECTORS.forEach((selector) => {
|
|
183
|
+
$(selector).remove();
|
|
184
|
+
});
|
|
185
|
+
}
|
|
186
|
+
|
|
170
187
|
/**
|
|
171
188
|
* Remove navigation and footer elements (Node.js environment)
|
|
172
189
|
* Optimized: single cheerio query instead of 35 separate queries (35x performance improvement)
|
|
@@ -249,6 +266,9 @@ function filterHtmlBrowser(htmlContent, ignoreNavFooter, returnText, includeNosc
|
|
|
249
266
|
// Remove consent banners with intelligent detection
|
|
250
267
|
removeCookieBanners(documentElement);
|
|
251
268
|
|
|
269
|
+
// Remove accessibility elements
|
|
270
|
+
removeAccessibilityElements(documentElement);
|
|
271
|
+
|
|
252
272
|
// Conditionally remove navigation and footer elements
|
|
253
273
|
if (ignoreNavFooter) {
|
|
254
274
|
filterNavigationAndFooterBrowser(documentElement);
|
|
@@ -325,6 +345,9 @@ async function filterHtmlNode(htmlContent, ignoreNavFooter, returnText, includeN
|
|
|
325
345
|
// Remove cookie banners with comprehensive detection
|
|
326
346
|
removeCookieBannersCheerio($);
|
|
327
347
|
|
|
348
|
+
// Remove accessibility elements
|
|
349
|
+
removeAccessibilityElementsCheerio($);
|
|
350
|
+
|
|
328
351
|
// Conditionally remove navigation and footer elements
|
|
329
352
|
if (ignoreNavFooter) {
|
|
330
353
|
filterNavigationAndFooterCheerio($);
|
package/test/index.test.js
CHANGED
|
@@ -183,6 +183,51 @@ describe('HTML Visibility Analyzer', () => {
|
|
|
183
183
|
expect(text).to.include('Footer');
|
|
184
184
|
});
|
|
185
185
|
|
|
186
|
+
it('should remove accessibility widget elements', async () => {
|
|
187
|
+
const html = `<html><body>
|
|
188
|
+
<h1>Title</h1>
|
|
189
|
+
<div id="digiAccess">Accessibility Widget</div>
|
|
190
|
+
<div id="dAopener">Accessibility Opener</div>
|
|
191
|
+
<p>Content</p>
|
|
192
|
+
</body></html>`;
|
|
193
|
+
|
|
194
|
+
const text = await stripTagsToText(html, true);
|
|
195
|
+
|
|
196
|
+
expect(text).to.include('Title');
|
|
197
|
+
expect(text).to.include('Content');
|
|
198
|
+
expect(text).to.not.include('Accessibility Widget');
|
|
199
|
+
expect(text).to.include('Accessibility Opener');
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
it('should remove cookie banner when selector matches and content indicates consent', async () => {
|
|
203
|
+
const html = `<html><body>
|
|
204
|
+
<h1>Title</h1>
|
|
205
|
+
<div id="onetrust-consent-sdk">We use cookies. Manage consent preferences.</div>
|
|
206
|
+
<p>Content</p>
|
|
207
|
+
</body></html>`;
|
|
208
|
+
|
|
209
|
+
const text = await stripTagsToText(html, true);
|
|
210
|
+
|
|
211
|
+
expect(text).to.include('Title');
|
|
212
|
+
expect(text).to.include('Content');
|
|
213
|
+
expect(text).to.not.include('We use cookies');
|
|
214
|
+
expect(text).to.not.include('Manage consent preferences');
|
|
215
|
+
});
|
|
216
|
+
|
|
217
|
+
it('should not remove cookie-banner selectors when content does not indicate consent', async () => {
|
|
218
|
+
const html = `<html><body>
|
|
219
|
+
<h1>Title</h1>
|
|
220
|
+
<div id="onetrust-consent-sdk">Just a container with neutral text.</div>
|
|
221
|
+
<p>Content</p>
|
|
222
|
+
</body></html>`;
|
|
223
|
+
|
|
224
|
+
const text = await stripTagsToText(html, true);
|
|
225
|
+
|
|
226
|
+
expect(text).to.include('Title');
|
|
227
|
+
expect(text).to.include('Content');
|
|
228
|
+
expect(text).to.include('Just a container with neutral text.');
|
|
229
|
+
});
|
|
230
|
+
|
|
186
231
|
it('should remove noscript elements by default', async () => {
|
|
187
232
|
const html = '<html><body><h1>Title</h1><noscript>Please enable JavaScript</noscript><p>Content</p></body></html>';
|
|
188
233
|
const text = await stripTagsToText(html);
|