@adobe/spacecat-shared-html-analyzer 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-html-analyzer-v1.0.5](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.0.4...@adobe/spacecat-shared-html-analyzer-v1.0.5) (2025-11-06)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * html-analyser updating cookie selector ([#1096](https://github.com/adobe/spacecat-shared/issues/1096)) ([f1e0f49](https://github.com/adobe/spacecat-shared/commit/f1e0f49a9ade3fd2f4299515283d684f7c388835))
7
+
8
+ # [@adobe/spacecat-shared-html-analyzer-v1.0.4](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.0.3...@adobe/spacecat-shared-html-analyzer-v1.0.4) (2025-11-03)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * adding wordCountBefore & wordCountAfter ([#1084](https://github.com/adobe/spacecat-shared/issues/1084)) ([d3ae176](https://github.com/adobe/spacecat-shared/commit/d3ae1769e9a8548da3396027897e55d479afec2f))
14
+
1
15
  # [@adobe/spacecat-shared-html-analyzer-v1.0.3](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.0.2...@adobe/spacecat-shared-html-analyzer-v1.0.3) (2025-10-29)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-html-analyzer",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "description": "Analyze HTML content visibility for AI crawlers and citations - compare static HTML vs fully rendered content",
5
5
  "type": "module",
6
6
  "engines": {
package/src/analyzer.js CHANGED
@@ -74,7 +74,9 @@ export async function calculateStats(originalHTML, currentHTML, ignoreNavFooter
74
74
  // Calculate word counts using consistent tokenization
75
75
  const originalTokens = tokenize(originalText, 'word');
76
76
  const currentTokens = tokenize(currentText, 'word');
77
- const wordDiff = Math.abs(currentTokens.length - originalTokens.length);
77
+ const wordCountBefore = originalTokens.length;
78
+ const wordCountAfter = currentTokens.length;
79
+ const wordDiff = Math.abs(wordCountAfter - wordCountBefore);
78
80
 
79
81
  // Calculate content increase ratio (how many times content increased)
80
82
  let contentIncreaseRatio;
@@ -89,6 +91,8 @@ export async function calculateStats(originalHTML, currentHTML, ignoreNavFooter
89
91
  ? Math.min(100, (originalTokens.length / currentTokens.length) * 100) : 100;
90
92
 
91
93
  return {
94
+ wordCountBefore,
95
+ wordCountAfter,
92
96
  wordDiff,
93
97
  contentIncreaseRatio: Math.round(contentIncreaseRatio * 100) / 100, // Round to 2 decimal places
94
98
  citationReadability: Math.round(citationReadability),
@@ -109,6 +113,8 @@ export async function calculateBothScenarioStats(originalHTML, currentHTML) {
109
113
  const statsNotIgnored = await calculateStats(originalHTML, currentHTML, false);
110
114
  return {
111
115
  withNavFooterIgnored: {
116
+ wordCountBefore: statsIgnored.wordCountBefore,
117
+ wordCountAfter: statsIgnored.wordCountAfter,
112
118
  wordDiff: statsIgnored.wordDiff,
113
119
  contentIncreaseRatio: statsIgnored.contentIncreaseRatio,
114
120
  citationReadability: statsIgnored.citationReadability,
@@ -116,6 +122,8 @@ export async function calculateBothScenarioStats(originalHTML, currentHTML) {
116
122
  missingWords: statsIgnored.wordDiff,
117
123
  },
118
124
  withoutNavFooterIgnored: {
125
+ wordCountBefore: statsNotIgnored.wordCountBefore,
126
+ wordCountAfter: statsNotIgnored.wordCountAfter,
119
127
  wordDiff: statsNotIgnored.wordDiff,
120
128
  contentIncreaseRatio: statsNotIgnored.contentIncreaseRatio,
121
129
  citationReadability: statsNotIgnored.citationReadability,
@@ -60,6 +60,7 @@ const COOKIE_BANNER_CLASS_SELECTORS = [
60
60
  '.cookie-popup', '.privacy-popup', '.consent-popup', '.gdpr-popup',
61
61
  '.cookie-modal', '.privacy-modal', '.consent-modal', '.gdpr-modal',
62
62
  '.cookie-overlay', '.privacy-overlay', '.consent-overlay', '.gdpr-overlay',
63
+ '[class*="syrenis-cookie"]',
63
64
  ];
64
65
 
65
66
  const COOKIE_BANNER_ID_SELECTORS = [
package/src/index.d.ts CHANGED
@@ -125,12 +125,16 @@ interface TextComparison {
125
125
  }
126
126
 
127
127
  interface BasicStats {
128
+ wordCountBefore: number;
129
+ wordCountAfter: number;
128
130
  wordDiff: number;
129
131
  contentIncreaseRatio: number;
130
132
  citationReadability: number;
131
133
  }
132
134
 
133
135
  interface ScenarioStats {
136
+ wordCountBefore: number;
137
+ wordCountAfter: number;
134
138
  wordDiff: number;
135
139
  contentIncreaseRatio: number;
136
140
  citationReadability: number;
@@ -52,10 +52,14 @@ describe('HTML Visibility Analyzer', () => {
52
52
  it('should provide basic comparison statistics', async () => {
53
53
  const result = await calculateStats(simpleHtml, richHtml);
54
54
 
55
+ expect(result).to.have.property('wordCountBefore');
56
+ expect(result).to.have.property('wordCountAfter');
55
57
  expect(result).to.have.property('wordDiff');
56
58
  expect(result).to.have.property('contentIncreaseRatio');
57
59
  expect(result).to.have.property('citationReadability');
58
60
 
61
+ expect(result.wordCountBefore).to.be.a('number');
62
+ expect(result.wordCountAfter).to.be.a('number');
59
63
  expect(result.wordDiff).to.be.a('number');
60
64
  expect(result.contentIncreaseRatio).to.be.a('number');
61
65
  expect(result.citationReadability).to.be.a('number');
@@ -68,7 +72,16 @@ describe('HTML Visibility Analyzer', () => {
68
72
 
69
73
  expect(result).to.have.property('withNavFooterIgnored');
70
74
  expect(result).to.have.property('withoutNavFooterIgnored');
75
+
76
+ // Verify withNavFooterIgnored has all required properties
77
+ expect(result.withNavFooterIgnored).to.have.property('wordCountBefore');
78
+ expect(result.withNavFooterIgnored).to.have.property('wordCountAfter');
71
79
  expect(result.withNavFooterIgnored).to.have.property('contentGain');
80
+ expect(result.withNavFooterIgnored).to.have.property('missingWords');
81
+
82
+ // Verify withoutNavFooterIgnored has all required properties
83
+ expect(result.withoutNavFooterIgnored).to.have.property('wordCountBefore');
84
+ expect(result.withoutNavFooterIgnored).to.have.property('wordCountAfter');
72
85
  expect(result.withoutNavFooterIgnored).to.have.property('missingWords');
73
86
  });
74
87
  });