@adobe/spacecat-shared-html-analyzer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mocha-multi.json +4 -0
- package/CHANGELOG.md +14 -0
- package/CODE_OF_CONDUCT.md +74 -0
- package/CONTRIBUTING.md +74 -0
- package/LICENSE.txt +264 -0
- package/README.md +152 -0
- package/package.json +66 -0
- package/rollup.config.js +52 -0
- package/src/analyzer.js +126 -0
- package/src/browser-entry.js +92 -0
- package/src/diff-engine.js +184 -0
- package/src/html-filter.js +326 -0
- package/src/index.d.ts +172 -0
- package/src/index.js +48 -0
- package/src/tokenizer.js +116 -0
- package/src/utils.js +62 -0
- package/test/index.test.js +109 -0
- package/test/setup-env.js +21 -0
package/src/utils.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Utility functions for the HTML visibility analyzer
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
 * Generate a DJB2 hash for content comparison.
 *
 * The hash is computed over the string's UTF-16 code units (`charCodeAt`),
 * starting from the seed 5381 and applying `h = h * 33 + code` per unit.
 * The result is reduced to an unsigned 32-bit integer and rendered as
 * lowercase hexadecimal without zero padding.
 *
 * @param {string} str - String to hash
 * @returns {string} Hex hash string, or an empty string for falsy input
 */
export function hashDJB2(str) {
  if (!str) return '';
  let h = 5381;
  for (let i = 0; i < str.length; i += 1) {
    // Wrap to a signed 32-bit integer on every step. Without the `| 0` the
    // accumulator keeps growing as a double and could, on very long inputs,
    // drift past 2^53 where additions stop being exact.
    // eslint-disable-next-line no-bitwise
    h = (((h << 5) + h) + str.charCodeAt(i)) | 0;
  }
  // Reinterpret the signed 32-bit value as unsigned before formatting.
  // eslint-disable-next-line no-bitwise
  return (h >>> 0).toString(16);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Render a ratio as a percentage string with one decimal place.
 *
 * The input is multiplied by 100, so `0.5` becomes `"50.0%"`. Anything that
 * is not a finite number (NaN, ±Infinity, non-number values) yields an
 * en dash placeholder instead.
 *
 * @param {number} n - Ratio to format as percentage
 * @returns {string} Formatted percentage string, or '–' when `n` is not finite
 */
export function pct(n) {
  if (!Number.isFinite(n)) {
    return '–';
  }
  const scaled = n * 100;
  return `${scaled.toFixed(1)}%`;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Format a number in compact K/M notation for readability.
 *
 * - Values of 1,000,000 and above are shown in millions with an `M` suffix.
 * - Values of 10,000 and above are shown in thousands with a `K` suffix.
 * - Everything else is returned via `toString()` unchanged.
 *
 * Scaled values keep at most one decimal place, and a trailing `.0` is dropped
 * (e.g. `10000` -> `"10K"`, `12345` -> `"12.3K"`).
 *
 * @param {number} num - Number to format
 * @returns {string} Formatted number string
 */
export function formatNumberToK(num) {
  // One decimal place, with a redundant ".0" removed.
  const toCompact = (value) => value.toFixed(1).replace(/\.0$/, '');

  if (num >= 1000000) {
    return `${toCompact(num / 1000000)}M`;
  }
  if (num >= 10000) {
    const thousands = toCompact(num / 1000);
    // Values just below one million round up to "1000"; promote them to the
    // next unit so the output is "1M" rather than "1000K".
    return thousands === '1000' ? '1M' : `${thousands}K`;
  }
  return num.toString();
}
|
|
55
|
+
|
|
56
|
+
/**
 * Detect whether the code is executing in a browser-like environment.
 *
 * The check requires both the `window` and `document` globals to be defined.
 *
 * @returns {boolean} True if both globals exist, false otherwise
 */
export function isBrowser() {
  if (typeof window === 'undefined') {
    return false;
  }
  return typeof document !== 'undefined';
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { expect } from 'chai';
|
|
14
|
+
import {
|
|
15
|
+
analyzeTextComparison,
|
|
16
|
+
calculateStats,
|
|
17
|
+
calculateBothScenarioStats,
|
|
18
|
+
stripTagsToText,
|
|
19
|
+
} from '../src/index.js';
|
|
20
|
+
|
|
21
|
+
describe('HTML Visibility Analyzer', () => {
  // Fixtures: a minimal page, and the same page extended with an inline
  // script and an additional div.
  const simpleHtml = '<html><body><h1>Title</h1><p>Content here</p></body></html>';
  const richHtml = '<html><body><h1>Title</h1><p>Content here</p><script>console.log("loaded")</script><div class="dynamic">Dynamic content</div></body></html>';

  // Asserts that `subject` exposes every property listed in `names`.
  const expectProperties = (subject, names) => {
    names.forEach((name) => {
      expect(subject).to.have.property(name);
    });
  };

  describe('analyzeTextComparison', () => {
    it('should analyze content differences', async () => {
      const comparison = await analyzeTextComparison(simpleHtml, richHtml);

      expectProperties(comparison, [
        'initialText',
        'finalText',
        'textRetention',
        'wordDiff',
        'lineDiff',
      ]);
    });

    it('should handle identical content', async () => {
      const comparison = await analyzeTextComparison(simpleHtml, simpleHtml);

      expect(comparison.textRetention).to.equal(1);
      expect(comparison.initialText).to.equal(comparison.finalText);
    });

    it('should handle empty content', async () => {
      const comparison = await analyzeTextComparison('', richHtml);

      expect(comparison.initialText).to.equal('');
      expect(comparison.finalText.length).to.be.greaterThan(0);
    });
  });

  describe('calculateStats', () => {
    it('should provide basic comparison statistics', async () => {
      const stats = await calculateStats(simpleHtml, richHtml);
      const numericFields = ['wordDiff', 'contentIncreaseRatio', 'citationReadability'];

      // Presence is checked for all fields first, then their types.
      expectProperties(stats, numericFields);
      numericFields.forEach((field) => {
        expect(stats[field]).to.be.a('number');
      });
    });
  });

  describe('calculateBothScenarioStats', () => {
    it('should provide statistics for both scenarios', async () => {
      const scenarios = await calculateBothScenarioStats(simpleHtml, richHtml);

      expectProperties(scenarios, ['withNavFooterIgnored', 'withoutNavFooterIgnored']);
      expect(scenarios.withNavFooterIgnored).to.have.property('contentGain');
      expect(scenarios.withoutNavFooterIgnored).to.have.property('missingWords');
    });
  });

  describe('stripTagsToText', () => {
    // Page with nav and footer regions around the main content; shared by the
    // two ignoreNavFooter tests below.
    const navFooterHtml = '<html><body><nav>Navigation</nav><h1>Title</h1><p>Content</p><footer>Footer</footer></body></html>';

    it('should extract text content from HTML', async () => {
      const markup = '<div><h1>Title</h1><p>Content with <strong>bold</strong> text</p></div>';
      const extracted = await stripTagsToText(markup);

      ['Title', 'Content with', 'bold', 'text'].forEach((fragment) => {
        expect(extracted).to.include(fragment);
      });
      ['<', '>'].forEach((bracket) => {
        expect(extracted).to.not.include(bracket);
      });
    });

    it('should remove navigation elements when ignoreNavFooter is true', async () => {
      const extracted = await stripTagsToText(navFooterHtml, true);

      expect(extracted).to.include('Title');
      expect(extracted).to.include('Content');
      expect(extracted).to.not.include('Navigation');
      expect(extracted).to.not.include('Footer');
    });

    it('should keep navigation elements when ignoreNavFooter is false', async () => {
      const extracted = await stripTagsToText(navFooterHtml, false);

      ['Title', 'Content', 'Navigation', 'Footer'].forEach((fragment) => {
        expect(extracted).to.include(fragment);
      });
    });
  });
});
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// eslint-disable-next-line import/no-extraneous-dependencies
|
|
14
|
+
import { use } from 'chai';
|
|
15
|
+
// eslint-disable-next-line import/no-extraneous-dependencies
|
|
16
|
+
import chaiAsPromised from 'chai-as-promised';
|
|
17
|
+
// eslint-disable-next-line import/no-extraneous-dependencies
|
|
18
|
+
import sinonChai from 'sinon-chai';
|
|
19
|
+
|
|
20
|
+
// Register the chai plugins: chai-as-promised first, then sinon-chai.
[chaiAsPromised, sinonChai].forEach((plugin) => {
  use(plugin);
});
|