@adobe/spacecat-shared-html-analyzer 1.0.6 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +6 -3
- package/rollup.config.js +3 -1
- package/src/browser-entry.js +20 -10
- package/src/index.js +14 -0
- package/src/markdown-converter.js +105 -0
- package/src/markdown-diff.js +247 -0
- package/src/utils.js +16 -0
- package/test/markdown.test.js +644 -0
- package/test/utils.test.js +201 -0
- package/vendor/marked.min.js +74 -0
- package/vendor/turndown.min.js +974 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-html-analyzer-v1.1.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.0.7...@adobe/spacecat-shared-html-analyzer-v1.1.0) (2025-12-01)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* added utilities for markdown diff & conversion from LLMO chrome extension ([#1184](https://github.com/adobe/spacecat-shared/issues/1184)) ([dc9867e](https://github.com/adobe/spacecat-shared/commit/dc9867ea4ac0cf9f8bd2fdc3f22ab74cd3e1f12e))
|
|
7
|
+
|
|
8
|
+
# [@adobe/spacecat-shared-html-analyzer-v1.0.7](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.0.6...@adobe/spacecat-shared-html-analyzer-v1.0.7) (2025-11-28)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Bug Fixes
|
|
12
|
+
|
|
13
|
+
* update to node 24 ([#1179](https://github.com/adobe/spacecat-shared/issues/1179)) ([0e60c0a](https://github.com/adobe/spacecat-shared/commit/0e60c0ab791b47662d07822f7c93009a8f7048fd))
|
|
14
|
+
|
|
1
15
|
# [@adobe/spacecat-shared-html-analyzer-v1.0.6](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-html-analyzer-v1.0.5...@adobe/spacecat-shared-html-analyzer-v1.0.6) (2025-11-15)
|
|
2
16
|
|
|
3
17
|
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/spacecat-shared-html-analyzer",
|
|
3
|
-
"version": "1.0.6",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Analyze HTML content visibility for AI crawlers and citations - compare static HTML vs fully rendered content",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
7
|
-
"node": ">=22.0.0 <
|
|
7
|
+
"node": ">=22.0.0 <25.0.0",
|
|
8
8
|
"npm": ">=10.9.0 <12.0.0"
|
|
9
9
|
},
|
|
10
10
|
"main": "src/index.js",
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
"scripts": {
|
|
13
13
|
"test": "c8 mocha",
|
|
14
14
|
"lint": "eslint .",
|
|
15
|
+
"lint:fix": "eslint --fix .",
|
|
15
16
|
"clean": "rm -rf package-lock.json node_modules",
|
|
16
17
|
"build": "rollup -c",
|
|
17
18
|
"build:chrome": "rollup -c && echo '✅ Chrome extension bundle ready: dist/html-analyzer.min.js'"
|
|
@@ -36,7 +37,9 @@
|
|
|
36
37
|
"access": "public"
|
|
37
38
|
},
|
|
38
39
|
"dependencies": {
|
|
39
|
-
"cheerio": "^1.0.0-rc.12"
|
|
40
|
+
"cheerio": "^1.0.0-rc.12",
|
|
41
|
+
"turndown": "^7.2.0",
|
|
42
|
+
"marked": "^16.2.0"
|
|
40
43
|
},
|
|
41
44
|
"devDependencies": {
|
|
42
45
|
"@rollup/plugin-node-resolve": "^16.0.1",
|
package/rollup.config.js
CHANGED
|
@@ -57,8 +57,10 @@ export default {
|
|
|
57
57
|
}),
|
|
58
58
|
],
|
|
59
59
|
external: [
|
|
60
|
-
// Exclude
|
|
60
|
+
// Exclude Node.js-only dependencies from bundle - they won't work in browser anyway
|
|
61
61
|
'cheerio',
|
|
62
|
+
'turndown',
|
|
63
|
+
'marked',
|
|
62
64
|
],
|
|
63
65
|
onwarn(warning, warn) {
|
|
64
66
|
// Suppress warnings about dynamic imports that we'll handle
|
package/src/browser-entry.js
CHANGED
|
@@ -32,10 +32,18 @@ import {
|
|
|
32
32
|
countLines,
|
|
33
33
|
diffTokens,
|
|
34
34
|
generateDiffReport,
|
|
35
|
+
htmlToMarkdown,
|
|
36
|
+
markdownToHtml,
|
|
37
|
+
htmlToMarkdownToHtml,
|
|
38
|
+
diffDOMBlocks,
|
|
39
|
+
createMarkdownTableDiff,
|
|
40
|
+
generateMarkdownDiff,
|
|
41
|
+
htmlToRenderedMarkdown,
|
|
35
42
|
hashDJB2,
|
|
36
43
|
pct,
|
|
37
44
|
formatNumberToK,
|
|
38
45
|
isBrowser,
|
|
46
|
+
getGlobalObject,
|
|
39
47
|
} from './index.js';
|
|
40
48
|
|
|
41
49
|
// Create global object for Chrome extension
|
|
@@ -60,6 +68,17 @@ const HTMLAnalyzer = {
|
|
|
60
68
|
diffTokens,
|
|
61
69
|
generateDiffReport,
|
|
62
70
|
|
|
71
|
+
// Markdown conversion functions
|
|
72
|
+
htmlToMarkdown,
|
|
73
|
+
markdownToHtml,
|
|
74
|
+
htmlToMarkdownToHtml,
|
|
75
|
+
|
|
76
|
+
// Markdown diff functions
|
|
77
|
+
diffDOMBlocks,
|
|
78
|
+
createMarkdownTableDiff,
|
|
79
|
+
generateMarkdownDiff,
|
|
80
|
+
htmlToRenderedMarkdown,
|
|
81
|
+
|
|
63
82
|
// Utility functions
|
|
64
83
|
hashDJB2,
|
|
65
84
|
pct,
|
|
@@ -73,17 +92,8 @@ const HTMLAnalyzer = {
|
|
|
73
92
|
|
|
74
93
|
// Make available globally for Chrome extension script tags
|
|
75
94
|
// This needs to be executed immediately when the bundle loads
|
|
76
|
-
/* eslint-env browser */
|
|
77
|
-
/* global window, self */
|
|
78
95
|
(function setGlobal() {
|
|
79
|
-
|
|
80
|
-
const globalObject = (function getGlobalObject() {
|
|
81
|
-
if (typeof window !== 'undefined') return window;
|
|
82
|
-
if (typeof globalThis !== 'undefined') return globalThis;
|
|
83
|
-
if (typeof self !== 'undefined') return self;
|
|
84
|
-
return this || {};
|
|
85
|
-
}());
|
|
86
|
-
|
|
96
|
+
const globalObject = getGlobalObject();
|
|
87
97
|
// Assign to global scope
|
|
88
98
|
globalObject.HTMLAnalyzer = HTMLAnalyzer;
|
|
89
99
|
}());
|
package/src/index.js
CHANGED
|
@@ -40,9 +40,23 @@ export {
|
|
|
40
40
|
calculateBothScenarioStats,
|
|
41
41
|
} from './analyzer.js';
|
|
42
42
|
|
|
43
|
+
export {
|
|
44
|
+
htmlToMarkdown,
|
|
45
|
+
markdownToHtml,
|
|
46
|
+
htmlToMarkdownToHtml,
|
|
47
|
+
} from './markdown-converter.js';
|
|
48
|
+
|
|
49
|
+
export {
|
|
50
|
+
diffDOMBlocks,
|
|
51
|
+
createMarkdownTableDiff,
|
|
52
|
+
generateMarkdownDiff,
|
|
53
|
+
htmlToRenderedMarkdown,
|
|
54
|
+
} from './markdown-diff.js';
|
|
55
|
+
|
|
43
56
|
export {
|
|
44
57
|
hashDJB2,
|
|
45
58
|
pct,
|
|
46
59
|
formatNumberToK,
|
|
47
60
|
isBrowser,
|
|
61
|
+
getGlobalObject,
|
|
48
62
|
} from './utils.js';
|
package/src/markdown-converter.js
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Markdown conversion utilities
|
|
15
|
+
* Provides HTML to Markdown and Markdown to HTML conversions
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { isBrowser, getGlobalObject } from './utils.js';
|
|
19
|
+
|
|
20
|
+
// Module-level caches for the Node.js code path: each starts as null and is filled the first
// time getTurndownService() / getMarked() dynamically imports its package.
|
|
21
|
+
let TurndownServiceClass = null;
|
|
22
|
+
let markedParser = null;
|
|
23
|
+
|
|
24
|
+
/**
 * Create a new Turndown converter for the current runtime.
 *
 * Outside the browser the `turndown` package is imported on first use and its default
 * export is kept in `TurndownServiceClass` for later calls. In the browser the constructor
 * is read from the global object and must already be present there.
 * @private
 * @returns {Promise<Object>} A fresh TurndownService instance (one per call)
 * @throws {Error} In the browser when the global object has no `TurndownService`
 */
async function getTurndownService() {
  if (!isBrowser()) {
    // Node.js: resolve the constructor once, then reuse it
    if (!TurndownServiceClass) {
      const { default: ImportedTurndown } = await import('turndown');
      TurndownServiceClass = ImportedTurndown;
    }
    return new TurndownServiceClass();
  }

  // Browser: the library is expected to have been loaded as a global beforehand
  const { TurndownService: GlobalTurndown } = getGlobalObject();
  if (!GlobalTurndown) {
    throw new Error('TurndownService must be loaded in browser environment');
  }
  return new GlobalTurndown();
}
|
|
45
|
+
|
|
46
|
+
/**
 * Resolve the `marked` parser for the current runtime.
 *
 * Outside the browser the `marked` package is imported on first use and its `marked`
 * export is kept in `markedParser` for later calls. In the browser the parser is read
 * from the global object and must already be present there.
 * @private
 * @returns {Promise<Object>} The marked parser
 * @throws {Error} In the browser when the global object has no `marked`
 */
async function getMarked() {
  if (!isBrowser()) {
    // Node.js: import once and hand back the cached parser afterwards
    if (!markedParser) {
      const { marked: importedMarked } = await import('marked');
      markedParser = importedMarked;
    }
    return markedParser;
  }

  // Browser: the library is expected to have been loaded as a global beforehand
  const { marked: globalMarked } = getGlobalObject();
  if (!globalMarked) {
    throw new Error('marked must be loaded in browser environment');
  }
  return globalMarked;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Convert an HTML string to Markdown using Turndown.
 * @param {string} html - HTML content to convert
 * @returns {Promise<string>} Markdown content; an empty string when `html` is empty
 * or not a string
 */
export async function htmlToMarkdown(html) {
  // Only non-empty primitive strings are worth handing to the converter
  const isConvertible = typeof html === 'string' && html.length > 0;
  if (!isConvertible) {
    return '';
  }

  const converter = await getTurndownService();
  return converter.turndown(html);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Render a Markdown string to HTML using marked.
 * @param {string} markdown - Markdown content to convert
 * @returns {Promise<string>} HTML content; an empty string when `markdown` is empty
 * or not a string
 */
export async function markdownToHtml(markdown) {
  // Only non-empty primitive strings are worth handing to the parser
  const isRenderable = typeof markdown === 'string' && markdown.length > 0;
  if (!isRenderable) {
    return '';
  }

  const parser = await getMarked();
  return parser.parse(markdown);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Round-trip HTML through Markdown: convert it with htmlToMarkdown and render the result
 * back with markdownToHtml. Useful for normalizing HTML via its markdown representation.
 * @param {string} html - HTML content to convert
 * @returns {Promise<string>} HTML rendered from the intermediate markdown
 */
export async function htmlToMarkdownToHtml(html) {
  return markdownToHtml(await htmlToMarkdown(html));
}
|
package/src/markdown-diff.js
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Markdown diff utilities
|
|
15
|
+
* Provides DOM block-level diffing for markdown content
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { filterHtmlContent } from './html-filter.js';
|
|
19
|
+
import { htmlToMarkdown, markdownToHtml } from './markdown-converter.js';
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Diff DOM blocks using LCS algorithm
|
|
23
|
+
* Compares blocks based on text content while preserving full HTML structure
|
|
24
|
+
* @param {Array<{html: string, text: string, tagName: string}>} originalBlocks
|
|
25
|
+
* - Original DOM blocks
|
|
26
|
+
* @param {Array<{html: string, text: string, tagName: string}>} currentBlocks
|
|
27
|
+
* - Current DOM blocks
|
|
28
|
+
* @returns {Array<{type: 'same'|'del'|'add', originalBlock?: Object,
|
|
29
|
+
* currentBlock?: Object}>} Diff operations
|
|
30
|
+
*/
|
|
31
|
+
export function diffDOMBlocks(originalBlocks, currentBlocks) {
|
|
32
|
+
// Create a mapping function that uses text content for comparison
|
|
33
|
+
// while preserving the full HTML structure
|
|
34
|
+
const A = originalBlocks.map((block) => block.text);
|
|
35
|
+
const B = currentBlocks.map((block) => block.text);
|
|
36
|
+
|
|
37
|
+
// Map tokens to ints for faster LCS
|
|
38
|
+
const sym = new Map();
|
|
39
|
+
const mapTok = (t) => {
|
|
40
|
+
if (!sym.has(t)) sym.set(t, sym.size + 1);
|
|
41
|
+
return sym.get(t);
|
|
42
|
+
};
|
|
43
|
+
const a = A.map(mapTok);
|
|
44
|
+
const b = B.map(mapTok);
|
|
45
|
+
|
|
46
|
+
// LCS length table
|
|
47
|
+
const m = a.length;
|
|
48
|
+
const n = b.length;
|
|
49
|
+
const dp = Array(m + 1).fill(0).map(() => Array(n + 1).fill(0));
|
|
50
|
+
for (let i = 1; i <= m; i += 1) {
|
|
51
|
+
for (let j = 1; j <= n; j += 1) {
|
|
52
|
+
dp[i][j] = (a[i - 1] === b[j - 1])
|
|
53
|
+
? dp[i - 1][j - 1] + 1
|
|
54
|
+
: Math.max(dp[i - 1][j], dp[i][j - 1]);
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Backtrack to collect ops with full block data
|
|
59
|
+
const ops = [];
|
|
60
|
+
let i = m;
|
|
61
|
+
let j = n;
|
|
62
|
+
while (i > 0 && j > 0) {
|
|
63
|
+
if (a[i - 1] === b[j - 1]) {
|
|
64
|
+
ops.push({
|
|
65
|
+
type: 'same',
|
|
66
|
+
originalBlock: originalBlocks[i - 1],
|
|
67
|
+
currentBlock: currentBlocks[j - 1],
|
|
68
|
+
});
|
|
69
|
+
i -= 1;
|
|
70
|
+
j -= 1;
|
|
71
|
+
} else if (dp[i - 1][j] >= dp[i][j - 1]) {
|
|
72
|
+
ops.push({
|
|
73
|
+
type: 'del',
|
|
74
|
+
originalBlock: originalBlocks[i - 1],
|
|
75
|
+
});
|
|
76
|
+
i -= 1;
|
|
77
|
+
} else {
|
|
78
|
+
ops.push({
|
|
79
|
+
type: 'add',
|
|
80
|
+
currentBlock: currentBlocks[j - 1],
|
|
81
|
+
});
|
|
82
|
+
j -= 1;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
while (i > 0) {
|
|
86
|
+
ops.push({
|
|
87
|
+
type: 'del',
|
|
88
|
+
originalBlock: originalBlocks[i - 1],
|
|
89
|
+
});
|
|
90
|
+
i -= 1;
|
|
91
|
+
}
|
|
92
|
+
while (j > 0) {
|
|
93
|
+
ops.push({
|
|
94
|
+
type: 'add',
|
|
95
|
+
currentBlock: currentBlocks[j - 1],
|
|
96
|
+
});
|
|
97
|
+
j -= 1;
|
|
98
|
+
}
|
|
99
|
+
ops.reverse();
|
|
100
|
+
return ops;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Children of an <li> that are treated as blocks of their own
const LIST_ITEM_BLOCK_TAGS = new Set([
  'P', 'DIV', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'BLOCKQUOTE', 'PRE',
]);

/**
 * Flatten parsed elements into diffable blocks.
 *
 * Non-list elements become one block each. UL/OL elements are split per LI child: an item
 * whose direct children include block tags (p, div, h1-h6, blockquote, pre) yields one
 * block per non-empty such child, any other item yields a single block. List-derived
 * blocks are re-wrapped in their list tag so they still display as list entries.
 * Items and nested blocks whose trimmed text is empty are dropped.
 * @param {Array} children - Array of child elements
 * @returns {Array<{html: string, text: string, tagName: string}>} Extracted blocks
 * @private
 */
function extractBlocks(children) {
  const blocks = [];
  const trimmedText = (node) => node.textContent?.trim() || '';

  const collectListItem = (li, listType) => {
    // Empty items only produce blank rows that throw the alignment off
    const itemText = trimmedText(li);
    if (!itemText) return;

    const nested = Array.from(li.children)
      .filter((child) => LIST_ITEM_BLOCK_TAGS.has(child.tagName));

    if (nested.length === 0) {
      // Plain item: keep the whole <li> as a single block
      blocks.push({
        html: `<${listType}>${li.outerHTML}</${listType}>`,
        text: itemText,
        tagName: 'li',
      });
      return;
    }

    // Item made of block children: emit each one separately for finer matching
    for (const child of nested) {
      const childText = trimmedText(child);
      if (childText) {
        blocks.push({
          html: `<${listType}><li>${child.outerHTML}</li></${listType}>`,
          text: childText,
          tagName: child.tagName.toLowerCase(),
        });
      }
    }
  };

  children.forEach((el) => {
    const isList = el.tagName === 'UL' || el.tagName === 'OL';
    if (!isList) {
      blocks.push({
        html: el.outerHTML,
        text: trimmedText(el),
        tagName: el.tagName.toLowerCase(),
      });
      return;
    }

    const listType = el.tagName.toLowerCase();
    Array.from(el.children)
      .filter((item) => item.tagName === 'LI')
      .forEach((li) => collectListItem(li, listType));
  });

  return blocks;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Build the rows of a side-by-side diff table from two lists of parsed DOM children.
 *
 * Both sides are flattened with extractBlocks and diffed with diffDOMBlocks; each
 * operation becomes one `<tr>` with the original on the left and the current on the right.
 * @param {Array} originalChildren - Array of original DOM child elements
 * @param {Array} currentChildren - Array of current DOM child elements
 * @returns {{tableHtml: string, counters: string}} Newline-joined table rows and a
 * summary of added/deleted block counts ("No differences" when there are none)
 */
export function createMarkdownTableDiff(originalChildren, currentChildren) {
  const ops = diffDOMBlocks(
    extractBlocks(originalChildren),
    extractBlocks(currentChildren),
  );

  const EMPTY_CELL = '<td class="diff-line-empty"></td>';
  const renderedCell = (kind, html) => `<td class="diff-line-${kind} markdown-rendered">${html}</td>`;

  const tally = { add: 0, del: 0 };
  const rows = [];
  for (const op of ops) {
    switch (op.type) {
      case 'same':
        // Unchanged: show the block on both sides
        rows.push(`<tr>${renderedCell('same', op.originalBlock.html)}${renderedCell('same', op.currentBlock.html)}</tr>`);
        break;
      case 'del':
        // Deleted: left side only
        tally.del += 1;
        rows.push(`<tr>${renderedCell('del', op.originalBlock.html)}${EMPTY_CELL}</tr>`);
        break;
      case 'add':
        // Added: right side only
        tally.add += 1;
        rows.push(`<tr>${EMPTY_CELL}${renderedCell('add', op.currentBlock.html)}</tr>`);
        break;
      default:
        break;
    }
  }

  let counters = 'No differences';
  if (tally.add > 0 || tally.del > 0) {
    counters = `${tally.add} block additions, ${tally.del} block deletions`;
  }

  return { tableHtml: rows.join('\n'), counters };
}
|
|
212
|
+
|
|
213
|
+
/**
 * Turn raw HTML into the HTML rendering of its markdown form (for display).
 *
 * The input is first passed through filterHtmlContent, then converted to markdown and
 * rendered back to HTML.
 * @param {string} html - HTML content to convert
 * @param {boolean} [ignoreNavFooter=true] - Whether to filter nav/footer elements
 * @returns {Promise<string>} Rendered markdown HTML
 */
export async function htmlToRenderedMarkdown(html, ignoreNavFooter = true) {
  // NOTE(review): the literal `false` third argument presumably makes filterHtmlContent
  // return markup rather than extracted text - confirm against html-filter.js
  const filteredBody = await filterHtmlContent(html, ignoreNavFooter, false);

  const asMarkdown = await htmlToMarkdown(filteredBody);
  return markdownToHtml(asMarkdown);
}
|
|
227
|
+
|
|
228
|
+
/**
 * Render both versions of a page to markdown-derived HTML, ready for diffing.
 *
 * Each input goes through htmlToRenderedMarkdown; the two conversions are started
 * together and awaited as a pair.
 * @param {string} originalHtml - Original HTML content
 * @param {string} currentHtml - Current HTML content
 * @param {boolean} [ignoreNavFooter=true] - Whether to filter nav/footer elements
 * @returns {Promise<{originalRenderedHtml: string, currentRenderedHtml: string}>}
 * Rendered markdown HTML for both sides
 */
export async function generateMarkdownDiff(originalHtml, currentHtml, ignoreNavFooter = true) {
  const pendingRenders = [originalHtml, currentHtml]
    .map((html) => htmlToRenderedMarkdown(html, ignoreNavFooter));

  const [originalRenderedHtml, currentRenderedHtml] = await Promise.all(pendingRenders);
  return { originalRenderedHtml, currentRenderedHtml };
}
|
package/src/utils.js
CHANGED
|
@@ -60,3 +60,19 @@ export function formatNumberToK(num) {
|
|
|
60
60
|
export function isBrowser() {
  // Both globals must exist before the runtime is treated as a browser
  const hasWindow = typeof window !== 'undefined';
  const hasDocument = typeof document !== 'undefined';
  return hasWindow && hasDocument;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Look up the global object of whatever runtime the code is executing in.
 *
 * Candidates are probed in the order `globalThis`, `self`, `window`, `global`; the first
 * one that is defined wins. An empty object is returned when none is available.
 * @returns {Object} Global object
 */
export function getGlobalObject() {
  /* eslint-disable no-undef */
  if (typeof globalThis !== 'undefined') {
    return globalThis;
  }
  if (typeof self !== 'undefined') {
    return self;
  }
  if (typeof window !== 'undefined') {
    return window;
  }
  if (typeof global !== 'undefined') {
    return global;
  }
  /* eslint-enable no-undef */
  return {};
}
|