markpaste 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +28 -0
- package/LICENSE +201 -0
- package/README.md +74 -0
- package/index.html +153 -0
- package/package.json +40 -0
- package/playwright.config.ts +24 -0
- package/prettier.config.mjs +14 -0
- package/src/app.js +369 -0
- package/src/cleaner.js +145 -0
- package/src/converter.js +61 -0
- package/src/markpaste.js +26 -0
- package/src/pandoc.js +98 -0
- package/src/renderer.js +31 -0
- package/src/style.css +556 -0
- package/test/node/cleaner.test.js +50 -0
- package/test/node/converter.test.js +18 -0
- package/test/node/hello.test.js +6 -0
- package/test/node/index.test.js +23 -0
- package/test/node/pandoc.test.js +13 -0
- package/test/web/basic-load.spec.ts +75 -0
- package/test/web/cleaner.spec.ts +32 -0
- package/test/web/pasting.spec.ts +117 -0
- package/third_party/pandoc.wasm +0 -0
- package/tsconfig.json +17 -0
- package/types/browser_wasi_shim.d.ts +7 -0
- package/types/globals.d.ts +47 -0
- package/types/pandoc-wasm.d.ts +11 -0
package/src/app.js
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
import {cleanHTML, removeStyleAttributes} from './cleaner.js';
|
|
2
|
+
import {renderMarkdown} from './renderer.js';
|
|
3
|
+
import {getConverter} from './converter.js';
|
|
4
|
+
|
|
5
|
+
/* bling.js + guaranteed and typed. Brand new in Nov 2025. */
|
|
6
|
+
/**
 * Strict `querySelector`: looks `query` up inside `context` (the whole document when no
 * context is given) and throws rather than handing back `null` when nothing matches.
 * @template {string} T
 * @param {T} query
 * @param {ParentNode=} context
 * @return {import('typed-query-selector/parser.js').ParseSelector<T, Element>}
 */
window.$ = function (query, context) {
  const root = context || document;
  const match = root.querySelector(query);
  if (match !== null) {
    return /** @type {import('typed-query-selector/parser.js').ParseSelector<T, Element>} */ (match);
  }
  throw new Error(`query ${query} not found`);
};
|
|
20
|
+
/**
 * `querySelectorAll` shorthand: returns every match for `query` inside `context`
 * (the whole document when no context is given).
 * @template {string} T
 * @param {T} query
 * @param {ParentNode=} context
 * @return {NodeListOf<import('typed-query-selector/parser.js').ParseSelector<T, Element>>}
 */
window.$$ = function (query, context) {
  const root = context || document;
  return root.querySelectorAll(query);
};
|
|
27
|
+
|
|
28
|
+
// `.on(name, fn)` shorthand for addEventListener, installed on every Node and on window.
Node.prototype.on = window.on = function (name, fn) {
  this.addEventListener(name, fn);
};
// Make NodeList inherit from Array.prototype so array methods are reachable on query results.
// `__proto__` assignment is not known to the type checker, hence the ts-ignore below.
// @ts-ignore
NodeList.prototype.__proto__ = Array.prototype;
// Attaches the same listener to every element of the list via the Node-level `on` above.
NodeList.prototype.on = function (name, fn) {
  this.forEach(elem => elem.on(name, fn));
};
|
|
36
|
+
// Bling'ed out.
|
|
37
|
+
|
|
38
|
+
// Theme Management
|
|
39
|
+
/**
 * Sets the root element's `data-theme` attribute on start-up.
 * A theme stored under the `theme` localStorage key wins; otherwise the OS-level
 * `prefers-color-scheme` media query decides between 'dark' and 'light'.
 */
function loadTheme() {
  let theme = localStorage.getItem('theme');

  if (!theme) {
    const darkQuery = window.matchMedia('(prefers-color-scheme: dark)');
    theme = darkQuery.matches ? 'dark' : 'light';
  }

  document.documentElement.setAttribute('data-theme', theme);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Flips the root element's `data-theme` between 'dark' and 'light' (anything other than
 * 'dark' becomes 'dark') and stores the new value under the `theme` localStorage key.
 */
function toggleTheme() {
  const root = document.documentElement;
  const nextTheme = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';

  root.setAttribute('data-theme', nextTheme);
  localStorage.setItem('theme', nextTheme);
}
|
|
57
|
+
|
|
58
|
+
// Local aliases for the query helpers installed on window above.
const {$, $$ } = window;

// Static page elements. `$` throws when a selector matches nothing, so a missing
// element fails here, at module load, rather than on first use.
const inputArea = $('div#inputArea');
const htmlCode = $('code#htmlCode');
const copyBtn = $('button#copyBtn');
const themeToggle = $('button#themeToggle');
const cleanHtmlToggle = $('input#cleanHtmlToggle');

// View Toggle
const viewMarkdownBtn = $('button#viewMarkdownBtn');
const viewRenderedBtn = $('button#viewRenderedBtn');

// Output Elements
// One entry per converter name: `code` receives the Markdown text, `pre` is the container
// shown in the markdown view, and `preview` is the container shown in the rendered view.
const outputs = {
  turndown: {
    code: $('code#outputCodeTurndown'),
    preview: $('div#renderPreviewTurndown'),
    pre: $('pre#outputPreTurndown'),
  },
  pandoc: {
    code: $('code#outputCodePandoc'),
    preview: $('div#renderPreviewPandoc'),
    pre: $('pre#outputPrePandoc'),
  },
};
|
|
83
|
+
|
|
84
|
+
// Content most recently handed to processContent; replayed when a setting changes.
let lastProcessedContent = '';
// Loaded converters keyed by name. A converter that failed to load is simply absent.
const converters = {};
// Settles once every converter has either loaded or failed; it never rejects.
// The converters are independent of each other, so they are loaded concurrently and
// each one becomes usable as soon as its own load finishes.
const convertersPromise = Promise.all(
  ['turndown', 'pandoc'].map(async name => {
    try {
      converters[name] = await getConverter(name);
    } catch (e) {
      console.error(`Failed to load converter: ${name}`, e);
    }
  })
);
|
|
96
|
+
|
|
97
|
+
let currentView = 'markdown'; // 'markdown' or 'rendered'
|
|
98
|
+
|
|
99
|
+
/**
 * Application entry point: wires the event listeners, applies the theme, waits for the
 * converters to finish loading, then converts whatever markup is already in the input area.
 */
async function init() {
  setupEventListeners();
  loadTheme();

  // Wait until every converter has either loaded or failed.
  await convertersPromise;

  // The input area is usually empty at this point, but may carry content after a reload.
  if (!inputArea.innerHTML) {
    return;
  }
  lastProcessedContent = inputArea.innerHTML;
  processContent(lastProcessedContent);
}
|
|
114
|
+
|
|
115
|
+
let idleDetectorInitialized = false;
|
|
116
|
+
|
|
117
|
+
/**
 * Starts idle detection (at most once per page load) so the pandoc converter can be
 * unloaded after ten minutes of user inactivity.
 *
 * Does nothing when the IdleDetector API is missing. The API only starts after the
 * 'idle-detection' permission has been granted, so permission is requested first; the
 * request needs a user gesture, which is why this is driven from a click handler.
 * Every failure is logged as a warning and never thrown to the caller.
 */
async function startIdleDetector() {
  if (idleDetectorInitialized) return;
  idleDetectorInitialized = true;

  if (!('IdleDetector' in window)) return;

  try {
    const permission = await IdleDetector.requestPermission();
    if (permission !== 'granted') {
      console.warn(`Idle detection permission not granted: ${permission}`);
      return;
    }

    const idleDetector = new IdleDetector();
    idleDetector.addEventListener('change', () => {
      const {userState, screenState} = idleDetector;
      console.log(`Idle change: ${userState}, ${screenState}`);

      if (userState !== 'idle' || !converters.pandoc) return;

      console.log('User is idle. Unloading pandoc module to free memory.');
      converters.pandoc.dispose?.();
      delete converters.pandoc;
    });

    try {
      // 10 minutes = 600,000 ms
      await idleDetector.start({threshold: 600000});
    } catch (err) {
      console.warn('Idle detection start failed:', err);
    }
  } catch (err) {
    console.warn('Idle detection setup failed:', err);
  }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Registers every UI listener: paste and typing in the input area, the copy and theme
 * buttons, the clean-HTML checkbox, the two view buttons, and the document-level
 * keydown handler that scopes select-all.
 */
function setupEventListeners() {
  // Typing in the input area converts its current markup.
  const convertTypedInput = () => {
    lastProcessedContent = inputArea.innerHTML;
    processContent(lastProcessedContent);
  };

  // Toggling the cleaning option re-runs the last content, if there is any.
  const reconvertLastContent = () => {
    if (!lastProcessedContent) {
      return;
    }
    processContent(lastProcessedContent);
  };

  inputArea.on('paste', handlePaste);
  inputArea.on('input', convertTypedInput);

  copyBtn.on('click', copyToClipboard);
  themeToggle.on('click', toggleTheme);
  cleanHtmlToggle.on('change', reconvertLastContent);

  viewMarkdownBtn.on('click', () => switchView('markdown'));
  viewRenderedBtn.on('click', () => switchView('rendered'));

  // Add a keydown event listener for scoped select all
  document.on('keydown', handleSelectAll);
}
|
|
183
|
+
|
|
184
|
+
/**
 * Switches every output panel between the Markdown source view and the rendered preview.
 * @param {string} view 'markdown' shows the source; any other value shows the rendered
 *     previews and re-renders them.
 */
function switchView(view) {
  currentView = view;
  const showSource = view === 'markdown';

  viewMarkdownBtn.classList.toggle('active', showSource);
  viewRenderedBtn.classList.toggle('active', !showSource);

  for (const {pre, preview} of Object.values(outputs)) {
    pre.classList.toggle('hidden', !showSource);
    preview.classList.toggle('hidden', showSource);
  }

  if (!showSource) {
    // Render previews
    updateRenderedPreviews();
  }
}
|
|
208
|
+
|
|
209
|
+
/**
 * Keydown handler that confines Ctrl/Cmd+A to the `.editor-container` around the focused
 * element: the default select-all is cancelled and only that container's contents are
 * selected. Other keys, and focus outside an editor container, are left untouched.
 * @param {KeyboardEvent} e
 */
function handleSelectAll(e) {
  const isSelectAllChord = (e.metaKey || e.ctrlKey) && e.key === 'a';
  if (!isSelectAllChord) {
    return;
  }

  const focused = document.activeElement;
  if (!focused) {
    return;
  }

  const editorContainer = focused.closest('.editor-container');
  if (!editorContainer) {
    return;
  }

  e.preventDefault();
  const containerRange = document.createRange();
  containerRange.selectNodeContents(editorContainer);

  const selection = window.getSelection();
  if (!selection) {
    return;
  }
  selection.removeAllRanges();
  selection.addRange(containerRange);
}
|
|
227
|
+
|
|
228
|
+
/**
 * Paste handler for the input area. Cancels the native paste, converts the clipboard's
 * HTML flavour (plain text when there is no HTML), scrolls every <pre> back to the top,
 * empties the input area and shows a confirmation placeholder for two seconds.
 * @param {ClipboardEvent} e
 */
async function handlePaste(e) {
  e.preventDefault();

  // Both flavours are read up front, before the await below.
  const {clipboardData} = e;
  const pastedHtml = clipboardData.getData('text/html');
  const pastedText = clipboardData.getData('text/plain');

  await convertersPromise;

  const content = pastedHtml || pastedText;
  lastProcessedContent = content;
  processContent(content);

  // Reset scroll position for all pre elements
  $$('pre').forEach(pre => {
    pre.scrollTop = 0;
  });

  inputArea.innerHTML = '';
  inputArea.setAttribute('placeholder', 'Pasted! Ready for more...');
  const restorePlaceholder = () => {
    inputArea.setAttribute('placeholder', 'Paste your rich text here...');
  };
  setTimeout(restorePlaceholder, 2000);
}
|
|
250
|
+
|
|
251
|
+
/**
 * Runs one piece of HTML through the pipeline: clean it (or only strip style attributes
 * when the clean-HTML checkbox is off), show the formatted HTML, run every loaded
 * converter into its output panel, and refresh the rendered previews when that view is
 * active. A converter that throws gets its error message written to its own panel.
 * @param {string} html
 */
function processContent(html) {
  const contentToConvert = cleanHtmlToggle.checked ? cleanHTML(html) : removeStyleAttributes(html);

  // Syntax highlighting is applied only when Prism is present on the page.
  const highlight = element => {
    if (window.Prism) {
      window.Prism.highlightElement(element);
    }
  };

  // Update HTML Preview
  htmlCode.textContent = formatHTML(contentToConvert);
  highlight(htmlCode);

  // Run all converters
  Object.entries(converters).forEach(([name, converter]) => {
    if (!converter) {
      return;
    }
    try {
      const markdown = converter.convert(contentToConvert);
      outputs[name].code.textContent = markdown;
      highlight(outputs[name].code);
    } catch (err) {
      console.error(`Converter ${name} failed:`, err);
      outputs[name].code.textContent = `Error converting with ${name}: ${err.message}`;
    }
  });

  if (currentView === 'rendered') {
    updateRenderedPreviews();
  }
}
|
|
281
|
+
|
|
282
|
+
/**
 * Re-renders each output panel's Markdown text into its preview element, one panel
 * after another.
 */
async function updateRenderedPreviews() {
  for (const {code, preview} of Object.values(outputs)) {
    await renderMarkdown(code.textContent || '', preview);
  }
}
|
|
288
|
+
|
|
289
|
+
/**
 * Minimal pretty-printer for the HTML preview. Puts every tag that directly follows
 * another tag on its own line and indents lines by one space per open-tag depth.
 * Lines are terminated with CRLF, including the last one.
 * @param {string} html
 * @return {string} The formatted markup, or '' for empty input.
 */
function formatHTML(html) {
  // Simple formatter for the HTML preview
  if (!html) {
    return '';
  }

  const lines = html.replace(/(>)(<)(\/*)/g, '$1\r\n$2$3').split('\r\n');
  const pieces = [];
  let depth = 0;

  for (const line of lines) {
    let step = 0;

    if (/.+<\/\w[^>]*>$/.test(line)) {
      // Line ends with a closing tag after other content: depth is unchanged.
    } else if (/^<\/\w/.test(line)) {
      // Bare closing tag: step back out before printing, never below zero.
      depth = Math.max(depth - 1, 0);
    } else if (/^<\w[^>]*[^\/]>.*$/.test(line)) {
      // Opening tag that is not self-closed: following lines go one level deeper.
      step = 1;
    }

    pieces.push(`${' '.repeat(depth)}${line}\r\n`);
    depth += step;
  }

  return pieces.join('');
}
|
|
323
|
+
|
|
324
|
+
/**
 * Copies the selected converter's Markdown to the clipboard as text/plain and, when the
 * Markdown renders to something, also as text/html. The copy button then shows a success
 * or failure label for two seconds.
 *
 * The converter comes from the checked `converter` radio button and falls back to
 * 'turndown' when none is checked or its value has no output panel. Failures (rendering
 * or clipboard) are logged and shown on the button, never thrown.
 */
async function copyToClipboard() {
  // Plain querySelector on purpose: "no radio checked" is an expected state here.
  const selectedRadio = document.querySelector('input[name="converter"]:checked');
  const selectedName =
    selectedRadio && Object.hasOwn(outputs, selectedRadio.value) ? selectedRadio.value : 'turndown';

  const textToCopy = outputs[selectedName].code.textContent;

  // Remember the button's resting markup once, so a second click made while the feedback
  // label is showing cannot record "Copied…" as the label to restore. innerHTML (not
  // textContent) is kept so any icon markup inside the button survives.
  const originalLabel = (copyBtn.dataset.originalLabel ??= copyBtn.innerHTML);

  try {
    // Render the Markdown into a detached element to obtain the HTML flavour.
    const tempDiv = document.createElement('div');
    await renderMarkdown(textToCopy, tempDiv);
    const htmlToCopy = tempDiv.innerHTML;

    const items = {
      'text/plain': new Blob([textToCopy], {type: 'text/plain'}),
    };
    if (htmlToCopy) {
      items['text/html'] = new Blob([htmlToCopy], {type: 'text/html'});
    }
    await navigator.clipboard.write([new ClipboardItem(items)]);

    // Visual feedback
    copyBtn.innerHTML = `<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polyline points="20 6 9 17 4 12"></polyline></svg> Copied (${selectedName})!`;
    copyBtn.classList.add('success');
  } catch (err) {
    console.error('Failed to copy:', err);
    copyBtn.textContent = 'Copy failed';
  } finally {
    setTimeout(() => {
      copyBtn.innerHTML = originalLabel;
      copyBtn.classList.remove('success');
    }, 2000);

    // Kicked off last so that nothing it does can delay or interfere with the copy itself.
    startIdleDetector();
  }
}
|
|
368
|
+
|
|
369
|
+
init();
|
package/src/cleaner.js
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cleaner.js
|
|
3
|
+
* Implements HTML cleaning logic similar to paste-html-subset.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Environment detection and DOM setup
// These three bindings give the rest of the module one DOM surface in both runtimes:
//   parseHTMLGlobal(html) -> parsed document whose `body` holds the given markup
//   documentGlobal        -> document used to create the cleaned output nodes
//   NodeGlobal            -> provider of the TEXT_NODE / ELEMENT_NODE constants
let parseHTMLGlobal, documentGlobal, NodeGlobal;

if (typeof window !== 'undefined') {
  // Browser: use the native DOMParser and the page's own document and Node.
  parseHTMLGlobal = (html) => {
    const parser = new DOMParser();
    return parser.parseFromString(html, 'text/html');
  };
  documentGlobal = window.document;
  NodeGlobal = window.Node;
} else {
  // We are in Node.js
  // linkedom is imported lazily (top-level await) so the browser path never requests it.
  const { parseHTML } = await import('linkedom');
  parseHTMLGlobal = (html) => {
    // The fragment is wrapped in a full document so that `.body` contains it.
    const fullHtml = `<!DOCTYPE html><html><body>${html}</body></html>`;
    return parseHTML(fullHtml).document;
  };
  // A separate empty linkedom window supplies the document and Node used for output.
  const linkedom = parseHTML('<html><body></body></html>');
  documentGlobal = linkedom.document;
  NodeGlobal = linkedom.Node;
}
|
|
27
|
+
|
|
28
|
+
// Upper-case tag names that cleanHTML re-creates in its output; every other element is
// either unwrapped (its children are kept) or dropped by processNode.
const ALLOWED_TAGS = [
  'P', 'STRONG', 'B', 'EM', 'I', 'BLOCKQUOTE', 'CODE', 'PRE', 'A', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6',
  'UL', 'OL', 'LI', 'DL', 'DT', 'DD', 'BR', 'HR', 'TABLE', 'THEAD', 'TBODY', 'TR', 'TH', 'TD',
];

// Attributes copied onto a re-created element, keyed by upper-case tag name; tags not
// listed here keep no attributes.
// NOTE(review): IMG has an entry here but is not in ALLOWED_TAGS, so this entry is never
// consulted by the visible code — confirm whether images are meant to be kept.
const ALLOWED_ATTRIBUTES = {
  A: ['href', 'title', 'target'],
  IMG: ['src', 'alt', 'title', 'width', 'height'],
};
|
|
37
|
+
|
|
38
|
+
/**
 * Reduces an HTML string to the allowed subset of tags and attributes.
 * The input is parsed, its body is copied node by node into a fresh body element by
 * processNode, and that element's markup is returned.
 * @param {string} html
 * @return {string} The cleaned markup.
 */
export function cleanHTML(html) {
  const sourceBody = parseHTMLGlobal(html).body;

  // Prefer a body from a brand-new document; fall back to a bare <body> element where
  // the DOM implementation cannot create documents (linkedom or other environments).
  const domImplementation = documentGlobal.implementation;
  const targetBody =
    domImplementation && domImplementation.createHTMLDocument
      ? domImplementation.createHTMLDocument('clean').body
      : documentGlobal.createElement('body');

  processNode(sourceBody, targetBody);

  return targetBody.innerHTML;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Copies the cleaned equivalent of `sourceNode` (and, recursively, its subtree) into
 * `targetParent`.
 *
 * - Text nodes are copied when non-empty; node types other than text/element are skipped.
 * - Elements with class `mdn-copy-button` and links into the MDN playground are dropped.
 * - Allowed tags are re-created with only their allowed attributes.
 * - A UL/OL that is the first element of the body and has no LI child is unwrapped.
 * - Any other tag is unwrapped (children kept) unless it is on the dangerous list, in
 *   which case it is dropped together with its subtree.
 *
 * @param {Node} sourceNode Node from the parsed input.
 * @param {Node} targetParent Output node that receives the cleaned copy.
 */
function processNode(sourceNode, targetParent) {
  const {nodeType} = sourceNode;

  // Handle text nodes
  if (nodeType === NodeGlobal.TEXT_NODE) {
    const text = sourceNode.textContent;
    if (text) {
      targetParent.appendChild(documentGlobal.createTextNode(text));
    }
    return;
  }

  // Handle element nodes; everything else is ignored
  if (nodeType !== NodeGlobal.ELEMENT_NODE) {
    return;
  }

  const tagName = sourceNode.tagName.toUpperCase();

  // Recurses into this element's children, appending their output to `parent`.
  const copyChildrenInto = parent => {
    for (const child of Array.from(sourceNode.childNodes)) {
      processNode(child, parent);
    }
  };

  // MDN specific cleaning: remove copy button and play links
  if (sourceNode.classList && sourceNode.classList.contains('mdn-copy-button')) {
    return;
  }
  const href = sourceNode.getAttribute('href');
  const isPlaygroundLink =
    tagName === 'A' && href && href.startsWith('https://developer.mozilla.org/en-US/play');
  if (isPlaygroundLink) {
    return;
  }

  if (!ALLOWED_TAGS.includes(tagName)) {
    const DANGEROUS_TAGS = ['SCRIPT', 'STYLE', 'IFRAME', 'OBJECT', 'EMBED', 'LINK', 'META'];
    const mustDrop = DANGEROUS_TAGS.includes(tagName) && tagName !== 'BODY' && tagName !== 'HTML';
    if (!mustDrop) {
      // Unwrap safe-ish tags (like div, span, body, html)
      copyChildrenInto(targetParent);
    }
    return;
  }

  // Special case: UL/OL without LI children (often a bug in clipboard content)
  // This tweak should only happen when this element is the FIRST element in the received DOM.
  if (tagName === 'UL' || tagName === 'OL') {
    const parent = sourceNode.parentNode;
    const isFirstElementInBody =
      parent &&
      parent.tagName === 'BODY' &&
      Array.from(parent.children).find(c => !['META', 'STYLE'].includes(c.tagName.toUpperCase())) === sourceNode;

    if (isFirstElementInBody) {
      const hasLiChild = Array.from(sourceNode.childNodes).some(
        child => child.nodeType === NodeGlobal.ELEMENT_NODE && child.tagName.toUpperCase() === 'LI'
      );
      if (!hasLiChild) {
        // Unwrap: process children directly into targetParent
        copyChildrenInto(targetParent);
        return;
      }
    }
  }

  const newElement = documentGlobal.createElement(tagName);

  // Copy allowed attributes
  const allowedAttributes = ALLOWED_ATTRIBUTES[tagName];
  if (allowedAttributes) {
    for (const attr of allowedAttributes) {
      if (sourceNode.hasAttribute(attr)) {
        newElement.setAttribute(attr, sourceNode.getAttribute(attr));
      }
    }
  }

  targetParent.appendChild(newElement);

  // Process children
  copyChildrenInto(newElement);
}
|
|
136
|
+
|
|
137
|
+
/**
 * Parses `html`, removes the `style` attribute from every element inside the body and
 * returns the body's markup. Nothing else is changed.
 * @param {string} html
 * @return {string}
 */
export function removeStyleAttributes(html) {
  const {body} = parseHTMLGlobal(html);
  for (const element of Array.from(body.querySelectorAll('*'))) {
    element.removeAttribute('style');
  }
  return body.innerHTML;
}
|
package/src/converter.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* converter.js
|
|
3
|
+
* Handles conversion from HTML to Markdown.
|
|
4
|
+
* Dynamically loads and uses different conversion libraries.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const isBrowser = typeof window !== 'undefined';
|
|
8
|
+
|
|
9
|
+
/**
 * Builds the Turndown-based converter (ATX headings, fenced code blocks, `*` emphasis)
 * with the GFM plugin applied when the plugin module exports one.
 *
 * The same two modules are imported in the browser and in Node, so no environment check
 * is needed; they do not depend on each other and are imported concurrently.
 *
 * @return {Promise<{convert: (html: string) => string}>}
 */
async function getTurndownConverter() {
  const [{default: TurndownService}, {gfm: turndownPluginGfm}] = await Promise.all([
    import('turndown'),
    import('turndown-plugin-gfm'),
  ]);

  const turndownService = new TurndownService({
    headingStyle: 'atx',
    codeBlockStyle: 'fenced',
    emDelimiter: '*',
  });

  if (turndownPluginGfm) {
    turndownService.use(turndownPluginGfm);
  }

  return {
    convert: html => turndownService.turndown(html),
  };
}
|
|
35
|
+
|
|
36
|
+
/**
 * Builds the pandoc-based converter. The pandoc module is imported on demand; `convert`
 * runs it from HTML to GFM, and `dispose` forwards to the module's own dispose().
 * @return {Promise<{convert: (html: string) => string, dispose: () => void}>}
 */
async function getPandocConverter() {
  const PANDOC_ARGS = '--from html --to gfm --no-highlight --wrap=preserve';
  const pandocModule = await import('./pandoc.js');

  const convert = html => pandocModule.pandoc(PANDOC_ARGS, html);
  const dispose = () => {
    pandocModule.dispose();
  };

  return {convert, dispose};
}
|
|
49
|
+
|
|
50
|
+
// Registry of converter factories keyed by public converter name. Each factory is async
// and loads its underlying library only when it is called.
const converters = {
  turndown: getTurndownConverter,
  pandoc: getPandocConverter,
};
|
|
54
|
+
|
|
55
|
+
/**
 * Loads the converter registered under `name`.
 * @param {string} name Converter name: a key of the `converters` registry.
 * @return {Promise<{convert: (html: string) => string, dispose?: () => void}>}
 * @throws {Error} (as a rejection) when `name` is not a registered converter.
 */
export async function getConverter(name) {
  // Own-property check: a plain `converters[name]` lookup would also accept inherited
  // keys such as 'toString' or 'constructor' and call them as if they were factories.
  if (!Object.hasOwn(converters, name)) {
    const available = Object.keys(converters).join(', ');
    throw new Error(`Unknown converter: ${name}. Available converters: ${available}`);
  }
  return converters[name]();
}
|
package/src/markpaste.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* markpaste.js
|
|
3
|
+
* MarkPaste Library Entry Point
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { cleanHTML, removeStyleAttributes } from './cleaner.js';
|
|
7
|
+
import { getConverter } from './converter.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Turns an HTML string into Markdown.
 * The markup is first prepared — fully cleaned, or only stripped of style attributes when
 * `clean` is false — and then handed to the requested converter.
 * @param {string} html The HTML string to convert.
 * @param {Object} options Configuration options.
 * @param {string} [options.converter='turndown'] The converter to use ('turndown', 'pandoc').
 * @param {boolean} [options.clean=true] Whether to clean the HTML before conversion.
 * @returns {Promise<string>} The resulting Markdown string.
 */
export async function convert(html, options = {}) {
  const {clean = true, converter: converterName = 'turndown'} = options;

  const prepare = clean ? cleanHTML : removeStyleAttributes;
  const preparedHtml = await prepare(html);

  const engine = await getConverter(converterName);
  return engine.convert(preparedHtml);
}
|
|
25
|
+
|
|
26
|
+
export { cleanHTML, removeStyleAttributes, getConverter };
|
package/src/pandoc.js
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapted from https://github.com/haskell-wasm/pandoc-wasm
|
|
3
|
+
* See README.md for more details about the pandoc WASM integration.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/** @import * as PandocWasm from '../types/pandoc-wasm.js' */
|
|
7
|
+
/** @import * as WasiShimT from '@bjorn3/browser_wasi_shim' */
|
|
8
|
+
|
|
9
|
+
const isBrowser = typeof window !== 'undefined';
|
|
10
|
+
|
|
11
|
+
// The WASI shim is the same package in the browser and in Node, so a single import
// covers both environments.
const WasiShim = await import('@bjorn3/browser_wasi_shim');
|
|
17
|
+
|
|
18
|
+
/** @type {WasiShimT} */
const {WASI, OpenFile, File, ConsoleStdout, PreopenDirectory} = WasiShim;

// Program arguments handed to the WASI instance and, further down, marshalled into the
// argv passed to hs_init_with_rtsopts.
const args = ['pandoc.wasm', '+RTS', '-H64m', '-RTS'];
const env = [];
// In-memory files exposed to the module as /in (read-only) and /out. pandoc() replaces
// in_file's data before each run and reads out_file's data afterwards. Both are `let`
// because dispose() sets them to null.
let in_file = new File(new Uint8Array(), {readonly: true});
let out_file = new File(new Uint8Array(), {readonly: false});
// File descriptor table, by index: 0 = an empty read-only file, 1 and 2 = line-buffered
// console sinks logged with the "[WASI stdout]" / "[WASI stderr]" prefixes, 3 = the
// preopened root directory holding `in` and `out`.
const fds = [
  new OpenFile(new File(new Uint8Array(), {readonly: true})),
  ConsoleStdout.lineBuffered(msg => console.log(`[WASI stdout] ${msg}`)),
  ConsoleStdout.lineBuffered(msg => console.warn(`[WASI stderr] ${msg}`)),
  new PreopenDirectory(
    '/',
    new Map([
      ['in', in_file],
      ['out', out_file],
    ])
  ),
];
const options = {debug: false};
// `let` because dispose() sets it to null.
let wasi = new WASI(args, env, fds, options);
|
|
39
|
+
|
|
40
|
+
/**
 * Reads the pandoc WebAssembly binary and instantiates it against the WASI imports.
 * In the browser the binary is fetched from `third_party/pandoc.wasm`; in Node it is read
 * from the `third_party` directory next to this module's parent directory.
 * @return {Promise<WebAssembly.WebAssemblyInstantiatedSource>}
 * @throws {Error} (as a rejection) when the browser fetch answers with a non-2xx status.
 */
async function loadWasm() {
  let bytes;

  if (isBrowser) {
    const response = await fetch('third_party/pandoc.wasm');
    // Without this check an error page would be passed to WebAssembly.instantiate and
    // surface as a misleading compile error.
    if (!response.ok) {
      throw new Error(`Failed to fetch third_party/pandoc.wasm: ${response.status} ${response.statusText}`);
    }
    bytes = await response.arrayBuffer();
  } else {
    const fs = await import('node:fs');
    const path = await import('node:path');
    const { fileURLToPath } = await import('node:url');
    const __dirname = path.dirname(fileURLToPath(import.meta.url));
    const wasmPath = path.join(__dirname, '..', 'third_party', 'pandoc.wasm');
    bytes = fs.readFileSync(wasmPath);
  }

  return await WebAssembly.instantiate(bytes, {
    wasi_snapshot_preview1: wasi.wasiImport,
  });
}
|
|
59
|
+
|
|
60
|
+
// Load and instantiate the binary at module evaluation time (top-level await), so
// importing this module resolves only once pandoc has been instantiated.
const source = await loadWasm();
// `let` because dispose() sets it to null.
let instance = /** @type {PandocWasm.PandocWasmInstance} */ (source.instance);

// Bind the WASI shim to the instance, then call the module's exported constructor hook.
wasi.initialize(instance);
instance.exports.__wasm_call_ctors();
|
|
65
|
+
|
|
66
|
+
/**
 * Creates a new DataView over the instance's memory buffer as it is at the time of the
 * call; a fresh view is built on every call rather than cached.
 * @return {DataView}
 */
function memory_data_view() {
  const {buffer} = instance.exports.memory;
  return new DataView(buffer);
}
|
|
69
|
+
|
|
70
|
+
// Marshal `args` into the module's memory as an argc/argv pair and initialise the runtime.
// argc_ptr: 4-byte cell holding args.length (little-endian).
const argc_ptr = instance.exports.malloc(4);
memory_data_view().setUint32(argc_ptr, args.length, true);
// argv: array of 4-byte pointers, one per argument plus a terminating 0 entry.
const argv = instance.exports.malloc(4 * (args.length + 1));
for (let i = 0; i < args.length; ++i) {
  // Each argument is copied into its own allocation followed by a 0 byte.
  // The string's .length is used as its byte length — assumes ASCII-only arguments,
  // which holds for the `args` literal used here.
  const arg = instance.exports.malloc(args[i].length + 1);
  new TextEncoder().encodeInto(args[i], new Uint8Array(instance.exports.memory.buffer, arg, args[i].length));
  memory_data_view().setUint8(arg + args[i].length, 0);
  memory_data_view().setUint32(argv + 4 * i, arg, true);
}
memory_data_view().setUint32(argv + 4 * args.length, 0, true);
// argv_ptr: 4-byte cell holding the address of the argv array; the init call below
// receives the addresses of the argc and argv cells rather than their values.
const argv_ptr = instance.exports.malloc(4);
memory_data_view().setUint32(argv_ptr, argv, true);

instance.exports.hs_init_with_rtsopts(argc_ptr, argv_ptr);
|
|
84
|
+
|
|
85
|
+
/**
 * Runs pandoc once over `in_str` and returns what it wrote to the `out` file.
 *
 * @param {string} args_str Command-line style arguments, e.g. '--from html --to gfm'.
 * @param {string} in_str Document text, made available to pandoc as the `in` file.
 * @return {string} The contents of the `out` file decoded as UTF-8.
 * @throws {Error} When the module has been disposed.
 * @throws {TypeError} When the output is not valid UTF-8 (the decoder is strict).
 */
export function pandoc(args_str, in_str) {
  if (!instance) {
    throw new Error('pandoc WASM module has been disposed');
  }

  const encoder = new TextEncoder();
  // Size the buffer from the encoded bytes: a string's .length counts UTF-16 units and
  // would truncate arguments that contain non-ASCII characters.
  const args_bytes = encoder.encode(args_str);
  const args_ptr = instance.exports.malloc(args_bytes.length);
  // The memory buffer is looked up only after malloc, as in the original statement order.
  new Uint8Array(instance.exports.memory.buffer, args_ptr, args_bytes.length).set(args_bytes);

  in_file.data = encoder.encode(in_str);
  instance.exports.wasm_main(args_ptr, args_bytes.length);
  return new TextDecoder('utf-8', {fatal: true}).decode(out_file.data);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Drops this module's references to the in/out files, the WASI shim and the WebAssembly
 * instance by setting all four bindings to null.
 * NOTE(review): nothing visible in this module re-creates them afterwards, so pandoc()
 * looks unusable after this call — confirm that callers never need it again.
 */
export function dispose() {
  in_file = out_file = wasi = instance = null;
}
|