@ckeditor/ckeditor5-markdown-gfm 45.2.1-alpha.9 → 46.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4,174 +4,183 @@
4
4
  */
5
5
  import { Plugin } from '@ckeditor/ckeditor5-core/dist/index.js';
6
6
  import { HtmlDataProcessor } from '@ckeditor/ckeditor5-engine/dist/index.js';
7
- import { marked } from 'marked';
8
- import Turndown from 'turndown';
9
- import { gfm } from 'turndown-plugin-gfm';
7
+ import { unified } from 'unified';
8
+ import remarkGfm from 'remark-gfm';
9
+ import remarkParse from 'remark-parse';
10
+ import remarkRehype from 'remark-rehype';
11
+ import remarkBreaks from 'remark-breaks';
12
+ import rehypeStringify from 'rehype-dom-stringify';
13
+ import { visit } from 'unist-util-visit';
14
+ import { toHtml } from 'hast-util-to-html';
15
+ import { fromDom } from 'hast-util-from-dom';
16
+ import rehypeParse from 'rehype-dom-parse';
17
+ import rehypeRemark from 'rehype-remark';
18
+ import remarkStringify from 'remark-stringify';
19
+ import { h } from 'hastscript';
10
20
  import { ClipboardPipeline } from '@ckeditor/ckeditor5-clipboard/dist/index.js';
11
21
 
12
22
  /**
13
23
  * This is a helper class used by the {@link module:markdown-gfm/markdown Markdown feature} to convert Markdown to HTML.
14
- */ class MarkdownToHtml {
15
- _parser;
16
- _options = {
17
- gfm: true,
18
- breaks: true,
19
- tables: true,
20
- xhtml: true,
21
- headerIds: false
22
- };
24
+ */ class MarkdownGfmMdToHtml {
25
+ _processor;
23
26
  constructor(){
24
- // Overrides.
25
- marked.use({
26
- tokenizer: {
27
- // Disable the autolink rule in the lexer.
28
- autolink: ()=>null,
29
- url: ()=>null
30
- },
31
- renderer: {
32
- checkbox (...args) {
33
- // Remove bogus space after <input type="checkbox"> because it would be preserved
34
- // by DomConverter as it's next to an inline object.
35
- return Object.getPrototypeOf(this).checkbox.call(this, ...args).trimRight();
36
- },
37
- code (...args) {
38
- // Since marked v1.2.8, every <code> gets a trailing "\n" whether it originally
39
- // ended with one or not (see https://github.com/markedjs/marked/issues/1884 to learn why).
40
- // This results in a redundant soft break in the model when loaded into the editor, which
41
- // is best prevented at this stage. See https://github.com/ckeditor/ckeditor5/issues/11124.
42
- return Object.getPrototypeOf(this).code.call(this, ...args).replace('\n</code>', '</code>');
43
- }
44
- }
45
- });
46
- this._parser = marked;
27
+ this._processor = unified()// Parses Markdown to an abstract syntax tree (AST).
28
+ .use(remarkParse)// Adds support for GitHub Flavored Markdown (GFM).
29
+ .use(remarkGfm, {
30
+ singleTilde: true
31
+ })// Replaces line breaks with `<br>` tags.
32
+ .use(remarkBreaks)// Turns markdown syntax tree to HTML syntax tree, ignoring embedded HTML.
33
+ .use(remarkRehype, {
34
+ allowDangerousHtml: true
35
+ })// Handles HTML embedded in Markdown.
36
+ .use(rehypeDomRaw)// Removes classes from list elements.
37
+ .use(deleteClassesFromToDoLists)// Serializes HTML syntax tree to HTML string.
38
+ .use(rehypeStringify);
47
39
  }
48
40
  parse(markdown) {
49
- return this._parser.parse(markdown, this._options);
41
+ return this._processor.processSync(markdown).toString().replaceAll('\n</code>', '</code>');
50
42
  }
51
43
  }
52
-
53
- const autolinkRegex = /* #__PURE__ */ new RegExp(// Prefix.
54
- /\b(?:(?:https?|ftp):\/\/|www\.)/.source + // Domain name.
55
- /(?![-_])(?:[-_a-z0-9\u00a1-\uffff]{1,63}\.)+(?:[a-z\u00a1-\uffff]{2,63})/.source + // The rest.
56
- /(?:[^\s<>]*)/.source, 'gi');
57
- class UpdatedTurndown extends Turndown {
58
- escape(string) {
59
- const originalEscape = super.escape;
60
- function escape(string) {
61
- string = originalEscape(string);
62
- // Escape "<".
63
- string = string.replace(/</g, '\\<');
64
- return string;
65
- }
66
- // Urls should not be escaped. Our strategy is using a regex to find them and escape everything
67
- // that lies outside the matched parts.
68
- let escaped = '';
69
- let lastLinkEnd = 0;
70
- for (const match of this._matchAutolink(string)){
71
- const index = match.index;
72
- // Append the substring between the last match and the current one (if anything).
73
- if (index > lastLinkEnd) {
74
- escaped += escape(string.substring(lastLinkEnd, index));
44
+ /**
45
+ * Rehype plugin that improves handling of the To-do lists by removing:
46
+ * * default classes added to `<ul>`, `<ol>`, and `<li>` elements.
47
+ * * bogus space after <input type="checkbox"> because it would be preserved by ViewDomConverter as it's next to an inline object.
48
+ */ function deleteClassesFromToDoLists() {
49
+ return (tree)=>{
50
+ visit(tree, 'element', (node)=>{
51
+ if (node.tagName === 'ul' || node.tagName === 'ol' || node.tagName === 'li') {
52
+ node.children = node.children.filter((child)=>child.type !== 'text' || !!child.value.trim());
53
+ delete node.properties.className;
75
54
  }
76
- const matchedURL = match[0];
77
- escaped += matchedURL;
78
- lastLinkEnd = index + matchedURL.length;
79
- }
80
- // Add text after the last link or at the string start if no matches.
81
- if (lastLinkEnd < string.length) {
82
- escaped += escape(string.substring(lastLinkEnd, string.length));
83
- }
84
- return escaped;
85
- }
86
- /**
87
- * Trimming end of link.
88
- * https://github.github.com/gfm/#autolinks-extension-
89
- */ *_matchAutolink(string) {
90
- for (const match of string.matchAll(autolinkRegex)){
91
- const matched = match[0];
92
- const length = this._autolinkFindEnd(matched);
93
- yield Object.assign([
94
- matched.substring(0, length)
95
- ], {
96
- index: match.index
97
- });
98
- // We could adjust regex.lastIndex but it's not needed because what we skipped is for sure not a valid URL.
99
- }
100
- }
101
- /**
102
- * Returns the new length of the link (after it would trim trailing characters).
103
- */ _autolinkFindEnd(string) {
104
- let length = string.length;
105
- while(length > 0){
106
- const char = string[length - 1];
107
- if ('?!.,:*_~\'"'.includes(char)) {
108
- length--;
109
- } else if (char == ')') {
110
- let openBrackets = 0;
111
- for(let i = 0; i < length; i++){
112
- if (string[i] == '(') {
113
- openBrackets++;
114
- } else if (string[i] == ')') {
115
- openBrackets--;
116
- }
117
- }
118
- // If there are fewer opening brackets than closing ones, we should remove a closing bracket.
119
- if (openBrackets < 0) {
120
- length--;
121
- } else {
122
- break;
123
- }
124
- } else {
125
- break;
55
+ });
56
+ };
57
+ }
58
+ /**
59
+ * Rehype plugin to parse raw HTML nodes inside Markdown. This plugin is used instead of `rehype-raw` or `rehype-stringify`,
60
+ * because those plugins rely on `parse5` DOM parser which is heavy and redundant in the browser environment where we can
61
+ * use the native DOM APIs.
62
+ *
63
+ * This plugin finds any node (root or element) whose children include `raw` nodes and reparses them like so:
64
+ * 1. Serializes its children to an HTML string.
65
+ * 2. Reparses the HTML string using a `<template>` element.
66
+ * 3. Converts each parsed DOM node back into HAST nodes.
67
+ * 4. Replaces the original children with the newly created HAST nodes.
68
+ */ function rehypeDomRaw() {
69
+ return (tree)=>{
70
+ visit(tree, [
71
+ 'root',
72
+ 'element'
73
+ ], (node)=>{
74
+ /* istanbul ignore next -- @preserve */ if (!isNodeRootOrElement(node)) {
75
+ return;
126
76
  }
127
- }
128
- return length;
129
- }
77
+ // Only act on nodes with at least one raw child.
78
+ if (!node.children.some((child)=>child.type === 'raw')) {
79
+ return;
80
+ }
81
+ const template = document.createElement('template');
82
+ // Serialize all children to an HTML fragment.
83
+ template.innerHTML = toHtml({
84
+ type: 'root',
85
+ children: node.children
86
+ }, {
87
+ allowDangerousHtml: true
88
+ });
89
+ // Convert each parsed DOM node back into HAST and replace the original children.
90
+ node.children = Array.from(template.content.childNodes).map((domNode)=>fromDom(domNode));
91
+ });
92
+ };
130
93
  }
131
94
  /**
132
- * This is a helper class used by the {@link module:markdown-gfm/markdown Markdown feature} to convert HTML to Markdown.
133
- */ class HtmlToMarkdown {
134
- _parser;
95
+ * Only needed for the type guard.
96
+ */ function isNodeRootOrElement(node) {
97
+ return (node.type === 'root' || node.type === 'element') && node.children;
98
+ }
99
+
100
+ class MarkdownGfmHtmlToMd {
101
+ _processor;
102
+ _keepRawTags = [];
135
103
  constructor(){
136
- this._parser = this._createParser();
104
+ this._buildProcessor();
105
+ }
106
+ keep(tagName) {
107
+ this._keepRawTags.push(tagName.toLowerCase());
108
+ this._buildProcessor();
137
109
  }
138
110
  parse(html) {
139
- return this._parser.turndown(html);
111
+ return this._processor.processSync(html).toString().trim();
140
112
  }
141
- keep(elements) {
142
- this._parser.keep(elements);
113
+ /**
114
+ * Returns handlers for raw HTML tags that should be kept in the Markdown output.
115
+ */ _getRawTagsHandlers() {
116
+ return this._keepRawTags.reduce((handlers, tagName)=>{
117
+ handlers[tagName] = (state, node)=>{
118
+ const tag = toHtml(h(node.tagName, node.properties), {
119
+ allowDangerousHtml: true,
120
+ closeSelfClosing: true
121
+ });
122
+ const endOfOpeningTagIndex = tag.indexOf('>');
123
+ const openingTag = tag.slice(0, endOfOpeningTagIndex + 1);
124
+ const closingTag = tag.slice(endOfOpeningTagIndex + 1);
125
+ return [
126
+ {
127
+ type: 'html',
128
+ value: openingTag
129
+ },
130
+ ...state.all(node),
131
+ {
132
+ type: 'html',
133
+ value: closingTag
134
+ }
135
+ ];
136
+ };
137
+ return handlers;
138
+ }, {});
143
139
  }
144
- _createParser() {
145
- const parser = new UpdatedTurndown({
146
- codeBlockStyle: 'fenced',
147
- hr: '---',
148
- headingStyle: 'atx'
140
+ _buildProcessor() {
141
+ this._processor = unified()// Parse HTML to an abstract syntax tree (AST).
142
+ .use(rehypeParse)// Removes `<label>` element from TODO lists.
143
+ .use(removeLabelFromCheckboxes)// Turns HTML syntax tree into Markdown syntax tree.
144
+ .use(rehypeRemark, {
145
+ // Keeps allowed HTML tags.
146
+ handlers: this._getRawTagsHandlers()
147
+ })// Adds support for GitHub Flavored Markdown (GFM).
148
+ .use(remarkGfm, {
149
+ singleTilde: true
150
+ })// Replaces line breaks with `<br>` tags.
151
+ .use(remarkBreaks)// Serializes Markdown syntax tree to Markdown string.
152
+ .use(remarkStringify, {
153
+ resourceLink: true,
154
+ emphasis: '_',
155
+ rule: '-',
156
+ handlers: {
157
+ break: ()=>'\n'
158
+ },
159
+ unsafe: [
160
+ {
161
+ character: '<'
162
+ }
163
+ ]
149
164
  });
150
- parser.use([
151
- gfm,
152
- this._todoList
153
- ]);
154
- return parser;
155
165
  }
156
- // This is a copy of the original taskListItems rule from turndown-plugin-gfm, with minor changes.
157
- _todoList(turndown) {
158
- turndown.addRule('taskListItems', {
159
- filter (node) {
160
- return node.type === 'checkbox' && // Changes here as CKEditor outputs a deeper structure.
161
- (node.parentNode.nodeName === 'LI' || node.parentNode.parentNode.nodeName === 'LI');
162
- },
163
- replacement (content, node) {
164
- return (node.checked ? '[x]' : '[ ]') + ' ';
166
+ }
167
+ /**
168
+ * Removes `<label>` element from TODO lists, so that `<input>` and `text` are direct children of `<li>`.
169
+ */ function removeLabelFromCheckboxes() {
170
+ return function(tree) {
171
+ visit(tree, 'element', (node, index, parent)=>{
172
+ if (index !== null && node.tagName === 'label' && parent.type === 'element' && parent.tagName === 'li') {
173
+ parent.children.splice(index, 1, ...node.children);
165
174
  }
166
175
  });
167
- }
176
+ };
168
177
  }
169
178
 
170
179
  /**
171
180
  * This data processor implementation uses GitHub Flavored Markdown as input/output data.
172
181
  *
173
182
  * See the {@glink features/markdown Markdown output} guide to learn more on how to enable it.
174
- */ class GFMDataProcessor {
183
+ */ class MarkdownGfmDataProcessor {
175
184
  /**
176
185
  * HTML data processor used to process HTML produced by the Markdown-to-HTML converter and the other way.
177
186
  */ _htmlDP;
@@ -185,8 +194,8 @@ class UpdatedTurndown extends Turndown {
185
194
  * Creates a new instance of the Markdown data processor class.
186
195
  */ constructor(document){
187
196
  this._htmlDP = new HtmlDataProcessor(document);
188
- this._markdown2html = new MarkdownToHtml();
189
- this._html2markdown = new HtmlToMarkdown();
197
+ this._markdown2html = new MarkdownGfmMdToHtml();
198
+ this._html2markdown = new MarkdownGfmHtmlToMd();
190
199
  }
191
200
  /**
192
201
  * Keeps the specified element in the output as HTML. This is useful if the editor contains
@@ -196,9 +205,7 @@ class UpdatedTurndown extends Turndown {
196
205
  *
197
206
  * @param element The element name to be kept.
198
207
  */ keepHtml(element) {
199
- this._html2markdown.keep([
200
- element
201
- ]);
208
+ this._html2markdown.keep(element);
202
209
  }
203
210
  /**
204
211
  * Converts the provided Markdown string to a view tree.
@@ -210,7 +217,7 @@ class UpdatedTurndown extends Turndown {
210
217
  return this._htmlDP.toView(html);
211
218
  }
212
219
  /**
213
- * Converts the provided {@link module:engine/view/documentfragment~DocumentFragment} to data format &ndash; in this
220
+ * Converts the provided {@link module:engine/view/documentfragment~ViewDocumentFragment} to data format &ndash; in this
214
221
  * case to a Markdown string.
215
222
  *
216
223
  * @returns Markdown string.
@@ -223,7 +230,7 @@ class UpdatedTurndown extends Turndown {
223
230
  * and not processed during the conversion from Markdown to view elements.
224
231
  *
225
232
  * The raw data can be later accessed by a
226
- * {@link module:engine/view/element~Element#getCustomProperty custom property of a view element} called `"$rawContent"`.
233
+ * {@link module:engine/view/element~ViewElement#getCustomProperty custom property of a view element} called `"$rawContent"`.
227
234
  *
228
235
  * @param pattern The pattern matching all view elements whose content should
229
236
  * be treated as raw data.
@@ -245,7 +252,7 @@ class UpdatedTurndown extends Turndown {
245
252
  * @inheritDoc
246
253
  */ constructor(editor){
247
254
  super(editor);
248
- editor.data.processor = new GFMDataProcessor(editor.data.viewDocument);
255
+ editor.data.processor = new MarkdownGfmDataProcessor(editor.data.viewDocument);
249
256
  }
250
257
  /**
251
258
  * @inheritDoc
@@ -277,7 +284,7 @@ const ALLOWED_MARKDOWN_FIRST_LEVEL_TAGS = [
277
284
  * @inheritDoc
278
285
  */ constructor(editor){
279
286
  super(editor);
280
- this._gfmDataProcessor = new GFMDataProcessor(editor.data.viewDocument);
287
+ this._gfmDataProcessor = new MarkdownGfmDataProcessor(editor.data.viewDocument);
281
288
  }
282
289
  /**
283
290
  * @inheritDoc
@@ -398,5 +405,5 @@ const ALLOWED_MARKDOWN_FIRST_LEVEL_TAGS = [
398
405
  }
399
406
  }
400
407
 
401
- export { GFMDataProcessor, Markdown, MarkdownToHtml, PasteFromMarkdownExperimental };
408
+ export { Markdown, MarkdownGfmDataProcessor, MarkdownGfmHtmlToMd, MarkdownGfmMdToHtml, PasteFromMarkdownExperimental };
402
409
  //# sourceMappingURL=index.js.map