@ckeditor/ckeditor5-markdown-gfm 41.4.2 → 42.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,223 +5,253 @@
5
5
  import { Plugin } from '@ckeditor/ckeditor5-core/dist/index.js';
6
6
  import { HtmlDataProcessor } from '@ckeditor/ckeditor5-engine/dist/index.js';
7
7
  import { marked } from 'marked';
8
- import TurndownService from 'turndown/lib/turndown.browser.es.js';
8
+ import Turndown from 'turndown';
9
9
  import { gfm } from 'turndown-plugin-gfm';
10
10
  import { ClipboardPipeline } from '@ckeditor/ckeditor5-clipboard/dist/index.js';
11
11
 
12
- // Overrides.
13
- marked.use({
14
- tokenizer: {
15
- // Disable the autolink rule in the lexer.
16
- autolink: ()=>null,
17
- url: ()=>null
18
- },
19
- renderer: {
20
- checkbox (...args) {
21
- // Remove bogus space after <input type="checkbox"> because it would be preserved
22
- // by DomConverter as it's next to an inline object.
23
- return Object.getPrototypeOf(this).checkbox.call(this, ...args).trimRight();
24
- },
25
- code (...args) {
26
- // Since marked v1.2.8, every <code> gets a trailing "\n" whether it originally
27
- // ended with one or not (see https://github.com/markedjs/marked/issues/1884 to learn why).
28
- // This results in a redundant soft break in the model when loaded into the editor, which
29
- // is best prevented at this stage. See https://github.com/ckeditor/ckeditor5/issues/11124.
30
- return Object.getPrototypeOf(this).code.call(this, ...args).replace('\n</code>', '</code>');
31
- }
32
- }
33
- });
34
12
  /**
35
- * Parses markdown string to an HTML.
36
- */ function markdown2html(markdown) {
37
- const options = {
13
+ * This is a helper class used by the {@link module:markdown-gfm/markdown Markdown feature} to convert Markdown to HTML.
14
+ */ class MarkdownToHtml {
15
+ _parser;
16
+ _options = {
38
17
  gfm: true,
39
18
  breaks: true,
40
19
  tables: true,
41
20
  xhtml: true,
42
21
  headerIds: false
43
22
  };
44
- return marked.parse(markdown, options);
45
- }
46
-
47
- /**
48
- * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
49
- * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
50
- */ /**
51
- * @module markdown-gfm/html2markdown/html2markdown
52
- */ /* eslint-disable @typescript-eslint/ban-ts-comment */ // Importing types for this package is problematic, so it's omitted.
53
- // @ts-ignore
54
- // Override the original escape method by not escaping links.
55
- const originalEscape = TurndownService.prototype.escape;
56
- function escape(string) {
57
- string = originalEscape(string);
58
- // Escape "<".
59
- string = string.replace(/</g, '\\<');
60
- return string;
61
- }
62
- TurndownService.prototype.escape = function(string) {
63
- // Urls should not be escaped. Our strategy is using a regex to find them and escape everything
64
- // which is out of the matches parts.
65
- let escaped = '';
66
- let lastLinkEnd = 0;
67
- for (const match of matchAutolink(string)){
68
- const index = match.index;
69
- // Append the substring between the last match and the current one (if anything).
70
- if (index > lastLinkEnd) {
71
- escaped += escape(string.substring(lastLinkEnd, index));
72
- }
73
- const matchedURL = match[0];
74
- escaped += matchedURL;
75
- lastLinkEnd = index + matchedURL.length;
23
+ constructor(){
24
+ // Overrides.
25
+ marked.use({
26
+ tokenizer: {
27
+ // Disable the autolink rule in the lexer.
28
+ autolink: ()=>null,
29
+ url: ()=>null
30
+ },
31
+ renderer: {
32
+ checkbox (...args) {
33
+ // Remove bogus space after <input type="checkbox"> because it would be preserved
34
+ // by DomConverter as it's next to an inline object.
35
+ return Object.getPrototypeOf(this).checkbox.call(this, ...args).trimRight();
36
+ },
37
+ code (...args) {
38
+ // Since marked v1.2.8, every <code> gets a trailing "\n" whether it originally
39
+ // ended with one or not (see https://github.com/markedjs/marked/issues/1884 to learn why).
40
+ // This results in a redundant soft break in the model when loaded into the editor, which
41
+ // is best prevented at this stage. See https://github.com/ckeditor/ckeditor5/issues/11124.
42
+ return Object.getPrototypeOf(this).code.call(this, ...args).replace('\n</code>', '</code>');
43
+ }
44
+ }
45
+ });
46
+ this._parser = marked;
76
47
  }
77
- // Add text after the last link or at the string start if no matches.
78
- if (lastLinkEnd < string.length) {
79
- escaped += escape(string.substring(lastLinkEnd, string.length));
48
+ parse(markdown) {
49
+ return this._parser.parse(markdown, this._options);
80
50
  }
81
- return escaped;
82
- };
83
- const turndownService = new TurndownService({
84
- codeBlockStyle: 'fenced',
85
- hr: '---',
86
- headingStyle: 'atx'
87
- });
88
- turndownService.use([
89
- gfm,
90
- todoList
91
- ]);
92
- /**
93
- * Parses HTML to a markdown.
94
- */ function html2markdown(html) {
95
- return turndownService.turndown(html);
96
51
  }
97
- // This is a copy of the original taskListItems rule from turdown-plugin-gfm, with minor changes.
98
- function todoList(turndownService) {
99
- turndownService.addRule('taskListItems', {
100
- filter (node) {
101
- return node.type === 'checkbox' && // Changes here as CKEditor outputs a deeper structure.
102
- (node.parentNode.nodeName === 'LI' || node.parentNode.parentNode.nodeName === 'LI');
103
- },
104
- replacement (content, node) {
105
- return (node.checked ? '[x]' : '[ ]') + ' ';
106
- }
107
- });
108
- }
109
- // Autolink matcher.
110
- const regex = new RegExp(// Prefix.
52
+
53
+ const autolinkRegex = /* #__PURE__ */ new RegExp(// Prefix.
111
54
  /\b(?:(?:https?|ftp):\/\/|www\.)/.source + // Domain name.
112
55
  /(?![-_])(?:[-_a-z0-9\u00a1-\uffff]{1,63}\.)+(?:[a-z\u00a1-\uffff]{2,63})/.source + // The rest.
113
56
  /(?:[^\s<>]*)/.source, 'gi');
114
- /**
115
- * Trimming end of link.
116
- * https://github.github.com/gfm/#autolinks-extension-
117
- */ function* matchAutolink(string) {
118
- for (const match of string.matchAll(regex)){
119
- const matched = match[0];
120
- const length = autolinkFindEnd(matched);
121
- yield Object.assign([
122
- matched.substring(0, length)
123
- ], {
124
- index: match.index
125
- });
126
- // We could adjust regex.lastIndex but it's not needed because what we skipped is for sure not a valid URL.
127
- }
128
- }
129
- /**
130
- * Returns the new length of the link (after it would trim trailing characters).
131
- */ function autolinkFindEnd(string) {
132
- let length = string.length;
133
- while(length > 0){
134
- const char = string[length - 1];
135
- if ('?!.,:*_~\'"'.includes(char)) {
136
- length--;
137
- } else if (char == ')') {
138
- let openBrackets = 0;
139
- for(let i = 0; i < length; i++){
140
- if (string[i] == '(') {
141
- openBrackets++;
142
- } else if (string[i] == ')') {
143
- openBrackets--;
144
- }
57
+ class UpdatedTurndown extends Turndown {
58
+ escape(string) {
59
+ const originalEscape = super.escape;
60
+ function escape(string) {
61
+ string = originalEscape(string);
62
+ // Escape "<".
63
+ string = string.replace(/</g, '\\<');
64
+ return string;
65
+ }
66
+ // URLs should not be escaped. Our strategy is to find them with a regex and escape everything
67
+ // that lies outside the matched parts.
68
+ let escaped = '';
69
+ let lastLinkEnd = 0;
70
+ for (const match of this._matchAutolink(string)){
71
+ const index = match.index;
72
+ // Append the substring between the last match and the current one (if anything).
73
+ if (index > lastLinkEnd) {
74
+ escaped += escape(string.substring(lastLinkEnd, index));
145
75
  }
146
- // If there is fewer opening brackets then closing ones we should remove a closing bracket.
147
- if (openBrackets < 0) {
76
+ const matchedURL = match[0];
77
+ escaped += matchedURL;
78
+ lastLinkEnd = index + matchedURL.length;
79
+ }
80
+ // Add text after the last link or at the string start if no matches.
81
+ if (lastLinkEnd < string.length) {
82
+ escaped += escape(string.substring(lastLinkEnd, string.length));
83
+ }
84
+ return escaped;
85
+ }
86
+ /**
87
+ * Trimming end of link.
88
+ * https://github.github.com/gfm/#autolinks-extension-
89
+ */ *_matchAutolink(string) {
90
+ for (const match of string.matchAll(autolinkRegex)){
91
+ const matched = match[0];
92
+ const length = this._autolinkFindEnd(matched);
93
+ yield Object.assign([
94
+ matched.substring(0, length)
95
+ ], {
96
+ index: match.index
97
+ });
98
+ // We could adjust autolinkRegex.lastIndex, but it's not needed because the part we skipped is certainly not a valid URL.
99
+ }
100
+ }
101
+ /**
102
+ * Returns the new length of the link (after it would trim trailing characters).
103
+ */ _autolinkFindEnd(string) {
104
+ let length = string.length;
105
+ while(length > 0){
106
+ const char = string[length - 1];
107
+ if ('?!.,:*_~\'"'.includes(char)) {
148
108
  length--;
109
+ } else if (char == ')') {
110
+ let openBrackets = 0;
111
+ for(let i = 0; i < length; i++){
112
+ if (string[i] == '(') {
113
+ openBrackets++;
114
+ } else if (string[i] == ')') {
115
+ openBrackets--;
116
+ }
117
+ }
118
+ // If there are fewer opening brackets than closing ones, we should remove a closing bracket.
119
+ if (openBrackets < 0) {
120
+ length--;
121
+ } else {
122
+ break;
123
+ }
149
124
  } else {
150
125
  break;
151
126
  }
152
- } else {
153
- break;
154
127
  }
128
+ return length;
129
+ }
130
+ }
131
+ /**
132
+ * This is a helper class used by the {@link module:markdown-gfm/markdown Markdown feature} to convert HTML to Markdown.
133
+ */ class HtmlToMarkdown {
134
+ _parser;
135
+ constructor(){
136
+ this._parser = this._createParser();
137
+ }
138
+ parse(html) {
139
+ return this._parser.turndown(html);
140
+ }
141
+ keep(elements) {
142
+ this._parser.keep(elements);
143
+ }
144
+ _createParser() {
145
+ const parser = new UpdatedTurndown({
146
+ codeBlockStyle: 'fenced',
147
+ hr: '---',
148
+ headingStyle: 'atx'
149
+ });
150
+ parser.use([
151
+ gfm,
152
+ this._todoList
153
+ ]);
154
+ return parser;
155
+ }
156
+ // This is a copy of the original taskListItems rule from turndown-plugin-gfm, with minor changes.
157
+ _todoList(turndown) {
158
+ turndown.addRule('taskListItems', {
159
+ filter (node) {
160
+ return node.type === 'checkbox' && // Changes here as CKEditor outputs a deeper structure.
161
+ (node.parentNode.nodeName === 'LI' || node.parentNode.parentNode.nodeName === 'LI');
162
+ },
163
+ replacement (content, node) {
164
+ return (node.checked ? '[x]' : '[ ]') + ' ';
165
+ }
166
+ });
155
167
  }
156
- return length;
157
168
  }
158
169
 
159
- class GFMDataProcessor {
170
+ /**
171
+ * This data processor implementation uses GitHub Flavored Markdown as input/output data.
172
+ *
173
+ * See the {@glink features/markdown Markdown output} guide to learn more on how to enable it.
174
+ */ class GFMDataProcessor {
160
175
  /**
161
- * Keeps the specified element in the output as HTML. This is useful if the editor contains
162
- * features producing HTML that is not a part of the Markdown standard.
163
- *
164
- * By default, all HTML tags are removed.
165
- *
166
- * @param element The element name to be kept.
167
- */ keepHtml(element) {
168
- turndownService.keep([
176
+ * HTML data processor used to process HTML produced by the Markdown-to-HTML converter and, in the other direction, to produce the HTML consumed by the HTML-to-Markdown converter.
177
+ */ _htmlDP;
178
+ /**
179
+ * Helper for converting Markdown to HTML.
180
+ */ _markdown2html;
181
+ /**
182
+ * Helper for converting HTML to Markdown.
183
+ */ _html2markdown;
184
+ /**
185
+ * Creates a new instance of the Markdown data processor class.
186
+ */ constructor(document){
187
+ this._htmlDP = new HtmlDataProcessor(document);
188
+ this._markdown2html = new MarkdownToHtml();
189
+ this._html2markdown = new HtmlToMarkdown();
190
+ }
191
+ /**
192
+ * Keeps the specified element in the output as HTML. This is useful if the editor contains
193
+ * features producing HTML that is not a part of the Markdown standard.
194
+ *
195
+ * By default, all HTML tags are removed.
196
+ *
197
+ * @param element The element name to be kept.
198
+ */ keepHtml(element) {
199
+ this._html2markdown.keep([
169
200
  element
170
201
  ]);
171
202
  }
172
203
  /**
173
- * Converts the provided Markdown string to a view tree.
174
- *
175
- * @param data A Markdown string.
176
- * @returns The converted view element.
177
- */ toView(data) {
178
- const html = markdown2html(data);
204
+ * Converts the provided Markdown string to a view tree.
205
+ *
206
+ * @param data A Markdown string.
207
+ * @returns The converted view element.
208
+ */ toView(data) {
209
+ const html = this._markdown2html.parse(data);
179
210
  return this._htmlDP.toView(html);
180
211
  }
181
212
  /**
182
- * Converts the provided {@link module:engine/view/documentfragment~DocumentFragment} to data format &ndash; in this
183
- * case to a Markdown string.
184
- *
185
- * @returns Markdown string.
186
- */ toData(viewFragment) {
213
+ * Converts the provided {@link module:engine/view/documentfragment~DocumentFragment} to data format &ndash; in this
214
+ * case to a Markdown string.
215
+ *
216
+ * @returns Markdown string.
217
+ */ toData(viewFragment) {
187
218
  const html = this._htmlDP.toData(viewFragment);
188
- return html2markdown(html);
219
+ return this._html2markdown.parse(html);
189
220
  }
190
221
  /**
191
- * Registers a {@link module:engine/view/matcher~MatcherPattern} for view elements whose content should be treated as raw data
192
- * and not processed during the conversion from Markdown to view elements.
193
- *
194
- * The raw data can be later accessed by a
195
- * {@link module:engine/view/element~Element#getCustomProperty custom property of a view element} called `"$rawContent"`.
196
- *
197
- * @param pattern The pattern matching all view elements whose content should
198
- * be treated as raw data.
199
- */ registerRawContentMatcher(pattern) {
222
+ * Registers a {@link module:engine/view/matcher~MatcherPattern} for view elements whose content should be treated as raw data
223
+ * and not processed during the conversion from Markdown to view elements.
224
+ *
225
+ * The raw data can be later accessed by a
226
+ * {@link module:engine/view/element~Element#getCustomProperty custom property of a view element} called `"$rawContent"`.
227
+ *
228
+ * @param pattern The pattern matching all view elements whose content should
229
+ * be treated as raw data.
230
+ */ registerRawContentMatcher(pattern) {
200
231
  this._htmlDP.registerRawContentMatcher(pattern);
201
232
  }
202
233
  /**
203
- * This method does not have any effect on the data processor result. It exists for compatibility with the
204
- * {@link module:engine/dataprocessor/dataprocessor~DataProcessor `DataProcessor` interface}.
205
- */ useFillerType() {}
206
- /**
207
- * Creates a new instance of the Markdown data processor class.
208
- */ constructor(document){
209
- this._htmlDP = new HtmlDataProcessor(document);
210
- }
234
+ * This method does not have any effect on the data processor result. It exists for compatibility with the
235
+ * {@link module:engine/dataprocessor/dataprocessor~DataProcessor `DataProcessor` interface}.
236
+ */ useFillerType() {}
211
237
  }
212
238
 
213
- class Markdown extends Plugin {
214
- /**
215
- * @inheritDoc
216
- */ static get pluginName() {
217
- return 'Markdown';
218
- }
239
+ /**
240
+ * The GitHub Flavored Markdown (GFM) plugin.
241
+ *
242
+ * For a detailed overview, check the {@glink features/markdown Markdown feature} guide.
243
+ */ class Markdown extends Plugin {
219
244
  /**
220
- * @inheritDoc
221
- */ constructor(editor){
245
+ * @inheritDoc
246
+ */ constructor(editor){
222
247
  super(editor);
223
248
  editor.data.processor = new GFMDataProcessor(editor.data.viewDocument);
224
249
  }
250
+ /**
251
+ * @inheritDoc
252
+ */ static get pluginName() {
253
+ return 'Markdown';
254
+ }
225
255
  }
226
256
 
227
257
  const ALLOWED_MARKDOWN_FIRST_LEVEL_TAGS = [
@@ -230,22 +260,35 @@ const ALLOWED_MARKDOWN_FIRST_LEVEL_TAGS = [
230
260
  'PRE',
231
261
  'CODE'
232
262
  ];
233
- class PasteFromMarkdownExperimental extends Plugin {
263
+ /**
264
+ * The GitHub Flavored Markdown (GFM) paste plugin.
265
+ *
266
+ * For a detailed overview, check the {@glink features/pasting/paste-markdown Paste Markdown feature} guide.
267
+ */ class PasteFromMarkdownExperimental extends Plugin {
268
+ /**
269
+ * @internal
270
+ */ _gfmDataProcessor;
234
271
  /**
235
- * @inheritDoc
236
- */ static get pluginName() {
272
+ * @inheritDoc
273
+ */ constructor(editor){
274
+ super(editor);
275
+ this._gfmDataProcessor = new GFMDataProcessor(editor.data.viewDocument);
276
+ }
277
+ /**
278
+ * @inheritDoc
279
+ */ static get pluginName() {
237
280
  return 'PasteFromMarkdownExperimental';
238
281
  }
239
282
  /**
240
- * @inheritDoc
241
- */ static get requires() {
283
+ * @inheritDoc
284
+ */ static get requires() {
242
285
  return [
243
286
  ClipboardPipeline
244
287
  ];
245
288
  }
246
289
  /**
247
- * @inheritDoc
248
- */ init() {
290
+ * @inheritDoc
291
+ */ init() {
249
292
  const editor = this.editor;
250
293
  const view = editor.editing.view;
251
294
  const viewDocument = view.document;
@@ -271,13 +314,13 @@ class PasteFromMarkdownExperimental extends Plugin {
271
314
  });
272
315
  }
273
316
  /**
274
- * Determines if the code copied from a website in the `text/html` type can be parsed as Markdown.
275
- * It removes any OS-specific HTML tags, for example, <meta> on macOS and <!--StartFragment--> on Windows.
276
- * Then removes a single wrapper HTML tag or wrappers for sibling tags, and if there are no more tags left,
277
- * returns the remaining text. Returns null if there are any remaining HTML tags detected.
278
- *
279
- * @param htmlString Clipboard content in the `text/html` type format.
280
- */ _parseMarkdownFromHtml(htmlString) {
317
+ * Determines if the code copied from a website in the `text/html` type can be parsed as Markdown.
318
+ * It removes any OS-specific HTML tags, for example, <meta> on macOS and <!--StartFragment--> on Windows.
319
+ * Then removes a single wrapper HTML tag or wrappers for sibling tags, and if there are no more tags left,
320
+ * returns the remaining text. Returns null if there are any remaining HTML tags detected.
321
+ *
322
+ * @param htmlString Clipboard content in the `text/html` type format.
323
+ */ _parseMarkdownFromHtml(htmlString) {
281
324
  const withoutOsSpecificTags = this._removeOsSpecificTags(htmlString);
282
325
  if (!this._containsOnlyAllowedFirstLevelTags(withoutOsSpecificTags)) {
283
326
  return null;
@@ -289,10 +332,10 @@ class PasteFromMarkdownExperimental extends Plugin {
289
332
  return this._replaceHtmlReservedEntitiesWithCharacters(withoutWrapperTag);
290
333
  }
291
334
  /**
292
- * Removes OS-specific tags.
293
- *
294
- * @param htmlString Clipboard content in the `text/html` type format.
295
- */ _removeOsSpecificTags(htmlString) {
335
+ * Removes OS-specific tags.
336
+ *
337
+ * @param htmlString Clipboard content in the `text/html` type format.
338
+ */ _removeOsSpecificTags(htmlString) {
296
339
  // Removing the <meta> tag present on Mac.
297
340
  const withoutMetaTag = htmlString.replace(/^<meta\b[^>]*>/, '').trim();
298
341
  // Removing the <html> tag present on Windows.
@@ -303,21 +346,21 @@ class PasteFromMarkdownExperimental extends Plugin {
303
346
  return withoutBodyTag.replace(/^<!--StartFragment-->/, '').replace(/<!--EndFragment-->$/, '').trim();
304
347
  }
305
348
  /**
306
- * If the input HTML string contains any first-level formatting tags
307
- * like <b>, <strong>, or <i>, we should not treat it as Markdown.
308
- *
309
- * @param htmlString Clipboard content.
310
- */ _containsOnlyAllowedFirstLevelTags(htmlString) {
349
+ * If the input HTML string contains any first-level formatting tags
350
+ * like <b>, <strong>, or <i>, we should not treat it as Markdown.
351
+ *
352
+ * @param htmlString Clipboard content.
353
+ */ _containsOnlyAllowedFirstLevelTags(htmlString) {
311
354
  const parser = new DOMParser();
312
355
  const { body: tempElement } = parser.parseFromString(htmlString, 'text/html');
313
356
  const tagNames = Array.from(tempElement.children).map((el)=>el.tagName);
314
357
  return tagNames.every((el)=>ALLOWED_MARKDOWN_FIRST_LEVEL_TAGS.includes(el));
315
358
  }
316
359
  /**
317
- * Removes multiple HTML wrapper tags from a list of sibling HTML tags.
318
- *
319
- * @param htmlString Clipboard content without any OS-specific tags.
320
- */ _removeFirstLevelWrapperTagsAndBrs(htmlString) {
360
+ * Removes multiple HTML wrapper tags from a list of sibling HTML tags.
361
+ *
362
+ * @param htmlString Clipboard content without any OS-specific tags.
363
+ */ _removeFirstLevelWrapperTagsAndBrs(htmlString) {
321
364
  const parser = new DOMParser();
322
365
  const { body: tempElement } = parser.parseFromString(htmlString, 'text/html');
323
366
  const brElements = tempElement.querySelectorAll('br');
@@ -332,23 +375,17 @@ class PasteFromMarkdownExperimental extends Plugin {
332
375
  return tempElement.innerHTML;
333
376
  }
334
377
  /**
335
- * Determines if a string contains any HTML tags.
336
- */ _containsAnyRemainingHtmlTags(str) {
378
+ * Determines if a string contains any HTML tags.
379
+ */ _containsAnyRemainingHtmlTags(str) {
337
380
  return str.includes('<');
338
381
  }
339
382
  /**
340
- * Replaces the reserved HTML entities with the actual characters.
341
- *
342
- * @param htmlString Clipboard content without any tags.
343
- */ _replaceHtmlReservedEntitiesWithCharacters(htmlString) {
383
+ * Replaces the reserved HTML entities with the actual characters.
384
+ *
385
+ * @param htmlString Clipboard content without any tags.
386
+ */ _replaceHtmlReservedEntitiesWithCharacters(htmlString) {
344
387
  return htmlString.replace(/&gt;/g, '>').replace(/&lt;/g, '<').replace(/&nbsp;/g, ' ');
345
388
  }
346
- /**
347
- * @inheritDoc
348
- */ constructor(editor){
349
- super(editor);
350
- this._gfmDataProcessor = new GFMDataProcessor(editor.data.viewDocument);
351
- }
352
389
  }
353
390
 
354
391
  export { Markdown, PasteFromMarkdownExperimental };