@adobe/spacecat-shared-html-analyzer 1.0.6 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,974 @@
1
+ var TurndownService = (function () {
2
+ 'use strict';
3
+
4
/**
 * Copies the own enumerable properties of every argument after the first
 * onto `destination`. Later sources overwrite earlier ones on key clashes.
 *
 * @param {Object} destination Object that receives the properties (mutated)
 * @returns {Object} The same `destination` object
 */
function extend (destination) {
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = 1; i < arguments.length; i++) {
    var source = arguments[i];
    for (var key in source) {
      // Borrow hasOwnProperty from Object.prototype so that sources created
      // with Object.create(null), or sources that shadow `hasOwnProperty`
      // with their own property, do not throw.
      if (hasOwn.call(source, key)) destination[key] = source[key];
    }
  }
  return destination;
}
13
+
14
/**
 * Builds a string made of `count` copies of `character`, by joining an
 * array of `count + 1` empty slots with `character` as the separator.
 *
 * @param {String} character The text to repeat
 * @param {Number} count How many copies to produce
 * @returns {String} The repeated text
 */
function repeat (character, count) {
  var slots = new Array(count + 1);
  return slots.join(character);
}
17
+
18
/**
 * Removes every `\n` character found at the very start of `string`.
 *
 * @param {String} string The text to trim
 * @returns {String} The text without its leading newlines
 */
function trimLeadingNewlines (string) {
  var leadingNewlines = /^\n*/;
  return string.replace(leadingNewlines, '');
}
21
+
22
/**
 * Removes every `\n` character found at the very end of `string`.
 *
 * @param {String} string The text to trim
 * @returns {String} The text without its trailing newlines
 */
function trimTrailingNewlines (string) {
  // Scan backwards by hand instead of using an end-anchored regexp, which
  // was a bottleneck (see #370).
  var end = string.length;
  for (; end > 0; end--) {
    if (string[end - 1] !== '\n') break;
  }
  return string.substring(0, end);
}
28
+
29
// Upper-case tag names that isBlock() accepts.
var blockElements = [
  'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
  'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT',
  'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'FRAMESET',
  'H1', 'H2', 'H3', 'H4', 'H5', 'H6',
  'HEADER', 'HGROUP', 'HR', 'HTML', 'ISINDEX',
  'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES', 'NOSCRIPT',
  'OL', 'OUTPUT', 'P', 'PRE', 'SECTION',
  'TABLE', 'TBODY', 'TD', 'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
];

/**
 * Tells whether the node's `nodeName` is listed in `blockElements`.
 *
 * @param {Node} node The node to inspect
 * @returns {Boolean} Result of the `is` lookup
 */
function isBlock (node) {
  var listed = is(node, blockElements);
  return listed;
}
41
+
42
// Upper-case tag names that isVoid() and hasVoid() look for.
var voidElements = [
  'AREA', 'BASE', 'BR', 'COL',
  'COMMAND', 'EMBED', 'HR', 'IMG',
  'INPUT', 'KEYGEN', 'LINK', 'META',
  'PARAM', 'SOURCE', 'TRACK', 'WBR'
];

/**
 * Tells whether the node's own `nodeName` is listed in `voidElements`.
 *
 * @param {Node} node The node to inspect
 * @returns {Boolean} Result of the `is` lookup
 */
function isVoid (node) {
  var listed = is(node, voidElements);
  return listed;
}

/**
 * Tells whether the node contains a descendant listed in `voidElements`,
 * as reported by the `has` helper.
 *
 * @param {Node} node The node to inspect
 * @returns {*} Whatever `has` returns for this node
 */
function hasVoid (node) {
  var found = has(node, voidElements);
  return found;
}
54
+
55
// Upper-case tag names checked by isMeaningfulWhenBlank() and
// hasMeaningfulWhenBlank().
var meaningfulWhenBlankElements = [
  'A',
  'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD',
  'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO'
];

/**
 * Tells whether the node's own `nodeName` is listed in
 * `meaningfulWhenBlankElements`.
 *
 * @param {Node} node The node to inspect
 * @returns {Boolean} Result of the `is` lookup
 */
function isMeaningfulWhenBlank (node) {
  var listed = is(node, meaningfulWhenBlankElements);
  return listed;
}

/**
 * Tells whether the node contains a descendant listed in
 * `meaningfulWhenBlankElements`, as reported by the `has` helper.
 *
 * @param {Node} node The node to inspect
 * @returns {*} Whatever `has` returns for this node
 */
function hasMeaningfulWhenBlank (node) {
  var found = has(node, meaningfulWhenBlankElements);
  return found;
}
67
+
68
/**
 * Tells whether `node.nodeName` appears in `tagNames`.
 *
 * @param {Node} node The node whose `nodeName` is looked up
 * @param {Array} tagNames Candidate names, compared exactly
 * @returns {Boolean} `true` when the name is in the list
 */
function is (node, tagNames) {
  var position = tagNames.indexOf(node.nodeName);
  return position >= 0;
}
71
+
72
/**
 * Tells whether `node.getElementsByTagName` finds at least one element for
 * any of the given tag names.
 *
 * @param {Node} node The node to search in
 * @param {Array} tagNames Tag names to look for
 * @returns {*} The falsy `getElementsByTagName` value when the node has no
 *   such method, otherwise a boolean
 */
function has (node, tagNames) {
  var lookup = node.getElementsByTagName;
  if (!lookup) return lookup;

  return tagNames.some(function (tagName) {
    return node.getElementsByTagName(tagName).length;
  });
}
80
+
81
// Registry of the built-in conversion rules, keyed by rule name.
var rules = {};

// <p>: surround the converted content with a blank line on each side.
rules.paragraph = {
  filter: 'p',

  replacement: function (content) {
    var blankLine = '\n\n';
    return blankLine + content + blankLine;
  }
};
90
+
91
// <br>: emit the configured `br` marker followed by a newline.
rules.lineBreak = {
  filter: 'br',

  replacement: function (content, node, options) {
    var marker = options.br;
    return marker + '\n';
  }
};
98
+
99
// <h1>-<h6>: setext underline for levels 1-2 when `headingStyle` is
// 'setext', otherwise a run of '#' characters as prefix.
rules.heading = {
  filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],

  replacement: function (content, node, options) {
    // The level is the digit that follows the 'H' in the node name.
    var level = Number(node.nodeName.charAt(1));
    var useSetext = options.headingStyle === 'setext' && level < 3;

    if (!useSetext) {
      return '\n\n' + repeat('#', level) + ' ' + content + '\n\n';
    }

    // The underline is as long as the heading text itself.
    var marker = level === 1 ? '=' : '-';
    var underline = repeat(marker, content.length);
    return '\n\n' + content + '\n' + underline + '\n\n';
  }
};
115
+
116
// <blockquote>: trim surrounding newlines, then prefix every line with '> '.
rules.blockquote = {
  filter: 'blockquote',

  replacement: function (content) {
    var trimmed = content.replace(/^\n+|\n+$/g, '');
    var quoted = trimmed.replace(/^/gm, '> ');
    return '\n\n' + quoted + '\n\n';
  }
};
125
+
126
// <ul>/<ol>: a list that is the last element child of an <li> gets one
// leading newline; any other list is surrounded by blank lines.
rules.list = {
  filter: ['ul', 'ol'],

  replacement: function (content, node) {
    var container = node.parentNode;
    var closesListItem =
      container.nodeName === 'LI' && container.lastElementChild === node;

    return closesListItem ? '\n' + content : '\n\n' + content + '\n\n';
  }
};
138
+
139
// <li>: prefix the content with a bullet or an ordinal and indent
// continuation lines by the width of that prefix.
rules.listItem = {
  filter: 'li',

  /**
   * @param {String} content Converted content of the list item
   * @param {Node} node The <li> node
   * @param {Object} options Reads `bulletListMarker`
   * @returns {String} The Markdown list item
   */
  replacement: function (content, node, options) {
    var prefix = options.bulletListMarker + ' ';
    var parent = node.parentNode;
    if (parent.nodeName === 'OL') {
      // Parse `start` as a base-10 integer. A missing, empty or non-numeric
      // value falls back to 1 instead of producing a "NaN. " prefix, and a
      // fractional value is truncated instead of leaking into the ordinal.
      var parsedStart = parseInt(parent.getAttribute('start'), 10);
      var first = isNaN(parsedStart) ? 1 : parsedStart;
      var index = Array.prototype.indexOf.call(parent.children, node);
      prefix = (first + index) + '. ';
    }
    content = content
      .replace(/^\n+/, '') // remove leading newlines
      .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
      .replace(/\n/gm, '\n' + ' '.repeat(prefix.length)); // indent
    // Only add a separating newline when another sibling follows and the
    // content does not already end with one.
    return (
      prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
    );
  }
};
159
+
160
// <pre><code>: emitted as an indented code block when `codeBlockStyle`
// is 'indented'.
rules.indentedCodeBlock = {
  filter: function (node, options) {
    return (
      options.codeBlockStyle === 'indented' &&
      node.nodeName === 'PRE' &&
      node.firstChild &&
      node.firstChild.nodeName === 'CODE'
    );
  },

  /**
   * @param {String} content Unused; the raw text of the <code> child is used
   * @param {Node} node The <pre> node
   * @param {Object} options Unused
   * @returns {String} The code, with every line indented by four spaces
   */
  replacement: function (content, node, options) {
    // Markdown only treats a line as an indented code block when it is
    // indented by at least four spaces; fewer spaces yield a paragraph.
    var indent = '    ';
    return (
      '\n\n' + indent +
      node.firstChild.textContent.replace(/\n/g, '\n' + indent) +
      '\n\n'
    );
  }
};
178
+
179
// <pre><code>: emitted as a fenced code block when `codeBlockStyle` is
// 'fenced'.
rules.fencedCodeBlock = {
  filter: function (node, options) {
    return (
      options.codeBlockStyle === 'fenced' &&
      node.nodeName === 'PRE' &&
      node.firstChild &&
      node.firstChild.nodeName === 'CODE'
    );
  },

  replacement: function (content, node, options) {
    var codeNode = node.firstChild;
    var className = codeNode.getAttribute('class') || '';
    var languageMatch = className.match(/language-(\S+)/);
    var language = languageMatch ? languageMatch[1] : '';
    var code = codeNode.textContent;

    var fenceChar = options.fence.charAt(0);
    var fenceSize = 3;

    // Make the fence one character longer than the longest run of fence
    // characters that starts a line of the code itself.
    var runs = code.match(new RegExp('^' + fenceChar + '{3,}', 'gm')) || [];
    runs.forEach(function (run) {
      if (run.length >= fenceSize) fenceSize = run.length + 1;
    });

    var fence = repeat(fenceChar, fenceSize);
    var opening = '\n\n' + fence + language + '\n';
    var closing = '\n' + fence + '\n\n';

    return opening + code.replace(/\n$/, '') + closing;
  }
};
214
+
215
// <hr>: emit the configured `hr` marker surrounded by blank lines.
rules.horizontalRule = {
  filter: 'hr',

  replacement: function (content, node, options) {
    var blankLine = '\n\n';
    return blankLine + options.hr + blankLine;
  }
};
222
+
223
// <a href>: emitted as `[text](href "title")` when `linkStyle` is 'inlined'.
rules.inlineLink = {
  filter: function (node, options) {
    if (options.linkStyle !== 'inlined') return false;
    if (node.nodeName !== 'A') return false;
    // The href value itself doubles as the truthy/falsy result.
    return node.getAttribute('href');
  },

  replacement: function (content, node) {
    var target = node.getAttribute('href');
    if (target) {
      // Backslash-escape parentheses inside the link target.
      target = target.replace(/([()])/g, '\\$1');
    }

    var titlePart = cleanAttribute(node.getAttribute('title'));
    if (titlePart) {
      // Quote the title, escaping any double quotes it contains.
      titlePart = ' "' + titlePart.replace(/"/g, '\\"') + '"';
    }

    return '[' + content + '](' + target + titlePart + ')';
  }
};
240
+
241
// <a href>: emitted as a reference-style link when `linkStyle` is
// 'referenced'. The definitions are collected in `references` and flushed
// by `append`.
rules.referenceLink = {
  filter: function (node, options) {
    return (
      options.linkStyle === 'referenced' &&
      node.nodeName === 'A' &&
      node.getAttribute('href')
    );
  },

  /**
   * @param {String} content Converted link text
   * @param {Node} node The <a> node
   * @param {Object} options Reads `linkReferenceStyle`
   * @returns {String} The in-text part of the reference link
   */
  replacement: function (content, node, options) {
    var href = node.getAttribute('href');
    var title = cleanAttribute(node.getAttribute('title'));
    // The title is wrapped in double quotes, so embedded double quotes must
    // be backslash-escaped (as the inline link rule does); otherwise the
    // definition line ends the title early.
    if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
    var replacement;
    var reference;

    switch (options.linkReferenceStyle) {
      case 'collapsed':
        replacement = '[' + content + '][]';
        reference = '[' + content + ']: ' + href + title;
        break;
      case 'shortcut':
        replacement = '[' + content + ']';
        reference = '[' + content + ']: ' + href + title;
        break;
      default:
        // Any other style: number the definitions in order of appearance.
        var id = this.references.length + 1;
        replacement = '[' + content + '][' + id + ']';
        reference = '[' + id + ']: ' + href + title;
    }

    this.references.push(reference);
    return replacement;
  },

  // Definitions gathered by `replacement` since the last `append` call.
  references: [],

  /**
   * @param {Object} options Unused
   * @returns {String} The collected definitions surrounded by blank lines,
   *   or an empty string when there are none
   */
  append: function (options) {
    var references = '';
    if (this.references.length) {
      references = '\n\n' + this.references.join('\n') + '\n\n';
      this.references = []; // Reset references
    }
    return references;
  }
};
287
+
288
// <em>/<i>: wrap non-blank content in the configured emphasis delimiter.
rules.emphasis = {
  filter: ['em', 'i'],

  replacement: function (content, node, options) {
    var delimiter = options.emDelimiter;
    // Whitespace-only content produces no output at all.
    return content.trim() ? delimiter + content + delimiter : '';
  }
};
296
+
297
// <strong>/<b>: wrap non-blank content in the configured strong delimiter.
rules.strong = {
  filter: ['strong', 'b'],

  replacement: function (content, node, options) {
    var delimiter = options.strongDelimiter;
    // Whitespace-only content produces no output at all.
    return content.trim() ? delimiter + content + delimiter : '';
  }
};
305
+
306
// <code>: inline code span. A <code> that is the only child of a <pre> is
// left to the code block rules.
rules.code = {
  filter: function (node) {
    var hasSiblings = node.previousSibling || node.nextSibling;
    var onlyChildOfPre = node.parentNode.nodeName === 'PRE' && !hasSiblings;

    if (node.nodeName !== 'CODE') return false;
    return !onlyChildOfPre;
  },

  replacement: function (content) {
    if (!content) return '';

    // Line breaks inside a code span are flattened to single spaces.
    var flattened = content.replace(/\r?\n|\r/g, ' ');

    // Pad with a space when the content starts or ends with a backtick, or
    // is non-blank text wrapped in a leading and a trailing space.
    var padding = /^`|^ .*?[^ ].* $|`$/.test(flattened) ? ' ' : '';

    // Grow the delimiter until it differs from every backtick run inside.
    var backtickRuns = flattened.match(/`+/gm) || [];
    var delimiter = '`';
    while (backtickRuns.indexOf(delimiter) !== -1) {
      delimiter += '`';
    }

    return delimiter + padding + flattened + padding + delimiter;
  }
};
326
+
327
// <img>: emitted as `![alt](src "title")`; images without `src` are dropped.
rules.image = {
  filter: 'img',

  replacement: function (content, node) {
    var alt = cleanAttribute(node.getAttribute('alt'));
    var src = node.getAttribute('src') || '';
    var title = cleanAttribute(node.getAttribute('title'));

    if (!src) return '';

    var titlePart = '';
    if (title) titlePart = ' "' + title + '"';

    return '![' + alt + ']' + '(' + src + titlePart + ')';
  }
};
338
+
339
/**
 * Normalises an attribute value: every newline run, together with the
 * whitespace that follows it, becomes a single newline.
 *
 * @param {String} attribute The raw attribute value; may be null or empty
 * @returns {String} The cleaned value, or '' for a falsy input
 */
function cleanAttribute (attribute) {
  if (!attribute) return '';
  return attribute.replace(/(\n+\s*)+/g, '\n');
}
342
+
343
/**
 * Manages a collection of rules used to convert HTML to Markdown.
 *
 * @param {Object} options Reads `rules`, `blankReplacement`,
 *   `keepReplacement` and `defaultReplacement`; also stored as-is
 */
function Rules (options) {
  this.options = options;
  this._keep = [];
  this._remove = [];

  // Rule handed out by forNode() for nodes flagged as blank.
  this.blankRule = { replacement: options.blankReplacement };

  this.keepReplacement = options.keepReplacement;

  // Rule handed out by forNode() when nothing else matches.
  this.defaultRule = { replacement: options.defaultReplacement };

  // Flatten the keyed rule registry into a list, in enumeration order.
  var registry = options.rules;
  var ordered = [];
  for (var name in registry) {
    ordered.push(registry[name]);
  }
  this.array = ordered;
}
365
+
366
Rules.prototype = {
  // Puts `rule` at the front of the list so it is tried before older rules.
  // `key` is accepted but not used.
  add: function (key, rule) {
    this.array.unshift(rule);
  },

  // Registers a filter whose matches are rendered via `keepReplacement`.
  keep: function (filter) {
    var keepRule = {
      filter: filter,
      replacement: this.keepReplacement
    };
    this._keep.unshift(keepRule);
  },

  // Registers a filter whose matches are replaced by an empty string.
  remove: function (filter) {
    var removeRule = {
      filter: filter,
      replacement: function () {
        return '';
      }
    };
    this._remove.unshift(removeRule);
  },

  // Picks the rule for `node`: the blank rule for blank nodes, otherwise the
  // first match among the regular, keep and remove rules (in that order),
  // otherwise the default rule.
  forNode: function (node) {
    if (node.isBlank) return this.blankRule;

    var groups = [this.array, this._keep, this._remove];
    for (var g = 0; g < groups.length; g++) {
      var match = findRule(groups[g], node, this.options);
      if (match) return match;
    }

    return this.defaultRule;
  },

  // Calls `fn(rule, index)` for every regular rule.
  forEach: function (fn) {
    var index = 0;
    while (index < this.array.length) {
      fn(this.array[index], index);
      index++;
    }
  }
};
402
+
403
/**
 * Returns the first rule in `rules` whose filter accepts `node`.
 *
 * @param {Array} rules Candidate rules, tried in order
 * @param {Node} node The node to match
 * @param {Object} options Passed through to the filter check
 * @returns {Object|undefined} The matching rule, or undefined when none match
 */
function findRule (rules, node, options) {
  var index = 0;
  while (index < rules.length) {
    var candidate = rules[index];
    if (filterValue(candidate, node, options)) return candidate;
    index++;
  }
  return undefined;
}
410
+
411
/**
 * Checks whether `rule.filter` accepts `node`. A string filter is compared
 * with the lower-cased node name, an array filter must contain it, and a
 * function filter is called with the rule as `this`.
 *
 * @param {Object} rule The rule whose `filter` is evaluated
 * @param {Node} node The node to test
 * @param {Object} options Handed to function filters
 * @returns {Boolean|undefined} `true` on a match, otherwise undefined
 * @throws {TypeError} When the filter is not a string, array or function
 */
function filterValue (rule, node, options) {
  var filter = rule.filter;
  var accepted;

  if (typeof filter === 'string') {
    accepted = filter === node.nodeName.toLowerCase();
  } else if (Array.isArray(filter)) {
    accepted = filter.indexOf(node.nodeName.toLowerCase()) > -1;
  } else if (typeof filter === 'function') {
    accepted = filter.call(rule, node, options);
  } else {
    throw new TypeError('`filter` needs to be a string, array, or function');
  }

  if (accepted) return true;
}
423
+
424
+ /**
425
+ * The collapseWhitespace function is adapted from collapse-whitespace
426
+ * by Luc Thevenard.
427
+ *
428
+ * The MIT License (MIT)
429
+ *
430
+ * Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
431
+ *
432
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
433
+ * of this software and associated documentation files (the "Software"), to deal
434
+ * in the Software without restriction, including without limitation the rights
435
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
436
+ * copies of the Software, and to permit persons to whom the Software is
437
+ * furnished to do so, subject to the following conditions:
438
+ *
439
+ * The above copyright notice and this permission notice shall be included in
440
+ * all copies or substantial portions of the Software.
441
+ *
442
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
443
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
444
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
445
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
446
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
447
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
448
+ * THE SOFTWARE.
449
+ */
450
+
451
/**
 * collapseWhitespace(options) removes extraneous whitespace from the given
 * element, editing its text nodes in place.
 *
 * @param {Object} options Reads `element` (the subtree root), the predicates
 *   `isBlock` and `isVoid`, and optionally `isPre`; without `isPre`, nodes
 *   named 'PRE' are treated as preformatted
 */
function collapseWhitespace (options) {
  var element = options.element;
  var isBlock = options.isBlock;
  var isVoid = options.isVoid;
  var isPre = options.isPre;
  if (!isPre) {
    isPre = function (node) {
      return node.nodeName === 'PRE';
    };
  }

  // Nothing to do for childless or preformatted roots.
  if (!element.firstChild || isPre(element)) return;

  var lastText = null; // most recent text node kept since the last reset
  var protectLeadingWs = false; // set after void elements and inline PRE

  var visited = null;
  var current = next(visited, element, isPre);

  // The walk ends when it climbs back up to the root element.
  while (current !== element) {
    var type = current.nodeType;
    var isTextLike = type === 3 || type === 4; // Node.TEXT_NODE or Node.CDATA_SECTION_NODE

    if (!isTextLike && type !== 1) {
      // Neither text nor element: drop the node.
      current = remove(current);
      continue;
    }

    if (isTextLike) {
      var text = current.data.replace(/[ \r\n\t]+/g, ' ');

      // A leading space is dropped when there is no preceding text or that
      // text already ends with a space, unless protection is active.
      var followsSpace = !lastText || / $/.test(lastText.data);
      if (followsSpace && !protectLeadingWs && text[0] === ' ') {
        text = text.substr(1);
      }

      // `text` might be empty at this point.
      if (!text) {
        current = remove(current);
        continue;
      }

      current.data = text;
      lastText = current;
    } else if (isBlock(current) || current.nodeName === 'BR') {
      // Text directly before a block or <br> loses its trailing space.
      if (lastText) {
        lastText.data = lastText.data.replace(/ $/, '');
      }

      lastText = null;
      protectLeadingWs = false;
    } else if (isVoid(current) || isPre(current)) {
      // Avoid trimming space around non-block, non-BR void elements and inline PRE.
      lastText = null;
      protectLeadingWs = true;
    } else if (lastText) {
      // Drop protection if set previously.
      protectLeadingWs = false;
    }

    var upcoming = next(visited, current, isPre);
    visited = current;
    current = upcoming;
  }

  // The last remaining text node loses its trailing space as well, and is
  // removed if that leaves it empty.
  if (lastText) {
    lastText.data = lastText.data.replace(/ $/, '');
    if (!lastText.data) {
      remove(lastText);
    }
  }
}
523
+
524
/**
 * remove(node) detaches the given node from its parent and returns the node
 * to continue with: its next sibling, or the parent when there is none.
 *
 * @param {Node} node
 * @return {Node} node
 */
function remove (node) {
  var parent = node.parentNode;
  // Pick the successor before detaching the node.
  var successor = node.nextSibling || parent;

  parent.removeChild(node);

  return successor;
}
538
+
539
/**
 * next(prev, current, isPre) returns the next node in the sequence, given the
 * current and previous nodes. Children are skipped when the walk has just
 * come up out of `current`, or when `isPre(current)` is truthy.
 *
 * @param {Node} prev
 * @param {Node} current
 * @param {Function} isPre
 * @return {Node}
 */
function next (prev, current, isPre) {
  var cameFromChild = prev && prev.parentNode === current;
  var sideways = current.nextSibling || current.parentNode;

  if (cameFromChild || isPre(current)) {
    return sideways;
  }

  return current.firstChild || sideways;
}
555
+
556
/*
 * Set up window for Node.js: fall back to an empty object when there is no
 * global `window`.
 */

var root = (typeof window !== 'undefined' ? window : {});

/*
 * Parsing HTML strings
 */

/**
 * Tells whether `root.DOMParser` can parse the 'text/html' type.
 *
 * @returns {Boolean} `true` when parsing an empty string yields a truthy
 *   result; `false` when it yields nothing or throws
 */
function canParseHTMLNatively () {
  var Parser = root.DOMParser;

  // Adapted from https://gist.github.com/1129031
  // Firefox/Opera/IE throw errors on unsupported types
  try {
    // WebKit returns null on unsupported types
    if (new Parser().parseFromString('', 'text/html')) {
      return true;
    }
  } catch (e) {}

  return false;
}
581
+
582
/**
 * Builds a stand-in parser constructor whose `parseFromString(string)`
 * writes the markup into a fresh document and returns that document.
 *
 * @returns {Function} The parser constructor
 */
function createHTMLParser () {
  var Parser = function () {};

  // Variant backed by an 'htmlfile' ActiveX document.
  var parseWithActiveX = function (string) {
    var doc = new window.ActiveXObject('htmlfile');
    doc.designMode = 'on'; // disable on-page scripts
    doc.open();
    doc.write(string);
    doc.close();
    return doc;
  };

  // Variant backed by document.implementation.createHTMLDocument.
  var parseWithDocument = function (string) {
    var doc = document.implementation.createHTMLDocument('');
    doc.open();
    doc.write(string);
    doc.close();
    return doc;
  };

  Parser.prototype.parseFromString = shouldUseActiveX()
    ? parseWithActiveX
    : parseWithDocument;

  return Parser;
}
607
+
608
/**
 * Tells whether the ActiveX fallback is needed: opening a document made by
 * `createHTMLDocument` throws and `root.ActiveXObject` is available.
 *
 * @returns {Boolean}
 */
function shouldUseActiveX () {
  try {
    document.implementation.createHTMLDocument('').open();
  } catch (e) {
    return root.ActiveXObject ? true : false;
  }
  return false;
}

// Parser constructor used for HTML strings: the native DOMParser when it
// handles 'text/html', otherwise the stand-in built by createHTMLParser().
var HTMLParser;
if (canParseHTMLNatively()) {
  HTMLParser = root.DOMParser;
} else {
  HTMLParser = createHTMLParser();
}
619
+
620
/**
 * Produces the root node for a conversion: a string is parsed inside a
 * wrapper element, a node is deep-cloned. Whitespace is then collapsed in
 * the result.
 *
 * @param {String|Node} input Markup or an existing node
 * @param {Object} options Reads `preformattedCode`
 * @returns {Node} The prepared root node
 */
function RootNode (input, options) {
  var tree;

  if (typeof input !== 'string') {
    tree = input.cloneNode(true);
  } else {
    // DOM parsers arrange elements in the <head> and <body>.
    // Wrapping in a custom element ensures elements are reliably arranged in
    // a single element.
    var wrapped = '<x-turndown id="turndown-root">' + input + '</x-turndown>';
    var doc = htmlParser().parseFromString(wrapped, 'text/html');
    tree = doc.getElementById('turndown-root');
  }

  // With `preformattedCode`, CODE is treated as preformatted as well as PRE.
  var preCheck = options.preformattedCode ? isPreOrCode : null;

  collapseWhitespace({
    element: tree,
    isBlock: isBlock,
    isVoid: isVoid,
    isPre: preCheck
  });

  return tree;
}
643
+
644
// Parser instance shared by all conversions; created on first use.
var _htmlParser;

/**
 * Returns the shared `HTMLParser` instance, creating it on the first call.
 *
 * @returns {Object} The parser instance
 */
function htmlParser () {
  if (!_htmlParser) {
    _htmlParser = new HTMLParser();
  }
  return _htmlParser;
}
649
+
650
/**
 * Tells whether the node is named 'PRE' or 'CODE'.
 *
 * @param {Node} node The node to inspect
 * @returns {Boolean}
 */
function isPreOrCode (node) {
  var name = node.nodeName;
  if (name === 'PRE') return true;
  return name === 'CODE';
}
653
+
654
/**
 * Decorates a DOM node in place with the flags the converter reads later:
 * `isBlock`, `isCode`, `isBlank` and `flankingWhitespace`.
 *
 * @param {Node} node The node to decorate (mutated)
 * @param {Object} options Passed on to `flankingWhitespace`
 * @returns {Node} The same node
 */
function Node (node, options) {
  node.isBlock = isBlock(node);

  // A node counts as code when it is a CODE element or its parent is
  // already flagged as code.
  var insideCode = node.nodeName === 'CODE' || node.parentNode.isCode;
  node.isCode = insideCode;

  node.isBlank = isBlank(node);
  node.flankingWhitespace = flankingWhitespace(node, options);

  return node;
}
661
+
662
/**
 * Tells whether a node is blank: its text is whitespace only, and neither
 * the node nor any descendant is a void or meaningful-when-blank element.
 *
 * @param {Node} node The node to inspect
 * @returns {Boolean}
 */
function isBlank (node) {
  if (isVoid(node)) return false;
  if (isMeaningfulWhenBlank(node)) return false;
  if (!/^\s*$/i.test(node.textContent)) return false;
  if (hasVoid(node)) return false;
  return !hasMeaningfulWhenBlank(node);
}
671
+
672
/**
 * Works out the whitespace to keep on each side of a node's output.
 *
 * @param {Node} node A node already flagged with `isBlock` and `isCode`
 * @param {Object} options Reads `preformattedCode`
 * @returns {Object} `{ leading, trailing }` whitespace strings
 */
function flankingWhitespace (node, options) {
  // Block nodes, and code nodes in preformatted mode, keep none.
  var exempt = node.isBlock || (options.preformattedCode && node.isCode);
  if (exempt) {
    return { leading: '', trailing: '' };
  }

  var edges = edgeWhitespace(node.textContent);
  var leading = edges.leading;
  var trailing = edges.trailing;

  // abandon leading ASCII WS if left-flanked by ASCII WS
  if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
    leading = edges.leadingNonAscii;
  }

  // abandon trailing ASCII WS if right-flanked by ASCII WS
  if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
    trailing = edges.trailingNonAscii;
  }

  return { leading: leading, trailing: trailing };
}
691
+
692
/**
 * Splits the whitespace at both ends of a string into its ASCII part
 * (space, tab, CR, LF) and the remaining whitespace.
 *
 * @param {String} string The text to analyse
 * @returns {Object} `leading`, `leadingAscii`, `leadingNonAscii`,
 *   `trailing`, `trailingNonAscii` and `trailingAscii`
 */
function edgeWhitespace (string) {
  var pattern = /^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/;
  var groups = string.match(pattern);

  var edges = {};
  edges.leading = groups[1]; // whole string for whitespace-only strings
  edges.leadingAscii = groups[2];
  edges.leadingNonAscii = groups[3];
  edges.trailing = groups[4]; // empty for whitespace-only strings
  edges.trailingNonAscii = groups[5];
  edges.trailingAscii = groups[6];
  return edges;
}
703
+
704
/**
 * Tells whether the sibling on the given side of `node` has a space on the
 * edge that touches the node.
 *
 * @param {String} side 'left' for the previous sibling, anything else for
 *   the next sibling
 * @param {Node} node The node whose neighbour is inspected
 * @param {Object} options Reads `preformattedCode`
 * @returns {Boolean|undefined} undefined when there is no sibling, or when
 *   the sibling is neither text nor a non-block element
 */
function isFlankedByWhitespace (side, node, options) {
  var onLeft = side === 'left';
  var sibling = onLeft ? node.previousSibling : node.nextSibling;
  // Left neighbours are checked at their end, right neighbours at their start.
  var touchingSpace = onLeft ? / $/ : /^ /;

  if (!sibling) return undefined;

  if (sibling.nodeType === 3) {
    return touchingSpace.test(sibling.nodeValue);
  }
  if (options.preformattedCode && sibling.nodeName === 'CODE') {
    return false;
  }
  if (sibling.nodeType === 1 && !isBlock(sibling)) {
    return touchingSpace.test(sibling.textContent);
  }
  return undefined;
}
728
+
729
// Borrowed so array-likes such as node lists can be reduced.
var reduce = Array.prototype.reduce;

// [pattern, replacement] pairs applied in this order by `escape`. Patterns
// that start with `^` only match at the beginning of the string.
var escapes = [
  // backslashes first, so the ones added below are not escaped again
  [/\\/g, '\\\\'],
  // asterisks
  [/\*/g, '\\*'],
  // a leading hyphen
  [/^-/g, '\\-'],
  // a leading plus followed by a space
  [/^\+ /g, '\\+ '],
  // a leading run of equals signs
  [/^(=+)/g, '\\$1'],
  // one to six leading hashes followed by a space
  [/^(#{1,6}) /g, '\\$1 '],
  // backticks
  [/`/g, '\\`'],
  // three leading tildes
  [/^~~~/g, '\\~~~'],
  // square brackets
  [/\[/g, '\\['],
  [/\]/g, '\\]'],
  // a leading greater-than sign
  [/^>/g, '\\>'],
  // underscores
  [/_/g, '\\_'],
  // leading digits followed by a dot and a space
  [/^(\d+)\. /g, '$1\\. ']
];
745
+
746
/**
 * Creates a converter. May be called with or without `new`.
 *
 * @param {Object} [options] Overrides for the defaults listed in the body
 */
function TurndownService (options) {
  if (!(this instanceof TurndownService)) return new TurndownService(options);

  var defaults = {
    rules: rules,
    headingStyle: 'setext',
    hr: '* * *',
    bulletListMarker: '*',
    codeBlockStyle: 'indented',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full',
    // Two trailing spaces: a Markdown hard line break needs at least two
    // spaces before the newline; a single space is only a soft break.
    br: '  ',
    preformattedCode: false,
    // Used for blank nodes: blocks leave a paragraph gap, others nothing.
    blankReplacement: function (content, node) {
      return node.isBlock ? '\n\n' : '';
    },
    // Used for kept nodes: the node's own HTML is emitted verbatim.
    keepReplacement: function (content, node) {
      return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML;
    },
    // Used when no rule matches: the converted content is passed through.
    defaultReplacement: function (content, node) {
      return node.isBlock ? '\n\n' + content + '\n\n' : content;
    }
  };
  this.options = extend({}, defaults, options);
  this.rules = new Rules(this.options);
}
775
+
776
TurndownService.prototype = {
  /**
   * The entry point for converting a string or DOM node to Markdown
   * @public
   * @param {String|HTMLElement} input The string or DOM node to convert
   * @returns {String} A Markdown representation of the input
   * @throws {TypeError} When `canConvert` rejects the input
   */

  turndown: function (input) {
    if (!canConvert(input)) {
      throw new TypeError(
        input + ' is not a string, or an element/document/fragment node.'
      );
    }

    // An empty string needs no parsing.
    if (input === '') return '';

    var tree = new RootNode(input, this.options);
    var markdown = process.call(this, tree);
    return postProcess.call(this, markdown);
  },

  /**
   * Add one or more plugins; each plugin function is called with this
   * instance
   * @public
   * @param {Function|Array} plugin The plugin or array of plugins to add
   * @returns {Object} The Turndown instance for chaining
   * @throws {TypeError} When `plugin` is neither a function nor an array
   */

  use: function (plugin) {
    if (typeof plugin === 'function') {
      plugin(this);
      return this;
    }

    if (!Array.isArray(plugin)) {
      throw new TypeError('plugin must be a Function or an Array of Functions');
    }

    for (var i = 0; i < plugin.length; i++) {
      this.use(plugin[i]);
    }
    return this;
  },

  /**
   * Adds a rule
   * @public
   * @param {String} key The unique key of the rule
   * @param {Object} rule The rule
   * @returns {Object} The Turndown instance for chaining
   */

  addRule: function (key, rule) {
    this.rules.add(key, rule);
    return this;
  },

  /**
   * Keep a node (as HTML) that matches the filter
   * @public
   * @param {String|Array|Function} filter Selects the nodes to keep
   * @returns {Object} The Turndown instance for chaining
   */

  keep: function (filter) {
    this.rules.keep(filter);
    return this;
  },

  /**
   * Remove a node that matches the filter
   * @public
   * @param {String|Array|Function} filter Selects the nodes to remove
   * @returns {Object} The Turndown instance for chaining
   */

  remove: function (filter) {
    this.rules.remove(filter);
    return this;
  },

  /**
   * Escapes Markdown syntax by applying every `escapes` pair in order
   * @public
   * @param {String} string The string to escape
   * @returns {String} A string with Markdown syntax escaped
   */

  escape: function (string) {
    var escaped = string;
    for (var i = 0; i < escapes.length; i++) {
      var pair = escapes[i];
      escaped = escaped.replace(pair[0], pair[1]);
    }
    return escaped;
  }
};
871
+
872
/**
 * Reduces a DOM node down to its Markdown string equivalent by converting
 * each child node and joining the results. Must be called with a
 * TurndownService instance as `this`.
 * @private
 * @param {HTMLElement} parentNode The node to convert
 * @returns {String} A Markdown representation of the node
 */

function process (parentNode) {
  var service = this;

  function appendChild (output, child) {
    var node = new Node(child, service.options);
    var replacement = '';

    if (node.nodeType === 3) {
      // Text inside code is emitted verbatim; other text gets escaped.
      replacement = node.isCode
        ? node.nodeValue
        : service.escape(node.nodeValue);
    } else if (node.nodeType === 1) {
      replacement = replacementForNode.call(service, node);
    }

    return join(output, replacement);
  }

  return reduce.call(parentNode.childNodes, appendChild, '');
}
895
+
896
/**
 * Appends strings as each rule requires and trims the output. Must be
 * called with a TurndownService instance as `this`.
 * @private
 * @param {String} output The conversion output
 * @returns {String} The output without leading tabs, carriage returns and
 *   newlines, and without trailing whitespace
 */

function postProcess (output) {
  var self = this;
  this.rules.forEach(function (rule) {
    if (typeof rule.append === 'function') {
      output = join(output, rule.append(self.options));
    }
  });

  // Trim by scanning from both ends instead of using an end-anchored
  // regexp: such a pattern is retried from every whitespace run in the
  // string, the bottleneck that trimTrailingNewlines avoids (see #370).
  var start = 0;
  var end = output.length;
  // Leading side: only tab, CR and LF are dropped; spaces are kept.
  while (start < end && '\t\r\n'.indexOf(output[start]) !== -1) start++;
  // Trailing side: every whitespace character is dropped.
  while (end > start && /\s/.test(output[end - 1])) end--;

  return output.substring(start, end);
}
914
+
915
/**
 * Converts an element node to its Markdown equivalent using the rule chosen
 * for it, keeping the node's flanking whitespace outside the replacement.
 * Must be called with a TurndownService instance as `this`.
 * @private
 * @param {HTMLElement} node The node to convert
 * @returns {String} A Markdown representation of the node
 */

function replacementForNode (node) {
  var rule = this.rules.forNode(node);
  var content = process.call(this, node);
  var flanks = node.flankingWhitespace;

  // When flanking whitespace is re-added around the replacement, the
  // content itself is trimmed.
  var hasFlanks = flanks.leading || flanks.trailing;
  if (hasFlanks) {
    content = content.trim();
  }

  var body = rule.replacement(content, node, this.options);
  return flanks.leading + body + flanks.trailing;
}
934
+
935
/**
 * Joins replacement to the current output with appropriate number of new
 * lines: the larger of the newline runs trimmed from the end of `output`
 * and the start of `replacement`, capped at two.
 * @private
 * @param {String} output The current conversion output
 * @param {String} replacement The string to append to the output
 * @returns {String} Joined output
 */

function join (output, replacement) {
  var head = trimTrailingNewlines(output);
  var tail = trimLeadingNewlines(replacement);

  var trimmedFromHead = output.length - head.length;
  var trimmedFromTail = replacement.length - tail.length;
  var wanted = Math.max(trimmedFromHead, trimmedFromTail);

  // Taking a prefix of '\n\n' caps the separator at two newlines.
  var separator = '\n\n'.substring(0, wanted);

  return head + separator + tail;
}
952
+
953
/**
 * Determines whether an input can be converted: it must be a string, or an
 * object whose `nodeType` is 1, 9 or 11 (the element, document and fragment
 * nodes named in the `turndown` error message).
 * @private
 * @param {String|HTMLElement} input The value to check
 * @returns {*} `true` for convertible input, otherwise a falsy value
 */

function canConvert (input) {
  if (input == null) return false;
  if (typeof input === 'string') return true;

  var type = input.nodeType;
  return type && (type === 1 || type === 9 || type === 11);
}
971
+
972
+ return TurndownService;
973
+
974
+ }());