@dragon708/docmind-markdown 1.2.6 → 1.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +139 -12
- package/dist/index.js +781 -48
- package/node_modules/turndown-plugin-gfm/LICENSE +21 -0
- package/node_modules/turndown-plugin-gfm/README.md +50 -0
- package/node_modules/turndown-plugin-gfm/dist/turndown-plugin-gfm.js +165 -0
- package/node_modules/turndown-plugin-gfm/lib/turndown-plugin-gfm.browser.cjs.js +162 -0
- package/node_modules/turndown-plugin-gfm/lib/turndown-plugin-gfm.browser.es.js +154 -0
- package/node_modules/turndown-plugin-gfm/lib/turndown-plugin-gfm.cjs.js +162 -0
- package/node_modules/turndown-plugin-gfm/lib/turndown-plugin-gfm.es.js +154 -0
- package/node_modules/turndown-plugin-gfm/package.json +43 -0
- package/package.json +5 -1
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
// Matches wrapper class names such as `highlight-source-js` or
// `highlight-text-html`; capture group 1 is the language identifier.
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

/**
 * Turndown plugin that registers the `highlightedCodeBlock` rule.
 *
 * The rule targets a DIV whose class name matches `highlightRegExp` and whose
 * first child is a PRE, and renders it as a fenced code block tagged with the
 * language taken from the class name.
 *
 * @param {Object} turndownService - object exposing `addRule(name, rule)`.
 */
function highlightedCodeBlock (turndownService) {
  // True only for a highlight wrapper DIV that directly starts with a PRE.
  function isHighlightWrapper (node) {
    var pre = node.firstChild;
    return (
      node.nodeName === 'DIV' &&
      highlightRegExp.test(node.className) &&
      pre &&
      pre.nodeName === 'PRE'
    );
  }

  // Builds the fenced block from the raw text of the PRE; the already
  // converted `content` is intentionally not used.
  function toFencedBlock (content, node, options) {
    var match = (node.className || '').match(highlightRegExp);
    var language = match ? match[1] : '';
    var fence = options.fence;
    var opening = '\n\n' + fence + language + '\n';
    var closing = '\n' + fence + '\n\n';

    return opening + node.firstChild.textContent + closing;
  }

  turndownService.addRule('highlightedCodeBlock', {
    filter: isHighlightWrapper,
    replacement: toFencedBlock
  });
}
|
|
30
|
+
|
|
31
|
+
/**
 * Turndown plugin that registers the `strikethrough` rule: the content of
 * `del`, `s` and `strike` elements is wrapped in single tildes.
 *
 * @param {Object} turndownService - object exposing `addRule(name, rule)`.
 */
function strikethrough (turndownService) {
  var rule = {
    filter: ['del', 's', 'strike'],
    replacement: function (inner) {
      return '~' + inner + '~';
    }
  };

  turndownService.addRule('strikethrough', rule);
}
|
|
39
|
+
|
|
40
|
+
// Borrowed array methods so they can be applied to array-like node lists.
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Table-related rules, keyed by the rule name they are registered under.
var rules = {};

// Renders a single `th`/`td` as a pipe-delimited cell.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node);
  }
};

// Renders a `tr`. A heading row is followed by the delimiter row
// (`---`, `:--`, `--:`, `:-:`) built from each cell's `align` attribute.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };

    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var border = '---';
        var align = (
          node.childNodes[i].getAttribute('align') || ''
        ).toLowerCase();

        // Own-property lookup only: values such as align="constructor" or
        // align="__proto__" must not resolve to members inherited from
        // Object.prototype and leak a non-string into the delimiter row.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }

        borderCells += cell(border, node.childNodes[i]);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '');
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see `tables`).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]);
  },

  replacement: function (content) {
    // Ensure there are no blank lines anywhere in the table. A string
    // pattern passed to `replace` only affects the first occurrence, so a
    // global regular expression is required to collapse every run.
    var compact = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + compact + '\n\n';
  }
};

// `thead`/`tbody`/`tfoot` are transparent: their rows are emitted as-is.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content;
  }
};
|
|
93
|
+
|
|
94
|
+
// A tr is a heading row if:
// - the parent is a THEAD
// - or if it is the first child of the TABLE or of the first TBODY (possibly
//   following a blank THEAD)
//   and every cell is a TH
//
// Returns false when no row is given: callers pass `table.rows[0]`, which is
// undefined for a table without rows, and dereferencing it would throw.
function isHeadingRow (tr) {
  if (!tr) return false;

  var parentNode = tr.parentNode;
  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' })
    )
  );
}
|
|
110
|
+
|
|
111
|
+
/**
 * Tells whether `element` is a TBODY that comes first in its table: either
 * nothing precedes it, or the only thing directly before it is a THEAD whose
 * text content is empty or whitespace.
 *
 * @param {Object} element - node exposing `nodeName` and `previousSibling`.
 * @returns {boolean}
 */
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false;

  var before = element.previousSibling;
  if (!before) return true;

  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
}
|
|
123
|
+
|
|
124
|
+
/**
 * Formats one table cell. The first child of a row opens with `| `; every
 * other cell opens with a single space. All cells close with ` |`.
 *
 * @param {string} content - text to place inside the cell.
 * @param {Object} node - cell node; its position is looked up in
 *   `node.parentNode.childNodes`.
 * @returns {string}
 */
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var isFirstInRow = indexOf.call(siblings, node) === 0;

  return (isFirstInRow ? '| ' : ' ') + content + ' |';
}
|
|
130
|
+
|
|
131
|
+
/**
 * Turndown plugin for tables. Tables whose first row is not a heading row
 * are passed to `keep`; every entry of `rules` is then registered under its
 * own key.
 *
 * @param {Object} turndownService - object exposing `keep(filter)` and
 *   `addRule(name, rule)`.
 */
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0]);
  }

  turndownService.keep(isHeadlessTable);

  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Turndown plugin that registers the `taskListItems` rule: a node whose
 * `type` is `checkbox` and whose parent is an LI becomes `[x] ` when checked
 * and `[ ] ` otherwise.
 *
 * @param {Object} turndownService - object exposing `addRule(name, rule)`.
 */
function taskListItems (turndownService) {
  function isListCheckbox (node) {
    return node.type === 'checkbox' && node.parentNode.nodeName === 'LI';
  }

  function renderCheckbox (content, node) {
    var box = node.checked ? '[x]' : '[ ]';
    return box + ' ';
  }

  turndownService.addRule('taskListItems', {
    filter: isListCheckbox,
    replacement: renderCheckbox
  });
}
|
|
148
|
+
|
|
149
|
+
/**
 * Aggregate plugin: hands all four GFM plugins to `turndownService.use` in
 * a single call, in the order highlighted code blocks, strikethrough,
 * tables, task list items.
 *
 * @param {Object} turndownService - object exposing `use(plugins)`.
 */
function gfm (turndownService) {
  var plugins = [
    highlightedCodeBlock,
    strikethrough,
    tables,
    taskListItems
  ];

  turndownService.use(plugins);
}
|
|
157
|
+
|
|
158
|
+
exports.gfm = gfm;
|
|
159
|
+
exports.highlightedCodeBlock = highlightedCodeBlock;
|
|
160
|
+
exports.strikethrough = strikethrough;
|
|
161
|
+
exports.tables = tables;
|
|
162
|
+
exports.taskListItems = taskListItems;
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
// Matches wrapper class names such as `highlight-source-js` or
// `highlight-text-html`; capture group 1 is the language identifier.
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

/**
 * Turndown plugin that registers the `highlightedCodeBlock` rule.
 *
 * The rule targets a DIV whose class name matches `highlightRegExp` and whose
 * first child is a PRE, and renders it as a fenced code block tagged with the
 * language taken from the class name.
 *
 * @param {Object} turndownService - object exposing `addRule(name, rule)`.
 */
function highlightedCodeBlock (turndownService) {
  // True only for a highlight wrapper DIV that directly starts with a PRE.
  function isHighlightWrapper (node) {
    var pre = node.firstChild;
    return (
      node.nodeName === 'DIV' &&
      highlightRegExp.test(node.className) &&
      pre &&
      pre.nodeName === 'PRE'
    );
  }

  // Builds the fenced block from the raw text of the PRE; the already
  // converted `content` is intentionally not used.
  function toFencedBlock (content, node, options) {
    var match = (node.className || '').match(highlightRegExp);
    var language = match ? match[1] : '';
    var fence = options.fence;
    var opening = '\n\n' + fence + language + '\n';
    var closing = '\n' + fence + '\n\n';

    return opening + node.firstChild.textContent + closing;
  }

  turndownService.addRule('highlightedCodeBlock', {
    filter: isHighlightWrapper,
    replacement: toFencedBlock
  });
}
|
|
26
|
+
|
|
27
|
+
/**
 * Turndown plugin that registers the `strikethrough` rule: the content of
 * `del`, `s` and `strike` elements is wrapped in single tildes.
 *
 * @param {Object} turndownService - object exposing `addRule(name, rule)`.
 */
function strikethrough (turndownService) {
  var rule = {
    filter: ['del', 's', 'strike'],
    replacement: function (inner) {
      return '~' + inner + '~';
    }
  };

  turndownService.addRule('strikethrough', rule);
}
|
|
35
|
+
|
|
36
|
+
// Borrowed array methods so they can be applied to array-like node lists.
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Table-related rules, keyed by the rule name they are registered under.
var rules = {};

// Renders a single `th`/`td` as a pipe-delimited cell.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node);
  }
};

// Renders a `tr`. A heading row is followed by the delimiter row
// (`---`, `:--`, `--:`, `:-:`) built from each cell's `align` attribute.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };

    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var border = '---';
        var align = (
          node.childNodes[i].getAttribute('align') || ''
        ).toLowerCase();

        // Own-property lookup only: values such as align="constructor" or
        // align="__proto__" must not resolve to members inherited from
        // Object.prototype and leak a non-string into the delimiter row.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }

        borderCells += cell(border, node.childNodes[i]);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '');
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see `tables`).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]);
  },

  replacement: function (content) {
    // Ensure there are no blank lines anywhere in the table. A string
    // pattern passed to `replace` only affects the first occurrence, so a
    // global regular expression is required to collapse every run.
    var compact = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + compact + '\n\n';
  }
};

// `thead`/`tbody`/`tfoot` are transparent: their rows are emitted as-is.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content;
  }
};
|
|
89
|
+
|
|
90
|
+
// A tr is a heading row if:
// - the parent is a THEAD
// - or if it is the first child of the TABLE or of the first TBODY (possibly
//   following a blank THEAD)
//   and every cell is a TH
//
// Returns false when no row is given: callers pass `table.rows[0]`, which is
// undefined for a table without rows, and dereferencing it would throw.
function isHeadingRow (tr) {
  if (!tr) return false;

  var parentNode = tr.parentNode;
  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' })
    )
  );
}
|
|
106
|
+
|
|
107
|
+
/**
 * Tells whether `element` is a TBODY that comes first in its table: either
 * nothing precedes it, or the only thing directly before it is a THEAD whose
 * text content is empty or whitespace.
 *
 * @param {Object} element - node exposing `nodeName` and `previousSibling`.
 * @returns {boolean}
 */
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false;

  var before = element.previousSibling;
  if (!before) return true;

  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
}
|
|
119
|
+
|
|
120
|
+
/**
 * Formats one table cell. The first child of a row opens with `| `; every
 * other cell opens with a single space. All cells close with ` |`.
 *
 * @param {string} content - text to place inside the cell.
 * @param {Object} node - cell node; its position is looked up in
 *   `node.parentNode.childNodes`.
 * @returns {string}
 */
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var isFirstInRow = indexOf.call(siblings, node) === 0;

  return (isFirstInRow ? '| ' : ' ') + content + ' |';
}
|
|
126
|
+
|
|
127
|
+
/**
 * Turndown plugin for tables. Tables whose first row is not a heading row
 * are passed to `keep`; every entry of `rules` is then registered under its
 * own key.
 *
 * @param {Object} turndownService - object exposing `keep(filter)` and
 *   `addRule(name, rule)`.
 */
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0]);
  }

  turndownService.keep(isHeadlessTable);

  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
|
|
133
|
+
|
|
134
|
+
/**
 * Turndown plugin that registers the `taskListItems` rule: a node whose
 * `type` is `checkbox` and whose parent is an LI becomes `[x] ` when checked
 * and `[ ] ` otherwise.
 *
 * @param {Object} turndownService - object exposing `addRule(name, rule)`.
 */
function taskListItems (turndownService) {
  function isListCheckbox (node) {
    return node.type === 'checkbox' && node.parentNode.nodeName === 'LI';
  }

  function renderCheckbox (content, node) {
    var box = node.checked ? '[x]' : '[ ]';
    return box + ' ';
  }

  turndownService.addRule('taskListItems', {
    filter: isListCheckbox,
    replacement: renderCheckbox
  });
}
|
|
144
|
+
|
|
145
|
+
/**
 * Aggregate plugin: hands all four GFM plugins to `turndownService.use` in
 * a single call, in the order highlighted code blocks, strikethrough,
 * tables, task list items.
 *
 * @param {Object} turndownService - object exposing `use(plugins)`.
 */
function gfm (turndownService) {
  var plugins = [
    highlightedCodeBlock,
    strikethrough,
    tables,
    taskListItems
  ];

  turndownService.use(plugins);
}
|
|
153
|
+
|
|
154
|
+
export { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems };
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "turndown-plugin-gfm",
|
|
3
|
+
"description": "Turndown plugin to add GitHub Flavored Markdown extensions.",
|
|
4
|
+
"version": "1.0.2",
|
|
5
|
+
"author": "Dom Christie",
|
|
6
|
+
"main": "lib/turndown-plugin-gfm.cjs.js",
|
|
7
|
+
"module": "lib/turndown-plugin-gfm.es.js",
|
|
8
|
+
"jsnext:main": "lib/turndown-plugin-gfm.es.js",
|
|
9
|
+
"devDependencies": {
|
|
10
|
+
"browserify": "^14.5.0",
|
|
11
|
+
"rollup": "^0.50.0",
|
|
12
|
+
"standard": "^10.0.3",
|
|
13
|
+
"turndown": "4.0.1",
|
|
14
|
+
"turndown-attendant": "0.0.2"
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"lib",
|
|
18
|
+
"dist"
|
|
19
|
+
],
|
|
20
|
+
"keywords": [
|
|
21
|
+
"turndown",
|
|
22
|
+
"turndown-plugin",
|
|
23
|
+
"html-to-markdown",
|
|
24
|
+
"html",
|
|
25
|
+
"markdown",
|
|
26
|
+
"github-flavored-markdown",
|
|
27
|
+
"gfm"
|
|
28
|
+
],
|
|
29
|
+
"license": "MIT",
|
|
30
|
+
"repository": {
|
|
31
|
+
"type": "git",
|
|
32
|
+
"url": "https://github.com/domchristie/turndown-plugin-gfm.git"
|
|
33
|
+
},
|
|
34
|
+
"scripts": {
|
|
35
|
+
"build": "npm run build-cjs && npm run build-es && npm run build-iife && npm run build-test",
|
|
36
|
+
"build-cjs": "rollup -c config/rollup.config.cjs.js && rollup -c config/rollup.config.browser.cjs.js",
|
|
37
|
+
"build-es": "rollup -c config/rollup.config.es.js && rollup -c config/rollup.config.browser.es.js",
|
|
38
|
+
"build-iife": "rollup -c config/rollup.config.iife.js",
|
|
39
|
+
"build-test": "browserify test/turndown-plugin-gfm-test.js --outfile test/turndown-plugin-gfm-test.browser.js",
|
|
40
|
+
"prepublish": "npm run build",
|
|
41
|
+
"test": "npm run build && standard ./src/**/*.js && node test/turndown-plugin-gfm-test.js"
|
|
42
|
+
}
|
|
43
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dragon708/docmind-markdown",
|
|
3
|
-
"version": "1.2.6",
|
|
3
|
+
"version": "1.2.8",
|
|
4
4
|
"description": "StructuredDocumentResult → Markdown and LLM-oriented plain text for DocMind.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -44,6 +44,9 @@
|
|
|
44
44
|
"turndown": "^7.0.0",
|
|
45
45
|
"turndown-plugin-gfm": "^1.0.2"
|
|
46
46
|
},
|
|
47
|
+
"bundledDependenciesExtra": [
|
|
48
|
+
"turndown-plugin-gfm"
|
|
49
|
+
],
|
|
47
50
|
"bundledDependencies": [
|
|
48
51
|
"@cognipeer/to-markdown",
|
|
49
52
|
"@mixmark-io/domino",
|
|
@@ -115,6 +118,7 @@
|
|
|
115
118
|
"strtok3",
|
|
116
119
|
"token-types",
|
|
117
120
|
"turndown",
|
|
121
|
+
"turndown-plugin-gfm",
|
|
118
122
|
"underscore",
|
|
119
123
|
"undici",
|
|
120
124
|
"unpdf",
|