@uniweb/content-reader 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +28 -0
- package/LICENSE +674 -0
- package/README.md +174 -0
- package/package.json +44 -0
- package/src/index.js +22 -0
- package/src/parser/block.js +170 -0
- package/src/parser/index.js +54 -0
- package/src/parser/inline.js +117 -0
- package/src/parser/lists.js +106 -0
- package/src/parser/patterns.js +46 -0
- package/src/parser/tables.js +75 -0
- package/src/parser/utils.js +24 -0
- package/src/schema/index.js +144 -0
- package/src/utils.js +63 -0
- package/tests/code.test.js +122 -0
- package/tests/lists.test.js +260 -0
- package/tests/parser.test.js +343 -0
- package/tests/sample.md +44 -0
- package/tests/tables.test.js +335 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Pattern detection for markdown structures
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { parseInline } from "./inline.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Detect the "eyebrow" layout: a depth-3 heading token immediately followed
 * by a depth-1 heading token.
 * @param {Array} tokens - Token list to inspect
 * @param {number} index - Position of the candidate eyebrow token
 * @returns {boolean} True only when both headings are present, in that order
 */
function isEyebrowPattern(tokens, index) {
  // Missing tokens (out-of-range index) simply fail the comparison.
  const isHeadingOfDepth = (token, depth) =>
    token?.type === "heading" && token?.depth === depth;

  return (
    isHeadingOfDepth(tokens[index], 3) &&
    isHeadingOfDepth(tokens[index + 1], 1)
  );
}
|
|
21
|
+
|
|
22
|
+
/**
 * Build the two nodes produced by an eyebrow pattern: an "eyebrowHeading"
 * from the token at `index`, followed by a level-1 "heading" from the token
 * at `index + 1`.
 * @param {Array} tokens - Token list; both tokens must carry a `tokens` array
 * @param {number} index - Position of the eyebrow token
 * @param {Object} schema - Schema forwarded unchanged to parseInline
 * @returns {Array} Two-element array: [eyebrowHeading node, heading node]
 */
function parseEyebrowPattern(tokens, index, schema) {
  // Flatten every child token of a heading into its inline nodes.
  const toInlineNodes = (headingToken) =>
    headingToken.tokens.flatMap((child) => parseInline(child, schema));

  const eyebrowNode = {
    type: "eyebrowHeading",
    content: toInlineNodes(tokens[index]),
  };

  const titleNode = {
    type: "heading",
    attrs: { level: 1, id: null },
    content: toInlineNodes(tokens[index + 1]),
  };

  return [eyebrowNode, titleNode];
}
|
|
45
|
+
|
|
46
|
+
export { isEyebrowPattern, parseEyebrowPattern };
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Parse markdown tables
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { marked } from "marked";
|
|
6
|
+
import { parseInline } from "./inline.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Derive a column's alignment from its separator-row definition by looking
 * at where the colons sit once surrounding whitespace is removed.
 * @param {string} colDef - Column definition from the separator row
 * @returns {string|null} "center" (colon on both ends), "right" (trailing
 *   colon), "left" (leading colon), or null when there is no colon or the
 *   definition is empty
 */
function getColumnAlignment(colDef) {
  if (!colDef) return null;

  const spec = colDef.trim();
  const hasLeadingColon = spec.startsWith(":");
  const hasTrailingColon = spec.endsWith(":");

  if (hasLeadingColon && hasTrailingColon) return "center";
  if (hasTrailingColon) return "right";
  return hasLeadingColon ? "left" : null;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Convert one table row into a "tableRow" node.
 *
 * Every cell becomes a "tableCell" wrapping a single paragraph of inline
 * nodes. A cell may be either:
 *   - a raw markdown string, which is lexed with marked.Lexer.lexInline, or
 *   - an object shaped like { text, tokens } (the cell shape that accompanies
 *     table tokens exposing `rows` — confirm against the pinned marked
 *     version), whose already-lexed `tokens` are reused as-is.
 *
 * @param {Array} row - Cells of the row (strings or { text, tokens } objects)
 * @param {boolean} isHeader - Whether this is a header row
 * @param {Array} alignments - Column alignments, indexed by column
 * @param {Object} schema - Schema forwarded unchanged to parseInline
 * @returns {Object} Table row node
 */
function parseTableRow(row, isHeader, alignments, schema) {
  // Resolve a cell to its inline tokens without ever handing a non-string to
  // the lexer (which would otherwise be coerced to "[object Object]" or throw).
  const toInlineTokens = (cell) => {
    if (typeof cell === "string") return marked.Lexer.lexInline(cell);
    if (Array.isArray(cell?.tokens)) return cell.tokens;
    return marked.Lexer.lexInline(cell?.text ?? "");
  };

  return {
    type: "tableRow",
    content: row.map((cell, index) => ({
      type: "tableCell",
      attrs: {
        colspan: 1,
        rowspan: 1,
        // Columns without an explicit alignment are normalized to null.
        align: alignments[index] || null,
        header: isHeader,
      },
      content: [
        {
          type: "paragraph",
          content: toInlineTokens(cell).flatMap((t) => parseInline(t, schema)),
        },
      ],
    })),
  };
}
|
|
52
|
+
|
|
53
|
+
/**
 * Convert a table token into a "table" node whose first row is the header
 * row, followed by one row per entry in `token.rows`.
 * @param {Object} token - Table token providing `header`, `rows` and,
 *   optionally, `align`
 * @param {Object} schema - Schema forwarded unchanged to parseTableRow
 * @returns {Object} Table node: { type: "table", content: [...rows] }
 */
function parseTable(token, schema) {
  // Column alignments as provided on the token; none when absent.
  const columnAlignments = token.align || [];

  const buildRow = (cells, isHeader) =>
    parseTableRow(cells, isHeader, columnAlignments, schema);

  // Header first, then body rows in document order.
  const content = [
    buildRow(token.header, true),
    ...token.rows.map((cells) => buildRow(cells, false)),
  ];

  return { type: "table", content };
}
|
|
74
|
+
|
|
75
|
+
export { parseTable };
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Shared utility functions for parsing
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Report whether inline content carries nothing visible.
 *
 * Content counts as empty when it is missing, has no nodes, or consists of
 * exactly one text node whose text is blank after trimming. Any other
 * content (several nodes, or a single non-text node) is treated as non-empty.
 * @param {Array} content - Array of inline content nodes
 * @returns {boolean}
 */
function isEmptyContent(content) {
  if (!content || content.length === 0) return true;
  if (content.length !== 1) return false;

  const soleNode = content[0];
  if (soleNode.type !== "text") return false;

  // A text node without a `text` property is treated like an empty string.
  return (soleNode.text || "").trim() === "";
}
|
|
23
|
+
|
|
24
|
+
export { isEmptyContent };
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Base schema definition compatible with TipTap v2
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// Node specifications of the base schema, keyed by node name.
// Declaration order is kept exactly as-is.
const baseNodes = {
  // --- Document root -------------------------------------------------------
  doc: { content: "block+" },

  // --- Text-level blocks ---------------------------------------------------
  paragraph: { content: "inline*", group: "block" },

  heading: {
    attrs: {
      level: { default: 1 },
      id: { default: null },
    },
    content: "inline*",
    group: "block",
  },

  eyebrowHeading: { content: "inline*", group: "block" },

  text: { group: "inline" },

  // --- Media and separators ------------------------------------------------
  image: {
    attrs: {
      src: {}, // no default: must always be supplied
      caption: { default: null },
      alt: { default: null },
      role: { default: "content" },
    },
    // No group is declared for image; a "block inline" group is left disabled.
  },

  divider: {
    attrs: {
      style: { default: "line" },
      size: { default: "normal" },
    },
    group: "block",
  },

  // --- Lists ---------------------------------------------------------------
  bulletList: { content: "listItem+", group: "block" },

  orderedList: {
    attrs: {
      start: { default: 1 },
    },
    content: "listItem+",
    group: "block",
  },

  listItem: { content: "paragraph block*", defining: true },

  // --- Code and quotes -----------------------------------------------------
  codeBlock: {
    attrs: {
      language: { default: null },
      filename: { default: null },
    },
    content: "text*",
    marks: "", // empty mark set: no formatting inside code blocks
    group: "block",
    code: true,
    defining: true,
  },

  blockquote: { content: "inline*", group: "block" },

  // --- Tables --------------------------------------------------------------
  table: { content: "tableRow+", group: "block", tableRole: "table" },

  tableRow: { content: "tableCell+", tableRole: "row" },

  tableCell: {
    content: "paragraph+",
    attrs: {
      colspan: { default: 1 },
      rowspan: { default: 1 },
      align: { default: null }, // left, center, right
      header: { default: false },
    },
    tableRole: "cell",
  },
};
|
|
109
|
+
|
|
110
|
+
// Mark specifications of the base schema, keyed by mark name.
const baseMarks = {
  // Plain emphasis marks carry no attributes.
  bold: {},
  italic: {},

  // Hyperlink: href has no default and must always be supplied.
  link: {
    attrs: {
      href: {},
      title: { default: null },
    },
  },

  // Same attributes as link, plus a visual variant.
  button: {
    attrs: {
      href: {},
      title: { default: null },
      variant: { default: "primary" },
    },
  },

  // Inline code
  code: {
    inclusive: true,
    code: true,
  },
};
|
|
132
|
+
|
|
133
|
+
/**
 * Return the base schema definition.
 *
 * The result references the module-level `baseNodes` and `baseMarks` objects
 * directly (no copy is made), so mutating it affects every later caller.
 * @returns {Object} Object of the form { nodes, marks }
 */
function getBaseSchema() {
  const schema = { nodes: baseNodes, marks: baseMarks };
  return schema;
}
|
|
143
|
+
|
|
144
|
+
export { getBaseSchema };
|
package/src/utils.js
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
 * Patterns that each recognize one markdown construct. None of them uses the
 * `g` or `y` flag, so they hold no state between `test` calls and can be
 * shared safely across invocations.
 */
const MARKDOWN_PATTERNS = Object.freeze([
  // Links and images
  /\!\[.*?\]\(.*?\)/, // Image syntax ![alt](src)
  /\[.*?\]\(.*?\)/, // Link syntax [text](href)

  // Headers
  /^#{1,6}\s+.+$/m, // Atx headers with proper spacing
  /^.+\n[=]{2,}$/m, // Setext header level 1
  /^.+\n[-]{2,}$/m, // Setext header level 2

  // Quotes and lists
  /^\s{0,3}>\s.+/m, // Blockquote with content
  /^\s{0,3}(\*|-|\+)\s+.+/m, // Unordered list items with content
  /^\s{0,3}\d+\.\s+.+/m, // Ordered list items with content

  // Code
  /^\s{0,3}`{3}[\s\S]*?`{3}/m, // Fenced code blocks
  /^\s{4}.+/m, // Indented code blocks
  /`[^`\n]+`/, // Inline code

  // Emphasis
  /(\*\*|__)[^\*\n_]+(\*\*|__)/, // Bold
  /(\*|_)[^\*\n_]+(\*|_)/, // Italic
  /(\*\*\*|___)[^\*\n_]+(\*\*\*|___)/, // Bold and italic

  // Other elements
  /^\s{0,3}([-*_]){3,}\s*$/m, // Horizontal rules
  /^\s{0,3}\|.+\|.+\|/m, // Tables
  /^\s{0,3}\|[-:| ]+\|/m, // Table formatting row
]);

/**
 * Heuristically decide whether a piece of text contains markdown syntax.
 *
 * The text is considered markdown as soon as any single known pattern
 * matches. Text shorter than 8 characters is always rejected, as is any
 * non-string value (which would otherwise be implicitly coerced by
 * RegExp#test).
 *
 * NOTE: a stricter "markdown density" heuristic (requiring several matches
 * for short text to reduce false positives) was sketched here previously but
 * is not applied; a single match is sufficient.
 *
 * @param {string} text - Candidate text
 * @returns {boolean} True when at least one markdown pattern matches
 */
function isValidUniwebMarkdown(text) {
  // Early return for non-strings and for empty or very short text.
  if (typeof text !== "string" || text.length < 8) return false;

  return MARKDOWN_PATTERNS.some((pattern) => pattern.test(text));
}
|
|
62
|
+
|
|
63
|
+
export { isValidUniwebMarkdown };
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { markdownToProseMirror } from "../src/index.js";
|
|
2
|
+
|
|
3
|
+
// Verifies that fenced, indented and inline code in markdown are converted
// into the expected document structure.
describe("Code Parsing", () => {
  // Expected-document builders shared by the cases below.
  const docOf = (...content) => ({ type: "doc", content });
  const codeBlock = (text, language = null, filename = null) => ({
    type: "codeBlock",
    attrs: { language, filename },
    content: [{ type: "text", text }],
  });

  test("parses fenced code blocks and single quotes", () => {
    const markdown = "```javascript\nconst x = 1;\nconsole.log('x:', x);\n```";
    const result = markdownToProseMirror(markdown);

    expect(result).toEqual(
      docOf(codeBlock("const x = 1;\nconsole.log('x:', x);", "javascript"))
    );
  });

  test("parses code blocks with filenames", () => {
    const markdown = "```javascript:example.js\nconst x = 1;\n```";
    const result = markdownToProseMirror(markdown);

    expect(result).toEqual(
      docOf(codeBlock("const x = 1;", "javascript", "example.js"))
    );
  });

  test("parses indented code blocks", () => {
    // Four leading spaces per line are what make markdown treat these lines
    // as an indented code block; the indentation is stripped from the result.
    const markdown = "    const x = 1;\n    console.log(x);";
    const result = markdownToProseMirror(markdown);

    expect(result).toEqual(docOf(codeBlock("const x = 1;\nconsole.log(x);")));
  });

  test("parses inline code", () => {
    const markdown = "Use the `console.log('test')` function.";
    const result = markdownToProseMirror(markdown);

    expect(result).toEqual(
      docOf({
        type: "paragraph",
        content: [
          { type: "text", text: "Use the " },
          {
            type: "text",
            text: "console.log('test')",
            marks: [{ type: "code" }],
          },
          { type: "text", text: " function." },
        ],
      })
    );
  });

  test("preserves empty lines in code blocks", () => {
    const markdown = "```\nline 1\n\nline 2\n```";
    const result = markdownToProseMirror(markdown);

    expect(result).toEqual(docOf(codeBlock("line 1\n\nline 2")));
  });
});
|