@uniweb/content-reader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ /**
2
+ * @fileoverview Pattern detection for markdown structures
3
+ */
4
+
5
+ import { parseInline } from "./inline.js";
6
+
7
/**
 * Detect an "eyebrow" pair: a depth-3 heading immediately followed by a
 * depth-1 heading.
 * @param {Array} tokens - Array of tokens
 * @param {number} index - Position of the candidate eyebrow token
 * @returns {boolean} True when tokens[index] and tokens[index + 1] form the pair
 */
function isEyebrowPattern(tokens, index) {
  // Missing tokens (index out of range) simply fail the check.
  const isHeadingOfDepth = (token, depth) =>
    token?.type === "heading" && token?.depth === depth;

  if (!isHeadingOfDepth(tokens[index], 3)) {
    return false;
  }
  return isHeadingOfDepth(tokens[index + 1], 1);
}
21
+
22
/**
 * Convert an eyebrow pair (tokens[index] and tokens[index + 1]) into two nodes:
 * an `eyebrowHeading` followed by a level-1 `heading`.
 * @param {Array} tokens - Array of tokens
 * @param {number} index - Position of the eyebrow token
 * @param {Object} schema - ProseMirror schema, forwarded to parseInline
 * @returns {Array} The two nodes, eyebrow first
 */
function parseEyebrowPattern(tokens, index, schema) {
  // Each heading token carries its inline children in `.tokens`.
  const inlineContentOf = (headingToken) =>
    headingToken.tokens.flatMap((child) => parseInline(child, schema));

  const eyebrow = {
    type: "eyebrowHeading",
    content: inlineContentOf(tokens[index]),
  };
  const title = {
    type: "heading",
    attrs: { level: 1, id: null },
    content: inlineContentOf(tokens[index + 1]),
  };

  return [eyebrow, title];
}
45
+
46
+ export { isEyebrowPattern, parseEyebrowPattern };
@@ -0,0 +1,75 @@
1
+ /**
2
+ * @fileoverview Parse markdown tables
3
+ */
4
+
5
+ import { marked } from "marked";
6
+ import { parseInline } from "./inline.js";
7
+
8
/**
 * Derive a column's alignment from its separator-row definition
 * @param {string} colDef - Column definition from separator row
 * @returns {string|null} "left", "center" or "right"; null when no colon
 *   marker is present or colDef is empty
 */
function getColumnAlignment(colDef) {
  if (!colDef) {
    return null;
  }

  const spec = colDef.trim();
  const hasLeadingColon = spec.startsWith(":");
  const hasTrailingColon = spec.endsWith(":");

  if (hasLeadingColon) {
    return hasTrailingColon ? "center" : "left";
  }
  return hasTrailingColon ? "right" : null;
}
21
+
22
/**
 * Parse table row content
 * @param {Array} row - Cells of one row. A cell may be a raw markdown string
 *   or a cell object carrying pre-lexed `tokens` and/or its source `text`
 * @param {boolean} isHeader - Whether this is a header row
 * @param {Array} alignments - Column alignments, indexed by column
 * @param {Object} schema - ProseMirror schema, forwarded to parseInline
 * @returns {Object} Table row node
 */
function parseTableRow(row, isHeader, alignments, schema) {
  // lexInline only accepts strings, but table tokens that expose `rows`
  // deliver cells as objects. Reuse their inline tokens when present and
  // fall back to lexing the cell text otherwise.
  const inlineTokensOf = (cell) => {
    if (typeof cell === "string") return marked.Lexer.lexInline(cell);
    if (Array.isArray(cell?.tokens)) return cell.tokens;
    return marked.Lexer.lexInline(cell?.text ?? "");
  };

  return {
    type: "tableRow",
    content: row.map((cell, index) => ({
      type: "tableCell",
      attrs: {
        colspan: 1,
        rowspan: 1,
        // Columns without an explicit alignment are stored as null
        align: alignments[index] || null,
        header: isHeader,
      },
      content: [
        {
          type: "paragraph",
          content: inlineTokensOf(cell).flatMap((t) => parseInline(t, schema)),
        },
      ],
    })),
  };
}
52
+
53
/**
 * Convert a table token into a `table` node whose first row is the header
 * @param {Object} token - Table token; `header`, `rows` and (optionally)
 *   `align` are read from it
 * @param {Object} schema - ProseMirror schema
 * @returns {Object} ProseMirror table node
 */
function parseTable(token, schema) {
  // A token without alignment info is treated as having none for any column.
  const alignments = token.align || [];
  const toRowNode = (cells, isHeader) =>
    parseTableRow(cells, isHeader, alignments, schema);

  const header = toRowNode(token.header, true);
  const body = token.rows.map((cells) => toRowNode(cells, false));

  return { type: "table", content: [header, ...body] };
}
74
+
75
+ export { parseTable };
@@ -0,0 +1,24 @@
1
+ /**
2
+ * @fileoverview Shared utility functions for parsing
3
+ */
4
+
5
/**
 * Check if content is empty or whitespace-only
 *
 * Content counts as empty when it is missing, has no nodes, or consists
 * solely of text nodes whose text is blank. Any non-text node makes the
 * content non-empty.
 * @param {Array} content - Array of inline content nodes
 * @returns {boolean}
 */
function isEmptyContent(content) {
  if (!content || content.length === 0) return true;

  // Anything that is not a node array is never treated as empty.
  if (!Array.isArray(content)) return false;

  // Inspect every node, not just a lone first one, so that several
  // whitespace-only text nodes in a row are still recognised as empty.
  return content.every(
    (node) => node.type === "text" && (node.text || "").trim() === ""
  );
}
23
+
24
+ export { isEmptyContent };
@@ -0,0 +1,144 @@
1
+ /**
2
+ * @fileoverview Base schema definition compatible with TipTap v2
3
+ */
4
+
5
// Node specs of the base schema, keyed by node name. This object is handed
// out as the `nodes` member of the value returned by getBaseSchema().
const baseNodes = {
  // Document node: holds one or more nodes of the "block" group
  doc: {
    content: "block+",
  },

  paragraph: {
    content: "inline*",
    group: "block",
  },

  // Heading level defaults to 1; `id` defaults to null
  heading: {
    attrs: {
      level: { default: 1 },
      id: { default: null },
    },
    content: "inline*",
    group: "block",
  },

  // Block with inline content and no attributes
  eyebrowHeading: {
    content: "inline*",
    group: "block",
  },

  text: {
    group: "inline",
  },

  // `src` has no default; caption/alt default to null and role to "content".
  // NOTE(review): with the group line below commented out, image belongs to
  // neither the "block" nor the "inline" group — confirm this is intended.
  image: {
    attrs: {
      src: {},
      caption: { default: null },
      alt: { default: null },
      role: { default: "content" },
    },
    // group: "block inline",
  },

  divider: {
    attrs: {
      style: { default: "line" },
      size: { default: "normal" },
    },
    group: "block",
  },

  // List nodes
  bulletList: {
    content: "listItem+",
    group: "block",
  },

  orderedList: {
    attrs: {
      start: { default: 1 },
    },
    content: "listItem+",
    group: "block",
  },

  // A list item starts with a paragraph, optionally followed by more blocks
  listItem: {
    content: "paragraph block*",
    defining: true,
  },

  // Code blocks
  codeBlock: {
    attrs: {
      language: { default: null },
      filename: { default: null },
    },
    content: "text*",
    marks: "", // No marks (formatting) allowed inside code blocks
    group: "block",
    code: true,
    defining: true,
  },
  // Blockquote content is declared as inline content, not nested blocks
  blockquote: {
    content: "inline*",
    group: "block",
  },
  // Table nodes
  table: {
    content: "tableRow+",
    group: "block",
    tableRole: "table",
  },

  tableRow: {
    content: "tableCell+",
    tableRole: "row",
  },

  // Header cells use this same node type, distinguished by the `header` attr
  tableCell: {
    content: "paragraph+",
    attrs: {
      colspan: { default: 1 },
      rowspan: { default: 1 },
      align: { default: null }, // left, center, right
      header: { default: false },
    },
    tableRole: "cell",
  },
};
109
+
110
// Mark specs of the base schema, keyed by mark name. This object is handed
// out as the `marks` member of the value returned by getBaseSchema().
const baseMarks = {
  // bold and italic carry no attributes
  bold: {},
  italic: {},
  // `href` has no default; `title` defaults to null
  link: {
    attrs: {
      href: {},
      title: { default: null },
    },
  },
  // Same attributes as link, plus a `variant` that defaults to "primary"
  button: {
    attrs: {
      href: {},
      title: { default: null },
      variant: { default: "primary" },
    },
  },
  code: {
    // For inline code
    inclusive: true,
    code: true,
  },
};
132
+
133
/**
 * Assemble the base schema definition
 * @returns {Object} Object exposing the module's node specs as `nodes` and
 *   its mark specs as `marks` (the shared objects themselves, not copies)
 */
function getBaseSchema() {
  const schema = {};
  schema.nodes = baseNodes;
  schema.marks = baseMarks;
  return schema;
}
143
+
144
+ export { getBaseSchema };
package/src/utils.js ADDED
@@ -0,0 +1,63 @@
1
/**
 * Heuristically decide whether a piece of text contains markdown syntax.
 *
 * The text is tested against a list of regular expressions, one per markdown
 * construct; a single match is enough.
 * @param {string} text - Text to inspect
 * @returns {boolean} True when at least one pattern matches; false for empty
 *   text or text shorter than 8 characters
 */
function isValidUniwebMarkdown(text) {
  // Early return for empty or very short text
  if (!text || text.length < 8) return false;

  // Line-leading indentation is matched with [ \t] rather than \s: \s also
  // matches line terminators, which let runs of blank lines pass as the
  // indentation of an "indented code block".
  // Emphasis patterns use a backreference so the closing delimiter must be
  // the same as the opening one (`* b_` is not emphasis).
  const patterns = [
    // Links and images
    /!\[.*?\]\(.*?\)/, // Image syntax ![alt](src)
    /\[.*?\]\(.*?\)/, // Link syntax [text](href)

    // Headers
    /^#{1,6}\s+.+$/m, // Atx headers with proper spacing
    /^.+\n[=]{2,}$/m, // Setext header level 1
    /^.+\n[-]{2,}$/m, // Setext header level 2

    // Quotes and lists
    /^[ \t]{0,3}>\s.+/m, // Blockquote with content
    /^[ \t]{0,3}(\*|-|\+)\s+.+/m, // Unordered list items with content
    /^[ \t]{0,3}\d+\.\s+.+/m, // Ordered list items with content

    // Code
    /^[ \t]{0,3}`{3}[\s\S]*?`{3}/m, // Fenced code blocks
    /^[ \t]{4}.+/m, // Indented code blocks
    /`[^`\n]+`/, // Inline code

    // Emphasis
    /(\*\*|__)[^*\n_]+\1/, // Bold
    /(\*|_)[^*\n_]+\1/, // Italic
    /(\*\*\*|___)[^*\n_]+\1/, // Bold and italic

    // Other elements
    /^[ \t]{0,3}([-*_]){3,}\s*$/m, // Horizontal rules
    /^[ \t]{0,3}\|.+\|.+\|/m, // Tables
    /^[ \t]{0,3}\|[-:| ]+\|/m, // Table formatting row
  ];

  // Check if the text contains any markdown patterns
  return patterns.some((pattern) => pattern.test(text));
}
62
+
63
+ export { isValidUniwebMarkdown };
@@ -0,0 +1,122 @@
1
+ import { markdownToProseMirror } from "../src/index.js";
2
+
3
// Suite covering how markdownToProseMirror converts fenced code blocks,
// indented code blocks and inline code.
describe("Code Parsing", () => {
  test("parses fenced code blocks and single quotes", () => {
    const markdown = "```javascript\nconst x = 1;\nconsole.log('x:', x);\n```";
    const result = markdownToProseMirror(markdown);

    // Single quotes inside the block must come through unchanged.
    expect(result).toEqual({
      type: "doc",
      content: [
        {
          type: "codeBlock",
          attrs: {
            language: "javascript",
            filename: null,
          },
          content: [
            {
              type: "text",
              text: "const x = 1;\nconsole.log('x:', x);",
            },
          ],
        },
      ],
    });
  });

  test("parses code blocks with filenames", () => {
    // The fence info string has the form `language:filename`.
    const markdown = "```javascript:example.js\nconst x = 1;\n```";
    const result = markdownToProseMirror(markdown);

    expect(result).toEqual({
      type: "doc",
      content: [
        {
          type: "codeBlock",
          attrs: {
            language: "javascript",
            filename: "example.js",
          },
          content: [
            {
              type: "text",
              text: "const x = 1;",
            },
          ],
        },
      ],
    });
  });

  test("parses indented code blocks", () => {
    // Four leading spaces per line: the minimum indentation markdown
    // requires for an indented code block.
    const markdown = "    const x = 1;\n    console.log(x);";
    const result = markdownToProseMirror(markdown);

    expect(result).toEqual({
      type: "doc",
      content: [
        {
          type: "codeBlock",
          attrs: {
            language: null,
            filename: null,
          },
          content: [
            {
              type: "text",
              text: "const x = 1;\nconsole.log(x);",
            },
          ],
        },
      ],
    });
  });

  test("parses inline code", () => {
    const markdown = "Use the `console.log('test')` function.";
    const result = markdownToProseMirror(markdown);

    // Inline code becomes a text node carrying a `code` mark.
    expect(result).toEqual({
      type: "doc",
      content: [
        {
          type: "paragraph",
          content: [
            { type: "text", text: "Use the " },
            {
              type: "text",
              text: "console.log('test')",
              marks: [{ type: "code" }],
            },
            { type: "text", text: " function." },
          ],
        },
      ],
    });
  });

  test("preserves empty lines in code blocks", () => {
    const markdown = "```\nline 1\n\nline 2\n```";
    const result = markdownToProseMirror(markdown);

    expect(result).toEqual({
      type: "doc",
      content: [
        {
          type: "codeBlock",
          attrs: {
            language: null,
            filename: null,
          },
          content: [
            {
              type: "text",
              text: "line 1\n\nline 2",
            },
          ],
        },
      ],
    });
  });
});