@uniweb/content-reader 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -0
- package/package.json +2 -1
- package/src/parser/block.js +60 -12
- package/src/parser/inline.js +34 -0
- package/src/parser/marked-extensions.js +58 -0
- package/tests/code.test.js +5 -14
- package/tests/parser.test.js +82 -0
package/README.md
CHANGED
|
@@ -105,6 +105,46 @@ Buttons can be created using the `.button` class or the legacy `button:` prefix:
|
|
|
105
105
|
| `icon` | Icon name or path |
|
|
106
106
|
| `target`, `rel`, `download` | Same as links |
|
|
107
107
|
|
|
108
|
+
#### Bracketed Spans
|
|
109
|
+
|
|
110
|
+
Style inline text with semantic classes using Pandoc-style bracketed spans:
|
|
111
|
+
|
|
112
|
+
```markdown
|
|
113
|
+
# Basic class
|
|
114
|
+
This has [highlighted text]{.highlight} for emphasis.
|
|
115
|
+
|
|
116
|
+
# Multiple classes
|
|
117
|
+
Here's [styled text]{.highlight .large} with two classes.
|
|
118
|
+
|
|
119
|
+
# ID attribute
|
|
120
|
+
Jump to [this section]{#anchor-point}.
|
|
121
|
+
|
|
122
|
+
# Class and ID together
|
|
123
|
+
[Important note]{.callout #note-1}
|
|
124
|
+
|
|
125
|
+
# Custom attributes
|
|
126
|
+
[Hover me]{.tooltip data-tip="More info here"}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Output structure:
|
|
130
|
+
|
|
131
|
+
```js
|
|
132
|
+
{
|
|
133
|
+
type: "text",
|
|
134
|
+
text: "highlighted text",
|
|
135
|
+
marks: [{ type: "span", attrs: { class: "highlight" } }]
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
| Syntax | Result |
|
|
140
|
+
|--------|--------|
|
|
141
|
+
| `[text]{.class}` | `<span class="class">` |
|
|
142
|
+
| `[text]{#id}` | `<span id="id">` |
|
|
143
|
+
| `[text]{.a .b}` | `<span class="a b">` |
|
|
144
|
+
| `[text]{key=value}` | `<span key="value">` |
|
|
145
|
+
|
|
146
|
+
Spans can be combined with other marks (bold, italic, links).
|
|
147
|
+
|
|
108
148
|
#### Legacy Prefix Syntax
|
|
109
149
|
|
|
110
150
|
The original prefix syntax is still supported for backward compatibility:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uniweb/content-reader",
|
|
3
|
-
"version": "1.0.3",
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"description": "Markdown to ProseMirror document structure converter",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
"author": "Proximify Inc.",
|
|
17
17
|
"license": "GPL-3.0-or-later",
|
|
18
18
|
"dependencies": {
|
|
19
|
+
"js-yaml": "^4.1.0",
|
|
19
20
|
"marked": "^11.1.0"
|
|
20
21
|
},
|
|
21
22
|
"devDependencies": {
|
package/src/parser/block.js
CHANGED
|
@@ -3,22 +3,23 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { marked } from "marked";
|
|
6
|
+
import yaml from "js-yaml";
|
|
6
7
|
import { parseInline } from "./inline.js";
|
|
7
8
|
import { parseList } from "./lists.js";
|
|
8
9
|
import { parseTable } from "./tables.js";
|
|
9
10
|
|
|
10
11
|
/**
|
|
11
|
-
* Process code block info string (e.g., "
|
|
12
|
+
* Process code block info string (e.g., "json:tag-name")
|
|
12
13
|
* @param {string} info - Code block info string
|
|
13
|
-
* @returns {Object} Language and filename
|
|
14
|
+
* @returns {Object} Language and optional tag
|
|
14
15
|
*/
|
|
15
16
|
function processCodeInfo(info) {
|
|
16
|
-
if (!info) return { language: null, filename: null };
|
|
17
|
+
if (!info) return { language: null, tag: null };
|
|
17
18
|
|
|
18
19
|
const parts = info.split(":");
|
|
19
20
|
return {
|
|
20
21
|
language: parts[0] || null,
|
|
21
|
-
|
|
22
|
+
tag: parts[1] || null,
|
|
22
23
|
};
|
|
23
24
|
}
|
|
24
25
|
|
|
@@ -39,6 +40,37 @@ function cleanCodeText(text) {
|
|
|
39
40
|
.trim();
|
|
40
41
|
}
|
|
41
42
|
|
|
43
|
+
/**
|
|
44
|
+
* Parse code block content based on language
|
|
45
|
+
* Only parses tagged blocks with json/yaml language
|
|
46
|
+
* @param {string} text - Raw code block text
|
|
47
|
+
* @param {string} language - Code block language
|
|
48
|
+
* @returns {*} Parsed data or null if not parseable
|
|
49
|
+
*/
|
|
50
|
+
function parseCodeBlockData(text, language) {
|
|
51
|
+
if (!text) return null;
|
|
52
|
+
|
|
53
|
+
const lang = (language || "").toLowerCase();
|
|
54
|
+
|
|
55
|
+
if (lang === "json") {
|
|
56
|
+
try {
|
|
57
|
+
return JSON.parse(text);
|
|
58
|
+
} catch {
|
|
59
|
+
return null;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
if (lang === "yaml" || lang === "yml") {
|
|
64
|
+
try {
|
|
65
|
+
return yaml.load(text);
|
|
66
|
+
} catch {
|
|
67
|
+
return null;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return null;
|
|
72
|
+
}
|
|
73
|
+
|
|
42
74
|
/**
|
|
43
75
|
* Parse a paragraph's content by tokenizing with marked
|
|
44
76
|
* @param {Object} token - Marked token for paragraph
|
|
@@ -142,16 +174,32 @@ function parseBlock(token, schema) {
|
|
|
142
174
|
}
|
|
143
175
|
|
|
144
176
|
if (token.type === "code") {
|
|
145
|
-
const { language, filename } = processCodeInfo(token.lang);
|
|
177
|
+
const { language, tag } = processCodeInfo(token.lang);
|
|
178
|
+
const rawText = cleanCodeText(token.text);
|
|
179
|
+
|
|
180
|
+
// Tagged blocks become dataBlocks (structured data, not code for display)
|
|
181
|
+
if (tag) {
|
|
182
|
+
const parsedData = parseCodeBlockData(rawText, language);
|
|
183
|
+
if (parsedData !== null) {
|
|
184
|
+
// Successfully parsed - it's a dataBlock
|
|
185
|
+
return {
|
|
186
|
+
type: "dataBlock",
|
|
187
|
+
attrs: { tag, data: parsedData },
|
|
188
|
+
};
|
|
189
|
+
}
|
|
190
|
+
// Parsing failed - fall back to codeBlock with language for runtime fallback
|
|
191
|
+
return {
|
|
192
|
+
type: "codeBlock",
|
|
193
|
+
attrs: { language, tag },
|
|
194
|
+
content: [{ type: "text", text: rawText }],
|
|
195
|
+
};
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
// Untagged code block - for display with syntax highlighting
|
|
146
199
|
return {
|
|
147
200
|
type: "codeBlock",
|
|
148
|
-
attrs: { language, filename },
|
|
149
|
-
content: [
|
|
150
|
-
{
|
|
151
|
-
type: "text",
|
|
152
|
-
text: cleanCodeText(token.text),
|
|
153
|
-
},
|
|
154
|
-
],
|
|
201
|
+
attrs: { language },
|
|
202
|
+
content: [{ type: "text", text: rawText }],
|
|
155
203
|
};
|
|
156
204
|
}
|
|
157
205
|
|
package/src/parser/inline.js
CHANGED
|
@@ -62,6 +62,40 @@ function parseInline(token, schema, removeNewLine = false) {
|
|
|
62
62
|
];
|
|
63
63
|
}
|
|
64
64
|
|
|
65
|
+
if (token.type === "span") {
|
|
66
|
+
// Bracketed span: [text]{.class}
|
|
67
|
+
// Supports nested formatting via tokens
|
|
68
|
+
const { class: className, id, ...otherAttrs } = token.attrs || {};
|
|
69
|
+
|
|
70
|
+
const spanMark = {
|
|
71
|
+
type: "span",
|
|
72
|
+
attrs: {
|
|
73
|
+
...(className && { class: className }),
|
|
74
|
+
...(id && { id }),
|
|
75
|
+
...otherAttrs,
|
|
76
|
+
},
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
// If there are child tokens (nested formatting), process them
|
|
80
|
+
if (token.tokens && token.tokens.length > 0) {
|
|
81
|
+
return token.tokens.flatMap((t) =>
|
|
82
|
+
parseInline(t, schema, removeNewLine).map((node) => ({
|
|
83
|
+
...node,
|
|
84
|
+
marks: [...(node.marks || []), spanMark],
|
|
85
|
+
}))
|
|
86
|
+
);
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// Simple text span
|
|
90
|
+
return [
|
|
91
|
+
{
|
|
92
|
+
type: "text",
|
|
93
|
+
marks: [spanMark],
|
|
94
|
+
text: token.text,
|
|
95
|
+
},
|
|
96
|
+
];
|
|
97
|
+
}
|
|
98
|
+
|
|
65
99
|
if (token.type === "link") {
|
|
66
100
|
// Check for button: prefix or .button class in attrs
|
|
67
101
|
const hasButtonPrefix = token.href.startsWith("button:");
|
|
@@ -19,6 +19,11 @@ const PATTERNS = {
|
|
|
19
19
|
// Link: [text](href "title"){attrs}
|
|
20
20
|
// Captures: text, href, title (optional), attrs (optional)
|
|
21
21
|
link: /^\[([^\]]+)\]\(([^)"'\s]+)(?:\s+["']([^"']*)["'])?\)(?:\{([^}]*)\})?/,
|
|
22
|
+
|
|
23
|
+
// Span (bracketed span): [text]{attrs}
|
|
24
|
+
// Pandoc-style bracketed spans - text with attributes but no href
|
|
25
|
+
// Captures: text, attrs
|
|
26
|
+
span: /^\[([^\]]+)\]\{([^}]+)\}/,
|
|
22
27
|
}
|
|
23
28
|
|
|
24
29
|
/**
|
|
@@ -100,6 +105,58 @@ export function createLinkExtension() {
|
|
|
100
105
|
}
|
|
101
106
|
}
|
|
102
107
|
|
|
108
|
+
/**
|
|
109
|
+
* Create a marked extension for bracketed spans (Pandoc-style)
|
|
110
|
+
*
|
|
111
|
+
* Syntax: [text]{.class #id key=value}
|
|
112
|
+
*
|
|
113
|
+
* Used for inline text with semantic attributes like:
|
|
114
|
+
* - [highlighted text]{.highlight}
|
|
115
|
+
* - [muted note]{.muted}
|
|
116
|
+
* - [important]{.callout}
|
|
117
|
+
*
|
|
118
|
+
* @returns {Object} Marked tokenizer extension
|
|
119
|
+
*/
|
|
120
|
+
export function createSpanExtension() {
|
|
121
|
+
return {
|
|
122
|
+
name: 'span',
|
|
123
|
+
level: 'inline',
|
|
124
|
+
start(src) {
|
|
125
|
+
// Find [ but we need to check it's not a link or image
|
|
126
|
+
const idx = src.indexOf('[')
|
|
127
|
+
return idx
|
|
128
|
+
},
|
|
129
|
+
tokenizer(src) {
|
|
130
|
+
// Don't match images or links
|
|
131
|
+
if (src.startsWith('![')) return
|
|
132
|
+
|
|
133
|
+
// Check if this looks like a link [text](url) - if so, skip
|
|
134
|
+
// We need to match span ONLY if there's no () after ]
|
|
135
|
+
const match = PATTERNS.span.exec(src)
|
|
136
|
+
if (!match) return
|
|
137
|
+
|
|
138
|
+
// Make sure this isn't actually a link (check there's no ( after ])
|
|
139
|
+
const bracketEnd = src.indexOf(']')
|
|
140
|
+
if (bracketEnd > 0 && src[bracketEnd + 1] === '(') return
|
|
141
|
+
|
|
142
|
+
const [raw, text, attrString] = match
|
|
143
|
+
|
|
144
|
+
// Parse attributes from curly braces
|
|
145
|
+
const attrs = parseAttributeString(attrString)
|
|
146
|
+
|
|
147
|
+
return {
|
|
148
|
+
type: 'span',
|
|
149
|
+
raw,
|
|
150
|
+
text,
|
|
151
|
+
attrs,
|
|
152
|
+
// Include tokens for nested formatting (bold, italic, etc.)
|
|
153
|
+
tokens: [],
|
|
154
|
+
}
|
|
155
|
+
},
|
|
156
|
+
childTokens: ['tokens'],
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
103
160
|
/**
|
|
104
161
|
* Get all custom marked extensions
|
|
105
162
|
*
|
|
@@ -110,6 +167,7 @@ export function getMarkedExtensions() {
|
|
|
110
167
|
extensions: [
|
|
111
168
|
createImageExtension(),
|
|
112
169
|
createLinkExtension(),
|
|
170
|
+
createSpanExtension(),
|
|
113
171
|
],
|
|
114
172
|
}
|
|
115
173
|
}
|
package/tests/code.test.js
CHANGED
|
@@ -12,7 +12,6 @@ describe("Code Parsing", () => {
|
|
|
12
12
|
type: "codeBlock",
|
|
13
13
|
attrs: {
|
|
14
14
|
language: "javascript",
|
|
15
|
-
filename: null,
|
|
16
15
|
},
|
|
17
16
|
content: [
|
|
18
17
|
{
|
|
@@ -25,25 +24,19 @@ describe("Code Parsing", () => {
|
|
|
25
24
|
});
|
|
26
25
|
});
|
|
27
26
|
|
|
28
|
-
test("parses code blocks
|
|
29
|
-
const markdown = "```
|
|
27
|
+
test("parses tagged code blocks as dataBlocks", () => {
|
|
28
|
+
const markdown = "```json:nav-links\n[{\"label\": \"Home\"}]\n```";
|
|
30
29
|
const result = markdownToProseMirror(markdown);
|
|
31
30
|
|
|
32
31
|
expect(result).toEqual({
|
|
33
32
|
type: "doc",
|
|
34
33
|
content: [
|
|
35
34
|
{
|
|
36
|
-
type: "codeBlock",
|
|
35
|
+
type: "dataBlock", // Structured data, not code for display
|
|
37
36
|
attrs: {
|
|
38
|
-
|
|
39
|
-
|
|
37
|
+
tag: "nav-links",
|
|
38
|
+
data: [{ label: "Home" }],
|
|
40
39
|
},
|
|
41
|
-
content: [
|
|
42
|
-
{
|
|
43
|
-
type: "text",
|
|
44
|
-
text: "const x = 1;",
|
|
45
|
-
},
|
|
46
|
-
],
|
|
47
40
|
},
|
|
48
41
|
],
|
|
49
42
|
});
|
|
@@ -60,7 +53,6 @@ describe("Code Parsing", () => {
|
|
|
60
53
|
type: "codeBlock",
|
|
61
54
|
attrs: {
|
|
62
55
|
language: null,
|
|
63
|
-
filename: null,
|
|
64
56
|
},
|
|
65
57
|
content: [
|
|
66
58
|
{
|
|
@@ -107,7 +99,6 @@ describe("Code Parsing", () => {
|
|
|
107
99
|
type: "codeBlock",
|
|
108
100
|
attrs: {
|
|
109
101
|
language: null,
|
|
110
|
-
filename: null,
|
|
111
102
|
},
|
|
112
103
|
content: [
|
|
113
104
|
{
|
package/tests/parser.test.js
CHANGED
|
@@ -570,3 +570,85 @@ describe("Curly Brace Attributes", () => {
|
|
|
570
570
|
});
|
|
571
571
|
});
|
|
572
572
|
});
|
|
573
|
+
|
|
574
|
+
describe("Bracketed Spans", () => {
|
|
575
|
+
test("parses span with class", () => {
|
|
576
|
+
const markdown = "This is [highlighted text]{.highlight} in a sentence.";
|
|
577
|
+
const result = markdownToProseMirror(markdown);
|
|
578
|
+
|
|
579
|
+
expect(result.content[0].content).toEqual([
|
|
580
|
+
{ type: "text", text: "This is " },
|
|
581
|
+
{
|
|
582
|
+
type: "text",
|
|
583
|
+
text: "highlighted text",
|
|
584
|
+
marks: [{ type: "span", attrs: { class: "highlight" } }],
|
|
585
|
+
},
|
|
586
|
+
{ type: "text", text: " in a sentence." },
|
|
587
|
+
]);
|
|
588
|
+
});
|
|
589
|
+
|
|
590
|
+
test("parses span with multiple classes", () => {
|
|
591
|
+
const markdown = "[important note]{.callout .bold}";
|
|
592
|
+
const result = markdownToProseMirror(markdown);
|
|
593
|
+
|
|
594
|
+
expect(result.content[0].content[0]).toEqual({
|
|
595
|
+
type: "text",
|
|
596
|
+
text: "important note",
|
|
597
|
+
marks: [{ type: "span", attrs: { class: "callout bold" } }],
|
|
598
|
+
});
|
|
599
|
+
});
|
|
600
|
+
|
|
601
|
+
test("parses muted span", () => {
|
|
602
|
+
const markdown = "[This is less important]{.muted}";
|
|
603
|
+
const result = markdownToProseMirror(markdown);
|
|
604
|
+
|
|
605
|
+
expect(result.content[0].content[0]).toEqual({
|
|
606
|
+
type: "text",
|
|
607
|
+
text: "This is less important",
|
|
608
|
+
marks: [{ type: "span", attrs: { class: "muted" } }],
|
|
609
|
+
});
|
|
610
|
+
});
|
|
611
|
+
|
|
612
|
+
test("parses span with id and class", () => {
|
|
613
|
+
const markdown = "[key term]{#glossary-term .highlight}";
|
|
614
|
+
const result = markdownToProseMirror(markdown);
|
|
615
|
+
|
|
616
|
+
expect(result.content[0].content[0]).toEqual({
|
|
617
|
+
type: "text",
|
|
618
|
+
text: "key term",
|
|
619
|
+
marks: [{ type: "span", attrs: { class: "highlight", id: "glossary-term" } }],
|
|
620
|
+
});
|
|
621
|
+
});
|
|
622
|
+
|
|
623
|
+
test("parses span with custom attributes", () => {
|
|
624
|
+
const markdown = "[tooltip text]{data-tooltip=\"More info\" .info}";
|
|
625
|
+
const result = markdownToProseMirror(markdown);
|
|
626
|
+
|
|
627
|
+
expect(result.content[0].content[0]).toEqual({
|
|
628
|
+
type: "text",
|
|
629
|
+
text: "tooltip text",
|
|
630
|
+
marks: [{ type: "span", attrs: { class: "info", "data-tooltip": "More info" } }],
|
|
631
|
+
});
|
|
632
|
+
});
|
|
633
|
+
|
|
634
|
+
test("does not confuse span with link", () => {
|
|
635
|
+
const markdown = "[Link](https://example.com) and [span]{.highlight}";
|
|
636
|
+
const result = markdownToProseMirror(markdown);
|
|
637
|
+
|
|
638
|
+
const content = result.content[0].content;
|
|
639
|
+
// First should be a link
|
|
640
|
+
expect(content[0].marks[0].type).toBe("link");
|
|
641
|
+
// Last should be a span
|
|
642
|
+
expect(content[content.length - 1].marks[0].type).toBe("span");
|
|
643
|
+
});
|
|
644
|
+
|
|
645
|
+
test("parses multiple spans in same paragraph", () => {
|
|
646
|
+
const markdown = "[first]{.highlight} normal [second]{.muted}";
|
|
647
|
+
const result = markdownToProseMirror(markdown);
|
|
648
|
+
|
|
649
|
+
const content = result.content[0].content;
|
|
650
|
+
expect(content[0].marks[0].attrs.class).toBe("highlight");
|
|
651
|
+
expect(content[1].text).toBe(" normal ");
|
|
652
|
+
expect(content[2].marks[0].attrs.class).toBe("muted");
|
|
653
|
+
});
|
|
654
|
+
});
|