@uniweb/content-reader 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -105,6 +105,46 @@ Buttons can be created using the `.button` class or the legacy `button:` prefix:
105
105
  | `icon` | Icon name or path |
106
106
  | `target`, `rel`, `download` | Same as links |
107
107
 
108
+ #### Bracketed Spans
109
+
110
+ Attach classes, IDs, and custom attributes to inline text using Pandoc-style bracketed spans:
111
+
112
+ ```markdown
113
+ # Basic class
114
+ This has [highlighted text]{.highlight} for emphasis.
115
+
116
+ # Multiple classes
117
+ Here's [styled text]{.highlight .large} with two classes.
118
+
119
+ # ID attribute
120
+ Jump to [this section]{#anchor-point}.
121
+
122
+ # Class and ID together
123
+ [Important note]{.callout #note-1}
124
+
125
+ # Custom attributes
126
+ [Hover me]{.tooltip data-tip="More info here"}
127
+ ```
128
+
129
+ Output structure (for the basic class example above):
130
+
131
+ ```js
132
+ {
133
+ type: "text",
134
+ text: "highlighted text",
135
+ marks: [{ type: "span", attrs: { class: "highlight" } }]
136
+ }
137
+ ```
138
+
139
+ | Syntax | Result |
140
+ |--------|--------|
141
+ | `[text]{.class}` | `<span class="class">` |
142
+ | `[text]{#id}` | `<span id="id">` |
143
+ | `[text]{.a .b}` | `<span class="a b">` |
144
+ | `[text]{key=value}` | `<span key="value">` |
145
+
146
+ Spans can be combined with other marks (bold, italic, links).
147
+
108
148
  #### Legacy Prefix Syntax
109
149
 
110
150
  The original prefix syntax is still supported for backward compatibility:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uniweb/content-reader",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "description": "Markdown to ProseMirror document structure converter",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -16,6 +16,7 @@
16
16
  "author": "Proximify Inc.",
17
17
  "license": "GPL-3.0-or-later",
18
18
  "dependencies": {
19
+ "js-yaml": "^4.1.0",
19
20
  "marked": "^11.1.0"
20
21
  },
21
22
  "devDependencies": {
@@ -3,22 +3,23 @@
3
3
  */
4
4
 
5
5
  import { marked } from "marked";
6
+ import yaml from "js-yaml";
6
7
  import { parseInline } from "./inline.js";
7
8
  import { parseList } from "./lists.js";
8
9
  import { parseTable } from "./tables.js";
9
10
 
10
11
  /**
11
- * Process code block info string (e.g., "javascript:example.js")
12
+ * Process code block info string (e.g., "json:tag-name")
12
13
  * @param {string} info - Code block info string
13
- * @returns {Object} Language and filename
14
+ * @returns {Object} Language and optional tag
14
15
  */
15
16
  function processCodeInfo(info) {
16
- if (!info) return { language: null, filename: null };
17
+ if (!info) return { language: null, tag: null };
17
18
 
18
19
  const parts = info.split(":");
19
20
  return {
20
21
  language: parts[0] || null,
21
- filename: parts[1] || null,
22
+ tag: parts[1] || null,
22
23
  };
23
24
  }
24
25
 
@@ -39,6 +40,37 @@ function cleanCodeText(text) {
39
40
  .trim();
40
41
  }
41
42
 
43
+ /**
44
+ * Parse code block content based on language
45
+ * Handles json and yaml/yml languages only; tag filtering is done by the caller (parseBlock)
46
+ * @param {string} text - Raw code block text
47
+ * @param {string} language - Code block language
48
+ * @returns {*} Parsed data, or null for empty text, an unsupported language, or a parse error
49
+ */
50
+ function parseCodeBlockData(text, language) {
51
+ if (!text) return null;
52
+
53
+ const lang = (language || "").toLowerCase();
54
+
55
+ if (lang === "json") {
56
+ try {
57
+ return JSON.parse(text);
58
+ } catch {
59
+ return null;
60
+ }
61
+ }
62
+
63
+ if (lang === "yaml" || lang === "yml") {
64
+ try {
65
+ return yaml.load(text);
66
+ } catch {
67
+ return null;
68
+ }
69
+ }
70
+
71
+ return null;
72
+ }
73
+
42
74
  /**
43
75
  * Parse a paragraph's content by tokenizing with marked
44
76
  * @param {Object} token - Marked token for paragraph
@@ -142,16 +174,32 @@ function parseBlock(token, schema) {
142
174
  }
143
175
 
144
176
  if (token.type === "code") {
145
- const { language, filename } = processCodeInfo(token.lang);
177
+ const { language, tag } = processCodeInfo(token.lang);
178
+ const rawText = cleanCodeText(token.text);
179
+
180
+ // Tagged blocks become dataBlocks (structured data, not code for display)
181
+ if (tag) {
182
+ const parsedData = parseCodeBlockData(rawText, language);
183
+ if (parsedData !== null) {
184
+ // Successfully parsed - it's a dataBlock
185
+ return {
186
+ type: "dataBlock",
187
+ attrs: { tag, data: parsedData },
188
+ };
189
+ }
190
+ // Parsing failed - keep it as a codeBlock (language and tag preserved) for runtime fallback
191
+ return {
192
+ type: "codeBlock",
193
+ attrs: { language, tag },
194
+ content: [{ type: "text", text: rawText }],
195
+ };
196
+ }
197
+
198
+ // Untagged code block - for display with syntax highlighting
146
199
  return {
147
200
  type: "codeBlock",
148
- attrs: { language, filename },
149
- content: [
150
- {
151
- type: "text",
152
- text: cleanCodeText(token.text),
153
- },
154
- ],
201
+ attrs: { language },
202
+ content: [{ type: "text", text: rawText }],
155
203
  };
156
204
  }
157
205
 
@@ -62,6 +62,40 @@ function parseInline(token, schema, removeNewLine = false) {
62
62
  ];
63
63
  }
64
64
 
65
+ if (token.type === "span") {
66
+ // Bracketed span: [text]{.class}
67
+ // Supports nested formatting via tokens
68
+ const { class: className, id, ...otherAttrs } = token.attrs || {};
69
+
70
+ const spanMark = {
71
+ type: "span",
72
+ attrs: {
73
+ ...(className && { class: className }),
74
+ ...(id && { id }),
75
+ ...otherAttrs,
76
+ },
77
+ };
78
+
79
+ // If there are child tokens (nested formatting), process them
80
+ if (token.tokens && token.tokens.length > 0) {
81
+ return token.tokens.flatMap((t) =>
82
+ parseInline(t, schema, removeNewLine).map((node) => ({
83
+ ...node,
84
+ marks: [...(node.marks || []), spanMark],
85
+ }))
86
+ );
87
+ }
88
+
89
+ // Simple text span
90
+ return [
91
+ {
92
+ type: "text",
93
+ marks: [spanMark],
94
+ text: token.text,
95
+ },
96
+ ];
97
+ }
98
+
65
99
  if (token.type === "link") {
66
100
  // Check for button: prefix or .button class in attrs
67
101
  const hasButtonPrefix = token.href.startsWith("button:");
@@ -19,6 +19,11 @@ const PATTERNS = {
19
19
  // Link: [text](href "title"){attrs}
20
20
  // Captures: text, href, title (optional), attrs (optional)
21
21
  link: /^\[([^\]]+)\]\(([^)"'\s]+)(?:\s+["']([^"']*)["'])?\)(?:\{([^}]*)\})?/,
22
+
23
+ // Span (bracketed span): [text]{attrs}
24
+ // Pandoc-style bracketed spans - text with attributes but no href
25
+ // Captures: text, attrs
26
+ span: /^\[([^\]]+)\]\{([^}]+)\}/,
22
27
  }
23
28
 
24
29
  /**
@@ -100,6 +105,58 @@ export function createLinkExtension() {
100
105
  }
101
106
  }
102
107
 
108
+ /**
109
+ * Create a marked extension for bracketed spans (Pandoc-style)
110
+ *
111
+ * Syntax: [text]{.class #id key=value}
112
+ *
113
+ * Used for inline text with semantic attributes like:
114
+ * - [highlighted text]{.highlight}
115
+ * - [muted note]{.muted}
116
+ * - [important]{.callout}
117
+ *
118
+ * @returns {Object} Marked tokenizer extension
119
+ */
120
+ export function createSpanExtension() {
121
+ return {
122
+ name: 'span',
123
+ level: 'inline',
124
+ start(src) {
125
+ // Index of the next '[' (candidate start); the tokenizer decides whether it is actually a span
126
+ const idx = src.indexOf('[')
127
+ return idx
128
+ },
129
+ tokenizer(src) {
130
+ // Don't match images or links
131
+ if (src.startsWith('![')) return
132
+
133
+ // Match [text]{attrs} at the start of src. The pattern requires "{"
134
+ // directly after "]", so [text](url) links never match
135
+ const match = PATTERNS.span.exec(src)
136
+ if (!match) return
137
+
138
+ // Defensive link check: bail out if "(" follows the first "]" (the span pattern already excludes this)
139
+ const bracketEnd = src.indexOf(']')
140
+ if (bracketEnd > 0 && src[bracketEnd + 1] === '(') return
141
+
142
+ const [raw, text, attrString] = match
143
+
144
+ // Parse attributes from curly braces
145
+ const attrs = parseAttributeString(attrString)
146
+
147
+ return {
148
+ type: 'span',
149
+ raw,
150
+ text,
151
+ attrs,
152
+ // Child tokens for nested formatting - left empty here, so parseInline uses token.text as plain text
153
+ tokens: [],
154
+ }
155
+ },
156
+ childTokens: ['tokens'],
157
+ }
158
+ }
159
+
103
160
  /**
104
161
  * Get all custom marked extensions
105
162
  *
@@ -110,6 +167,7 @@ export function getMarkedExtensions() {
110
167
  extensions: [
111
168
  createImageExtension(),
112
169
  createLinkExtension(),
170
+ createSpanExtension(),
113
171
  ],
114
172
  }
115
173
  }
@@ -12,7 +12,6 @@ describe("Code Parsing", () => {
12
12
  type: "codeBlock",
13
13
  attrs: {
14
14
  language: "javascript",
15
- filename: null,
16
15
  },
17
16
  content: [
18
17
  {
@@ -25,25 +24,19 @@ describe("Code Parsing", () => {
25
24
  });
26
25
  });
27
26
 
28
- test("parses code blocks with filenames", () => {
29
- const markdown = "```javascript:example.js\nconst x = 1;\n```";
27
+ test("parses tagged code blocks as dataBlocks", () => {
28
+ const markdown = "```json:nav-links\n[{\"label\": \"Home\"}]\n```";
30
29
  const result = markdownToProseMirror(markdown);
31
30
 
32
31
  expect(result).toEqual({
33
32
  type: "doc",
34
33
  content: [
35
34
  {
36
- type: "codeBlock",
35
+ type: "dataBlock", // Structured data, not code for display
37
36
  attrs: {
38
- language: "javascript",
39
- filename: "example.js",
37
+ tag: "nav-links",
38
+ data: [{ label: "Home" }],
40
39
  },
41
- content: [
42
- {
43
- type: "text",
44
- text: "const x = 1;",
45
- },
46
- ],
47
40
  },
48
41
  ],
49
42
  });
@@ -60,7 +53,6 @@ describe("Code Parsing", () => {
60
53
  type: "codeBlock",
61
54
  attrs: {
62
55
  language: null,
63
- filename: null,
64
56
  },
65
57
  content: [
66
58
  {
@@ -107,7 +99,6 @@ describe("Code Parsing", () => {
107
99
  type: "codeBlock",
108
100
  attrs: {
109
101
  language: null,
110
- filename: null,
111
102
  },
112
103
  content: [
113
104
  {
@@ -570,3 +570,85 @@ describe("Curly Brace Attributes", () => {
570
570
  });
571
571
  });
572
572
  });
573
+
574
+ describe("Bracketed Spans", () => {
575
+ test("parses span with class", () => {
576
+ const markdown = "This is [highlighted text]{.highlight} in a sentence.";
577
+ const result = markdownToProseMirror(markdown);
578
+
579
+ expect(result.content[0].content).toEqual([
580
+ { type: "text", text: "This is " },
581
+ {
582
+ type: "text",
583
+ text: "highlighted text",
584
+ marks: [{ type: "span", attrs: { class: "highlight" } }],
585
+ },
586
+ { type: "text", text: " in a sentence." },
587
+ ]);
588
+ });
589
+
590
+ test("parses span with multiple classes", () => {
591
+ const markdown = "[important note]{.callout .bold}";
592
+ const result = markdownToProseMirror(markdown);
593
+
594
+ expect(result.content[0].content[0]).toEqual({
595
+ type: "text",
596
+ text: "important note",
597
+ marks: [{ type: "span", attrs: { class: "callout bold" } }],
598
+ });
599
+ });
600
+
601
+ test("parses muted span", () => {
602
+ const markdown = "[This is less important]{.muted}";
603
+ const result = markdownToProseMirror(markdown);
604
+
605
+ expect(result.content[0].content[0]).toEqual({
606
+ type: "text",
607
+ text: "This is less important",
608
+ marks: [{ type: "span", attrs: { class: "muted" } }],
609
+ });
610
+ });
611
+
612
+ test("parses span with id and class", () => {
613
+ const markdown = "[key term]{#glossary-term .highlight}";
614
+ const result = markdownToProseMirror(markdown);
615
+
616
+ expect(result.content[0].content[0]).toEqual({
617
+ type: "text",
618
+ text: "key term",
619
+ marks: [{ type: "span", attrs: { class: "highlight", id: "glossary-term" } }],
620
+ });
621
+ });
622
+
623
+ test("parses span with custom attributes", () => {
624
+ const markdown = "[tooltip text]{data-tooltip=\"More info\" .info}";
625
+ const result = markdownToProseMirror(markdown);
626
+
627
+ expect(result.content[0].content[0]).toEqual({
628
+ type: "text",
629
+ text: "tooltip text",
630
+ marks: [{ type: "span", attrs: { class: "info", "data-tooltip": "More info" } }],
631
+ });
632
+ });
633
+
634
+ test("does not confuse span with link", () => {
635
+ const markdown = "[Link](https://example.com) and [span]{.highlight}";
636
+ const result = markdownToProseMirror(markdown);
637
+
638
+ const content = result.content[0].content;
639
+ // First should be a link
640
+ expect(content[0].marks[0].type).toBe("link");
641
+ // Last should be a span
642
+ expect(content[content.length - 1].marks[0].type).toBe("span");
643
+ });
644
+
645
+ test("parses multiple spans in same paragraph", () => {
646
+ const markdown = "[first]{.highlight} normal [second]{.muted}";
647
+ const result = markdownToProseMirror(markdown);
648
+
649
+ const content = result.content[0].content;
650
+ expect(content[0].marks[0].attrs.class).toBe("highlight");
651
+ expect(content[1].text).toBe(" normal ");
652
+ expect(content[2].marks[0].attrs.class).toBe("muted");
653
+ });
654
+ });