@uniweb/content-reader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,174 @@
1
+ # Content Reader
2
+
3
+ A JavaScript library for converting Markdown content into ProseMirror-compatible document structures. This library is designed to work seamlessly with TipTap v2 and provides enhanced Markdown parsing capabilities with support for extended syntax.
4
+
5
+ ## Features
6
+
7
+ ### Basic Markdown Support
8
+
9
+ - Paragraphs and basic text formatting (bold, italic)
10
+ - Headings with automatic ID generation
11
+ - Links and images
12
+ - Ordered and unordered lists with nesting support
13
+ - Code blocks with language and filename support
14
+ - Tables with alignment and formatting
15
+ - Block quotes
16
+ - Horizontal rules
17
+
18
+ ### Extended Syntax
19
+
20
+ - **Enhanced Images**: Support for image roles (content, background, icon, gallery) using prefix syntax:
21
+
22
+ ```markdown
23
+ ![Alt text](icon:path/to/icon.svg)
24
+ ![Alt text](background:path/to/bg.jpg)
25
+ ```
26
+
27
+ - **Enhanced Links**: Button variants with predefined styles:
28
+
29
+ ```markdown
30
+ [Button Text](button:https://example.com)
31
+ ```
32
+
33
+ - **Tables with Alignment**: Full support for aligned columns:
34
+ ```markdown
35
+ | Left | Center | Right |
36
+ | :--- | :----: | ----: |
37
+ | Text | Text | Text |
38
+ ```
39
+
40
+ ### Developer-Friendly Features
41
+
42
+ - Clean, well-documented code
43
+ - Comprehensive test suite
44
+ - Modular architecture for easy extension
45
+ - Compatible with TipTap v2 document structure
46
+ - Full TypeScript type definitions
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ npm install @uniweb/content-reader
52
+ ```
53
+
54
+ ## Usage
55
+
56
+ Basic usage:
57
+
58
+ ```javascript
59
+ import { markdownToProseMirror } from "@uniweb/content-reader";
60
+
61
+ const markdown = `
62
+ # Hello World
63
+
64
+ This is a **bold** statement with a [link](https://example.com).
65
+
66
+ - List item 1
67
+ - List item 2
68
+ - Nested item
69
+ `;
70
+
71
+ const doc = markdownToProseMirror(markdown);
72
+ ```
73
+
74
+ ### Using with TipTap
75
+
76
+ The library is designed to work seamlessly with TipTap editors:
77
+
78
+ ```javascript
79
+ import { Editor } from "@tiptap/core";
80
+ import { markdownToProseMirror } from "@uniweb/content-reader";
81
+
82
+ const editor = new Editor({
83
+ content: markdownToProseMirror(markdown),
84
+ // ... other TipTap configuration
85
+ });
86
+ ```
87
+
88
+ ### Advanced Features
89
+
90
+ #### Working with Image Roles
91
+
92
+ The library supports extended image syntax for different display contexts:
93
+
94
+ ```javascript
95
+ const markdown = `
96
+ ![Header image](background:header.jpg)
97
+ ![Profile photo](gallery:profile.jpg)
98
+ ![Settings](icon:settings.svg)
99
+ `;
100
+
101
+ const doc = markdownToProseMirror(markdown);
102
+ // Each image will have a 'role' attribute in its output structure
103
+ ```
104
+
105
+ #### Handling Tables with Alignment
106
+
107
+ Tables support column alignment and formatted content:
108
+
109
+ ```javascript
110
+ const markdown = `
111
+ | Name | Status | Actions |
112
+ |:-----|:------:|--------:|
113
+ | John | Active | **Edit** |
114
+ | Jane | Away | *View* |
115
+ `;
116
+
117
+ const doc = markdownToProseMirror(markdown);
118
+ // Table cells will have appropriate alignment attributes
119
+ ```
120
+
121
+ ## Architecture
122
+
123
+ The library is organized into several modules:
124
+
125
+ - **Parser Core**: Handles the main parsing logic and orchestration
126
+ - **Block Parser**: Processes block-level elements
127
+ - **Inline Parser**: Handles inline formatting and text
128
+ - **Extensions**: Manages extended syntax features
129
+ - **Schema**: Defines the document structure
130
+
131
+ ## Contributing
132
+
133
+ We welcome contributions! Please see our contributing guidelines for details.
134
+
135
+ ### Development Setup
136
+
137
+ 1. Clone the repository:
138
+
139
+ ```bash
140
+ git clone https://github.com/uniweb/content-reader.git
141
+ ```
142
+
143
+ 2. Install dependencies:
144
+
145
+ ```bash
146
+ npm install
147
+ ```
148
+
149
+ 3. Run tests:
150
+ ```bash
151
+ npm test
152
+ ```
153
+
154
+ ### Testing
155
+
156
+ The project uses Jest for testing. Run the test suite:
157
+
158
+ ```bash
159
+ npm test
160
+ ```
161
+
162
+ Or in watch mode:
163
+
164
+ ```bash
165
+ npm run test:watch
166
+ ```
167
+
168
+ ## License
169
+
170
+ GPL-3.0-or-later - See [LICENSE](LICENSE) for details.
171
+
172
+ ## Credits
173
+
174
+ Developed and maintained by UniWeb CMS. Special thanks to all contributors.
package/package.json ADDED
@@ -0,0 +1,44 @@
1
+ {
2
+ "name": "@uniweb/content-reader",
3
+ "version": "1.0.0",
4
+ "description": "Markdown to ProseMirror document structure converter",
5
+ "type": "module",
6
+ "main": "src/index.js",
7
+ "exports": {
8
+ ".": "./src/index.js"
9
+ },
10
+ "scripts": {
11
+ "test": "NODE_OPTIONS=--experimental-vm-modules jest",
12
+ "test:watch": "NODE_OPTIONS=--experimental-vm-modules jest --watch",
13
+ "test-report": "NODE_OPTIONS=--experimental-vm-modules jest --json > test-results.json 2>&1"
14
+ },
15
+ "keywords": [
16
+ "markdown",
17
+ "prosemirror",
18
+ "tiptap",
19
+ "parser"
20
+ ],
21
+ "author": "Proximify Inc.",
22
+ "license": "GPL-3.0-or-later",
23
+ "dependencies": {
24
+ "marked": "^11.1.0"
25
+ },
26
+ "devDependencies": {
27
+ "jest": "^29.7.0"
28
+ },
29
+ "jest": {
30
+ "testEnvironment": "node",
31
+ "verbose": true
32
+ },
33
+ "directories": {
34
+ "test": "tests"
35
+ },
36
+ "repository": {
37
+ "type": "git",
38
+ "url": "git+https://github.com/uniweb/content-reader.git"
39
+ },
40
+ "bugs": {
41
+ "url": "https://github.com/uniweb/content-reader/issues"
42
+ },
43
+ "homepage": "https://github.com/uniweb/content-reader#readme"
44
+ }
package/src/index.js ADDED
@@ -0,0 +1,22 @@
1
+ /**
2
+ * @fileoverview Main entry point for the content-reader package.
3
+ * Exports the main function to convert markdown to ProseMirror structure.
4
+ */
5
+
6
+ import { marked } from "marked";
7
+ import { parseMarkdownContent } from "./parser/index.js";
8
+ import { getBaseSchema } from "./schema/index.js";
9
+ import { isValidUniwebMarkdown } from "./utils.js";
10
+
11
/**
 * Convert markdown text into a ProseMirror document structure.
 * @param {string} markdown - The markdown content to parse
 * @returns {Object} ProseMirror document structure
 */
function markdownToProseMirror(markdown) {
    // Build the schema first, then tokenize, preserving the original
    // evaluation order in case either call has side effects.
    const documentSchema = getBaseSchema();
    const blockTokens = marked.lexer(markdown);
    return parseMarkdownContent(blockTokens, documentSchema);
}
21
+
22
+ export { markdownToProseMirror, isValidUniwebMarkdown };
@@ -0,0 +1,170 @@
1
+ /**
2
+ * @fileoverview Parse block-level markdown elements
3
+ */
4
+
5
+ import { marked } from "marked";
6
+ import { parseInline } from "./inline.js";
7
+ import { parseList } from "./lists.js";
8
+ import { parseTable } from "./tables.js";
9
+
10
/**
 * Process a fenced code block's info string (e.g. "javascript:example.js").
 * The text before the first colon is the language; everything after it is
 * the filename, so filenames that themselves contain colons are preserved.
 * @param {string} info - Code block info string (token.lang from marked)
 * @returns {{language: ?string, filename: ?string}} Language and filename
 */
function processCodeInfo(info) {
    if (!info) return { language: null, filename: null };

    // Split at the first colon only: "js:src:main.js" must yield
    // language "js" and filename "src:main.js", not a truncated filename.
    const colonIndex = info.indexOf(":");
    if (colonIndex === -1) {
        return { language: info, filename: null };
    }

    return {
        language: info.slice(0, colonIndex) || null,
        filename: info.slice(colonIndex + 1) || null,
    };
}
24
+
25
+ /**
26
+ * Clean code block text
27
+ * @param {string} text - Raw code block text
28
+ * @returns {string} Cleaned text
29
+ */
30
+ function cleanCodeText(text) {
31
+ // Remove common indent (for indented code blocks)
32
+ const lines = text.split("\n");
33
+ const indent = lines[0].match(/^\s*/)[0];
34
+ return lines
35
+ .map((line) =>
36
+ line.startsWith(indent) ? line.slice(indent.length) : line
37
+ )
38
+ .join("\n")
39
+ .trim();
40
+ }
41
+
42
/**
 * Parse a paragraph-like token's inline content.
 * Uses the inline tokens marked has already attached to the token (rather
 * than re-lexing token.text), so inline constructs such as code spans are
 * only processed once.
 * @param {Object} token - Marked token carrying a .tokens array (paragraph/heading)
 * @param {Object} schema - ProseMirror schema
 * @returns {Array} Array of ProseMirror inline nodes
 */
function parseParagraph(token, schema) {
    // Defensive: tolerate tokens that arrive without pre-parsed inline
    // tokens instead of throwing on .flatMap of undefined.
    const inlineTokens = token.tokens || [];
    return inlineTokens.flatMap((t) => parseInline(t, schema));
}
56
+
57
/**
 * Split a paragraph's inline nodes so that images become block-level:
 * runs of non-image nodes are wrapped in paragraphs, and each image node
 * is emitted directly between them.
 * @param {Array} content - Inline nodes parsed from a paragraph token
 * @returns {Array} Block-level nodes (paragraphs and images)
 */
function hoistImagesFromParagraph(content) {
    const blocks = [];
    let run = null; // current open run of non-image inline nodes

    for (const node of content) {
        if (node.type === "image") {
            // Close the open paragraph before emitting the image.
            if (run) {
                blocks.push({ type: "paragraph", content: run });
                run = null;
            }
            blocks.push(node);
        } else {
            if (!run) run = [];
            run.push(node);
        }
    }

    // Flush any trailing paragraph content.
    if (run) blocks.push({ type: "paragraph", content: run });
    return blocks;
}

/**
 * Parse a block-level marked token into ProseMirror node(s).
 * @param {Object} token - Marked block token
 * @param {Object} schema - ProseMirror schema
 * @returns {Object|Array|null} A ProseMirror block node, an array of nodes
 *     (paragraphs may split around hoisted images), or null for skipped
 *     tokens (HTML comments, unknown types)
 */
function parseBlock(token, schema) {
    // Skip HTML comments entirely.
    if (token.type === "html" && token.text.startsWith("<!--")) {
        return null;
    }

    if (token.type === "paragraph") {
        const content = parseParagraph(token, schema);
        if (!content.length) {
            return null;
        }
        // Images are block-level in this schema: lift them out of the paragraph.
        return hoistImagesFromParagraph(content);
    }

    if (token.type === "heading") {
        return {
            type: "heading",
            attrs: {
                level: token.depth,
                id: null, // heading IDs are assigned downstream, not here
            },
            content: parseParagraph(token, schema),
        };
    }

    if (token.type === "blockquote") {
        // Child tokens may parse to null (HTML comments, unknown types);
        // drop them so the blockquote content never contains null entries.
        const content = token.tokens
            .flatMap((t) => parseBlock(t, schema))
            .filter((node) => node != null);
        return {
            type: "blockquote",
            content,
        };
    }

    if (token.type === "hr") {
        return {
            type: "divider",
            attrs: { style: "dot", size: "normal" },
        };
    }

    if (token.type === "code") {
        // token.lang may carry "language:filename" (extended syntax).
        const { language, filename } = processCodeInfo(token.lang);
        return {
            type: "codeBlock",
            attrs: { language, filename },
            content: [
                {
                    type: "text",
                    text: cleanCodeText(token.text),
                },
            ],
        };
    }

    if (token.type === "list") {
        return parseList(token, schema);
    }

    if (token.type === "table") {
        return parseTable(token, schema);
    }

    // Unknown block types (e.g. "space") are skipped.
    return null;
}
169
+
170
+ export { parseBlock, parseParagraph };
@@ -0,0 +1,54 @@
1
+ /**
2
+ * @fileoverview Main parser orchestration
3
+ */
4
+
5
+ import { parseBlock } from "./block.js";
6
+ import { isEyebrowPattern, parseEyebrowPattern } from "./patterns.js";
7
+ import { isEmptyContent } from "./utils.js";
8
+
9
/**
 * Parse an array of marked block tokens into a ProseMirror document.
 * @param {Array} tokens - Array of marked block tokens
 * @param {Object} schema - ProseMirror schema
 * @returns {Object} ProseMirror document node ({ type: "doc", content })
 */
function parseMarkdownContent(tokens, schema) {
    const content = [];

    // Note: the previous implementation carried a `skipNext` flag that was
    // only ever set by commented-out eyebrow-pattern handling; both the
    // flag and the dead code have been removed.
    for (const token of tokens) {
        const node = parseBlock(token, schema);
        if (!node) continue;

        // Paragraphs may split into several nodes (images are hoisted out).
        if (Array.isArray(node)) {
            content.push(...node);
        } else {
            content.push(node);
        }
    }

    // Filter out any remaining null nodes and empty paragraphs.
    return {
        type: "doc",
        content: content.filter((node) => {
            if (!node) return false;
            if (node.type === "paragraph" && isEmptyContent(node.content))
                return false;
            return true;
        }),
    };
}
53
+
54
+ export { parseMarkdownContent };
@@ -0,0 +1,117 @@
1
+ /**
2
+ * @fileoverview Parse inline markdown elements
3
+ */
4
+
5
/**
 * Parse an inline markdown token into ProseMirror/Tiptap inline nodes.
 * @param {Object} token - Marked inline token
 * @param {Object} schema - ProseMirror schema (passed through to recursive
 *     calls; not otherwise used here)
 * @param {boolean} [removeNewLine=false] - Strip newlines from plain text
 *     (used for list items, where marked keeps soft line breaks in .raw)
 * @returns {Array} Array of ProseMirror inline nodes
 *
 * Implementation notes:
 * - Plain text uses token.raw to avoid marked's HTML-entity encoding.
 * - Bold/italic are marks on text nodes, not nested structures, so nested
 *   formatting like **_text_** flattens to one text node with both marks.
 * - HTML entities are decoded only for token types (codespan, link, image)
 *   where the processed .text content is needed.
 */
function parseInline(token, schema, removeNewLine = false) {
    if (token.type === "text") {
        // Use .raw for unencoded characters (', ", &, ...): marked's .text
        // encodes them as HTML entities. Work on a local copy so the
        // caller's token is never mutated (the old code reassigned
        // token.raw in place).
        let raw = token.raw;
        if (removeNewLine && raw) {
            raw = raw.replace(/\n/g, "");
        }
        return raw ? [{ type: "text", text: raw }] : [];
    }

    if (token.type === "strong" || token.type === "em") {
        // Apply the mark to every node produced by the nested tokens.
        const mark = { type: token.type === "strong" ? "bold" : "italic" };

        return token.tokens.flatMap((t) =>
            parseInline(t, schema, removeNewLine).map((node) => ({
                ...node,
                marks: [...(node.marks || []), mark],
            }))
        );
    }

    if (token.type === "html") {
        // Raw inline HTML is passed through as plain text.
        return [{ type: "text", text: token.raw }];
    }

    if (token.type === "br") {
        return [{ type: "text", text: "\n" }];
    }

    // Decode the HTML entities marked introduces into .text. Guard against
    // token types that carry no .text at all, which previously crashed
    // before the unknown-token fallback could run.
    const text = (token.text || "")
        .replace(/&#39;/g, "'")
        .replace(/&quot;/g, '"')
        .replace(/&amp;/g, "&");

    if (token.type === "codespan") {
        return [
            {
                type: "text",
                marks: [{ type: "code" }],
                text,
            },
        ];
    }

    if (token.type === "link") {
        // Extended syntax: [Text](button:URL) renders as a button mark.
        const isButton = token.href.startsWith("button:");
        const href = isButton ? token.href.substring(7) : token.href;

        return [
            {
                type: "text",
                marks: [
                    {
                        type: isButton ? "button" : "link",
                        attrs: {
                            href,
                            title: token.title || null,
                            ...(isButton && { variant: "primary" }),
                        },
                    },
                ],
                text,
            },
        ];
    }

    if (token.type === "image") {
        // Extended syntax: ![Alt](role:src) assigns a display role
        // (icon, background, gallery, ...). http(s) URLs keep role "image".
        let role, src;

        if (token.href.includes(":") && !token.href.startsWith("http")) {
            // Split at the first colon so the src may itself contain colons.
            const colonIndex = token.href.indexOf(":");
            role = token.href.substring(0, colonIndex);
            src = token.href.substring(colonIndex + 1);
        } else {
            role = "image";
            src = token.href;
        }

        return [
            {
                type: "image",
                attrs: {
                    src,
                    caption: token.title || null,
                    alt: text || null,
                    role,
                },
            },
        ];
    }

    // Unknown token types fall back to their raw text.
    return token.raw ? [{ type: "text", text: token.raw }] : [];
}
116
+
117
+ export { parseInline };
@@ -0,0 +1,106 @@
1
+ /**
2
+ * @fileoverview Parse markdown lists
3
+ */
4
+
5
+ import { marked } from "marked";
6
+ import { parseInline } from "./inline.js";
7
+
8
/**
 * Extract a list item's own text, dropping lines that belong to a nested
 * list (lines beginning with "-" or an ordered marker such as "1.").
 * @param {Object} item - List item token
 * @returns {string} The item's main content text
 */
function extractMainContent(item) {
    // A line starts a nested list entry if, once trimmed, it begins with a
    // bullet dash or an ordered-list number marker.
    const isNestedListLine = (line) => {
        const trimmed = line.trim();
        return trimmed.startsWith("-") || /^\d+\./.test(trimmed);
    };

    return (item.text || "")
        .split("\n")
        .filter((line) => !isNestedListLine(line))
        .join("\n");
}
24
+
25
/**
 * Parse a single list item's content into ProseMirror nodes.
 *
 * Two-phase approach:
 *   1. The item's own text (with nested-list lines stripped out by
 *      extractMainContent) is re-lexed inline and wrapped in a paragraph.
 *   2. Lines that look like nested list entries are collected verbatim and
 *      re-run through marked.lexer as a fresh markdown snippet; any list
 *      tokens that come back are appended after the paragraph.
 *
 * @param {Object} item - List item token
 * @param {Object} schema - ProseMirror schema
 * @returns {Array} Array of ProseMirror nodes for the item content
 */
function parseListItemContent(item, schema) {
    const mainContent = extractMainContent(item);
    // Re-lex just the main text; removeNewLine=true strips the soft line
    // breaks marked keeps inside list-item raw text.
    const inlineTokens = marked.Lexer.lexInline(mainContent);

    const content = [
        {
            type: "paragraph",
            content: inlineTokens.flatMap((t) => parseInline(t, schema, true)),
        },
    ];

    // Handle nested lists by parsing them as new markdown
    if (item.text) {
        const lines = item.text.split("\n");
        let currentNested = [];
        let isNested = false;

        for (const line of lines) {
            const trimmed = line.trim();
            if (trimmed.startsWith("-") || trimmed.match(/^\d+\./)) {
                // Keep the original (indented) line so marked can infer
                // nesting depth when the snippet is re-parsed.
                currentNested.push(line);
                isNested = true;
            } else if (isNested && trimmed === "") {
                // Blank lines after a nested entry are preserved so the
                // re-parsed snippet keeps its list grouping.
                // NOTE(review): non-blank continuation lines of a nested
                // item are dropped here — confirm this is intended.
                currentNested.push(line);
            }
        }

        if (currentNested.length > 0) {
            const nestedMarkdown = currentNested.join("\n");
            const nestedTokens = marked.lexer(nestedMarkdown);

            for (const token of nestedTokens) {
                if (token.type === "list") {
                    content.push({
                        type: token.ordered ? "orderedList" : "bulletList",
                        // Ordered lists carry their starting number; the
                        // attrs key is omitted entirely for bullet lists.
                        ...(token.ordered && {
                            attrs: { start: token.start || 1 },
                        }),
                        content: parseListItems(token.items, schema),
                    });
                }
            }
        }
    }

    return content;
}
78
+
79
/**
 * Build ProseMirror listItem nodes from marked list-item tokens,
 * recursing through parseListItemContent for nested lists.
 * @param {Array} items - Array of list item tokens
 * @param {Object} schema - ProseMirror schema
 * @returns {Array} Array of ProseMirror listItem nodes
 */
function parseListItems(items, schema) {
    const listItems = [];
    for (const item of items) {
        listItems.push({
            type: "listItem",
            content: parseListItemContent(item, schema),
        });
    }
    return listItems;
}
91
+
92
/**
 * Parse a marked list token into a ProseMirror list node.
 * @param {Object} token - List token from marked
 * @param {Object} schema - ProseMirror schema
 * @returns {Object} ProseMirror orderedList/bulletList node; ordered lists
 *     carry an attrs.start, bullet lists have no attrs key at all
 */
function parseList(token, schema) {
    // Preserve an explicit start of 0 ("0. item"): the old `start || 1`
    // collapsed it to 1. Fall back to 1 only when marked supplies no
    // numeric start.
    const attrs = token.ordered
        ? { attrs: { start: typeof token.start === "number" ? token.start : 1 } }
        : null;

    return {
        type: token.ordered ? "orderedList" : "bulletList",
        ...attrs,
        content: parseListItems(token.items, schema),
    };
}
105
+
106
+ export { parseList, parseListItems };