npm - @rokelamen/md2html - Versions diffs - 0.1.3 → 0.1.4 - Mend

@rokelamen/md2html 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -2,8 +2,14 @@
 A simple Markdown-to-HTML converter written in TypeScript.
+## Goal
 > I created this project to learn TS and Node development, not to build yet another, better Markdown parsing engine.
+Markdown syntax was first promoted with the release of `markdown.pl` by John Gruber. As a result, Markdown has no explicit definition, which means that how Markdown is parsed into HTML depends heavily on the implementation of the tool. *I chose the simplest approach (line-by-line parsing).*
+To stay as close as possible to the 'Standard Markdown', [CommonMark](https://commonmark.org/) is a great reference.
 ## Development Log
 Why did I choose to use [`rollup`](https://rollupjs.org/)?

package/bin/cli.cjs CHANGED Viewed

@@ -4336,7 +4336,96 @@ const {
   Help,
 } = commander;
-const headerReg = /^\s*(#{1,6})(?:\s+|$)(.*)$/;
+/* For markdown line pattern pair */
+const headingReg = /^\s*(#{1,6})(?:\s+|$)(.*)$/;
+const quoteReg = /^>\s*(.*)$/;
+const ulistReg = /^\s*([-+*])(?:\s+|$)(.*)$/;
+const olistReg = /^\s*(\d+)(.|\))(?:\s+|$)(.*)$/;
+const codeStartReg = /^```([^`]*)$/;
+const codeEndReg = /^```\s*$/;
+/* For text inline pattern pair */
+const inlineCodeReg = /(`+)([^`]+?)\1/g;
+const imgReg = /!\[([^\]]+)\]\(([^)\s]+)\)/g;
+const linkReg = /\[([^\]]+)\]\(([^)\s]+)\)/g;
+const boldItalicReg = /(\*\*\*|___)([^*_]+)\1/g;
+const boldReg = /(\*\*|__)([^*_]+)\1/g;
+const italicReg = /([*_])([^*_]+)\1/g;
+/**
+ * When a scope in Markdown is of `code` type,
+ * the content inside this area must not be parsed as either Markdown or HTML.
+ * It should be treated as pure text content.
+ * Therefore, it can not carry any semantic representation in HTML.
+ * This function is intended to remove all such representations.
+ */
+function escapeHtml(content) {
+    return content;
+}
+/* traverse markdown content elements and wrap text with tags at proper positions. */
+function renderToHtml(mdElements) {
+    let result = '';
+    for (const element of mdElements) {
+        const type = element.type;
+        switch (type) {
+            case "text":
+                result += `<p>${inlineParse(element.content)}</p>\n`;
+                break;
+            case "heading":
+                result += `<h${element.level}>${inlineParse(element.content)}</h${element.level}>\n`;
+                break;
+            case "quote":
+                result += `<quote>${inlineParse(element.content)}</quote>\n`;
+                break;
+            case 'ulist':
+                result += '<ul>\n' +
+                    element.items
+                        .map(item => `  <li>${inlineParse(item)}</li>`)
+                        .join('\n') +
+                    '\n</ul>\n';
+                break;
+            case 'olist':
+                result += `<ol start="${element.start}">\n` +
+                    element.items
+                        .map(item => `  <li>${inlineParse(item)}</li>`)
+                        .join('\n') +
+                    '\n</ol>\n';
+                break;
+            case "code":
+                result += '<code>\n' +
+                    element.items
+                        .map(item => `  <p>${escapeHtml(item)}</p>`)
+                        .join('\n') +
+                    '\n</code>\n';
+                break;
+        }
+    }
+    return result;
+}
+function inlineParse(content) {
+    const placeholders = [];
+    let idx = 0;
+    /* Make placeholders for code */
+    const stash = (html) => {
+        const key = `\u0000${idx}\u0000`;
+        placeholders.push(html);
+        idx++;
+        return key;
+    };
+    // 1. code
+    content = content
+        .replace(inlineCodeReg, (_, __, code) => stash(`<code>${code}</code>`));
+    // 2. link and emphasis
+    content = content
+        .replace(imgReg, '<img src="$2" alt="$1">')
+        .replace(linkReg, '<a href="$2">$1</a>')
+        .replace(boldItalicReg, '<strong><em>$2</em></strong>')
+        .replace(boldReg, '<strong>$2</strong>')
+        .replace(italicReg, '<em>$2</em>');
+    // 3. restore codes
+    content = content.replace(/\u0000(\d+)\u0000/g, (_, i) => escapeHtml(placeholders[i]));
+    return content;
+}
 /**
  * Since AST-based parsing is too complex and not
@@ -4368,82 +4457,136 @@ function parse(markdown) {
     const crlfReg = /\r?\n/;
     const lines = markdown.split(crlfReg);
     // console.log(lines);
-    const mdBlocks = parseToBlocks(lines);
-    // console.log(mdBlocks);
-    const html = handleTags(mdBlocks);
+    const mdElements = parseToElements(lines);
+    // console.log(mdElements);
+    const html = renderToHtml(mdElements);
     return html;
 }
 /**
- * Traverse lines to turn to blocks with different types
+ * Traverse lines to turn to markdown elements with different well-designed structures
  */
-function parseToBlocks(lines) {
-    let lastTextQuoteBlock = { content: '', type: 'text' };
-    let pushed = true;
-    const mdBlocks = [];
+function parseToElements(lines) {
+    let lastFlowElement = null;
+    const mdElements = [];
+    /* Push last flow text element into the return value */
+    const flush = () => {
+        if (lastFlowElement) {
+            mdElements.push(lastFlowElement);
+            lastFlowElement = null;
+        }
+    };
     for (const line of lines) {
-        // Empty line
-        if (!Boolean(line.trim())) {
-            if (!pushed) {
-                mdBlocks.push(lastTextQuoteBlock);
-                pushed = true;
+        // Code End
+        if (lastFlowElement?.type === 'code') {
+            if (codeEndReg.test(line)) {
+                flush();
             }
+            else {
+                lastFlowElement.items.push(line);
+            }
+            continue;
+        }
+        // Empty line
+        if (!line.trim()) {
+            flush();
             continue;
         }
-        // Header
-        const headerM = line.match(headerReg);
-        if (headerM) {
-            mdBlocks.push({
-                type: 'header',
-                level: headerM[1].length,
-                content: headerM[2].trim()
+        // Headings
+        const headingM = line.match(headingReg);
+        if (headingM) {
+            flush();
+            mdElements.push({
+                type: 'heading',
+                level: headingM[1].length,
+                content: headingM[2].trim()
             });
             continue;
         }
+        // Quote
+        const quoteM = line.match(quoteReg);
+        if (quoteM) {
+            /* Last line is quote as well */
+            if (lastFlowElement?.type === 'quote') {
+                lastFlowElement.content += ' ' + quoteM[1].trim();
+            }
+            else {
+                flush();
+                lastFlowElement = {
+                    type: 'quote',
+                    content: quoteM[1].trim()
+                };
+            }
+            continue;
+        }
+        // Unordered List
+        const ulistM = line.match(ulistReg);
+        if (ulistM) {
+            if (lastFlowElement?.type === 'ulist' && lastFlowElement.sign === ulistM[1]) {
+                lastFlowElement.items.push(ulistM[2].trim());
+            }
+            else {
+                flush();
+                lastFlowElement = {
+                    type: 'ulist',
+                    sign: ulistM[1],
+                    items: [ulistM[2].trim()]
+                };
+            }
+            continue;
+        }
+        // Ordered List
+        const olistM = line.match(olistReg);
+        if (olistM) {
+            if (lastFlowElement?.type === 'olist' && lastFlowElement.delimiter === olistM[2]) {
+                lastFlowElement.items.push(olistM[3].trim());
+            }
+            else {
+                flush();
+                lastFlowElement = {
+                    type: 'olist',
+                    start: parseInt(olistM[1]),
+                    delimiter: olistM[2],
+                    items: [olistM[3].trim()]
+                };
+            }
+            continue;
+        }
+        // Code Start
+        const codeStartM = line.match(codeStartReg);
+        if (codeStartM) {
+            flush();
+            lastFlowElement = {
+                type: 'code',
+                lang: codeStartM[1],
+                items: []
+            };
+            continue;
+        }
         // Fall back to plain text
-        if (!pushed) {
-            /* last line is also text */
-            lastTextQuoteBlock.content += ' ' + line.trim();
+        if (lastFlowElement &&
+            ['text', 'quote', 'ulist', 'olist'].includes(lastFlowElement.type)) {
+            if (lastFlowElement.type === 'ulist' || lastFlowElement.type === 'olist') {
+                lastFlowElement.items[lastFlowElement.items.length - 1] += ' ' + line.trim();
+            }
+            else {
+                lastFlowElement.content += ' ' + line.trim();
+            }
         }
         else {
-            lastTextQuoteBlock = {
+            flush();
+            lastFlowElement = {
                 type: 'text',
                 content: line.trim()
             };
-            pushed = false;
         }
     }
-    // Avoid the last block is omitted
-    if (!pushed) {
-        mdBlocks.push(lastTextQuoteBlock);
-        pushed = true;
-    }
-    return mdBlocks;
-}
-/* traverse markdown content blocks and wrap text with tags at proper positions. */
-function handleTags(mdBlocks) {
-    let result = '';
-    for (const block of mdBlocks) {
-        const type = block.type;
-        const content = tagSwtich(block);
-        switch (type) {
-            case "text":
-                result += `<p>${content}</p>` +
-                    '\n';
-                break;
-            case "header":
-                result += `<h${block.level}>${content}</h${block.level}>` +
-                    '\n';
-                break;
-        }
-    }
-    return result;
-}
-function tagSwtich(block) {
-    return block.content;
+    // Avoid the last element is omitted
+    flush();
+    return mdElements;
 }
 var name = "@rokelamen/md2html";
-var version = "0.1.3";
+var version = "0.1.4";
 var description = "A simple tool to convert markdown content to html";
 /* Command-line tool logic */
@@ -4456,6 +4599,7 @@ function command() {
     /* Config arguments info */
     program
         .option('-f, --file <path>', 'source file path')
+        .option('-o, --output <path>', 'output file path')
         .argument('[input]', 'input content');
     /* Parse the cli options */
     program.parse(process.argv);
@@ -4481,6 +4625,11 @@ function command() {
             }
         })()
         : input;
-    console.log(parse(content));
+    const html = parse(content);
+    if (typeof options.output === 'string') {
+        fs__namespace.writeFileSync(options.output, html, 'utf-8');
+        return;
+    }
+    console.log(html);
 }
 command();

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,93 @@
-const headerReg = /^\s*(#{1,6})(?:\s+|$)(.*)$/;
+/* For markdown line pattern pair */
+const headingReg = /^\s*(#{1,6})(?:\s+|$)(.*)$/;
+const quoteReg = /^>\s*(.*)$/;
+const ulistReg = /^\s*([-+*])(?:\s+|$)(.*)$/;
+const olistReg = /^\s*(\d+)(.|\))(?:\s+|$)(.*)$/;
+const codeStartReg = /^```([^`]*)$/;
+const codeEndReg = /^```\s*$/;
+/* For text inline pattern pair */
+const inlineCodeReg = /(`+)([^`]+?)\1/g;
+const imgReg = /!\[([^\]]+)\]\(([^)\s]+)\)/g;
+const linkReg = /\[([^\]]+)\]\(([^)\s]+)\)/g;
+const boldItalicReg = /(\*\*\*|___)([^*_]+)\1/g;
+const boldReg = /(\*\*|__)([^*_]+)\1/g;
+const italicReg = /([*_])([^*_]+)\1/g;
+/**
+ * When a scope in Markdown is of `code` type,
+ * the content inside this area must not be parsed as either Markdown or HTML.
+ * It should be treated as pure text content.
+ * Therefore, it can not carry any semantic representation in HTML.
+ * This function is intended to remove all such representations.
+ */
+function escapeHtml(content) {
+    return content;
+}
+/* traverse markdown content elements and wrap text with tags at proper positions. */
+function renderToHtml(mdElements) {
+    let result = '';
+    for (const element of mdElements) {
+        const type = element.type;
+        switch (type) {
+            case "text":
+                result += `<p>${inlineParse(element.content)}</p>\n`;
+                break;
+            case "heading":
+                result += `<h${element.level}>${inlineParse(element.content)}</h${element.level}>\n`;
+                break;
+            case "quote":
+                result += `<quote>${inlineParse(element.content)}</quote>\n`;
+                break;
+            case 'ulist':
+                result += '<ul>\n' +
+                    element.items
+                        .map(item => `  <li>${inlineParse(item)}</li>`)
+                        .join('\n') +
+                    '\n</ul>\n';
+                break;
+            case 'olist':
+                result += `<ol start="${element.start}">\n` +
+                    element.items
+                        .map(item => `  <li>${inlineParse(item)}</li>`)
+                        .join('\n') +
+                    '\n</ol>\n';
+                break;
+            case "code":
+                result += '<code>\n' +
+                    element.items
+                        .map(item => `  <p>${escapeHtml(item)}</p>`)
+                        .join('\n') +
+                    '\n</code>\n';
+                break;
+        }
+    }
+    return result;
+}
+function inlineParse(content) {
+    const placeholders = [];
+    let idx = 0;
+    /* Make placeholders for code */
+    const stash = (html) => {
+        const key = `\u0000${idx}\u0000`;
+        placeholders.push(html);
+        idx++;
+        return key;
+    };
+    // 1. code
+    content = content
+        .replace(inlineCodeReg, (_, __, code) => stash(`<code>${code}</code>`));
+    // 2. link and emphasis
+    content = content
+        .replace(imgReg, '<img src="$2" alt="$1">')
+        .replace(linkReg, '<a href="$2">$1</a>')
+        .replace(boldItalicReg, '<strong><em>$2</em></strong>')
+        .replace(boldReg, '<strong>$2</strong>')
+        .replace(italicReg, '<em>$2</em>');
+    // 3. restore codes
+    content = content.replace(/\u0000(\d+)\u0000/g, (_, i) => escapeHtml(placeholders[i]));
+    return content;
+}
 /**
  * Since AST-based parsing is too complex and not
@@ -30,78 +119,132 @@ function parse(markdown) {
     const crlfReg = /\r?\n/;
     const lines = markdown.split(crlfReg);
     // console.log(lines);
-    const mdBlocks = parseToBlocks(lines);
-    // console.log(mdBlocks);
-    const html = handleTags(mdBlocks);
+    const mdElements = parseToElements(lines);
+    // console.log(mdElements);
+    const html = renderToHtml(mdElements);
     return html;
 }
 /**
- * Traverse lines to turn to blocks with different types
+ * Traverse lines to turn to markdown elements with different well-designed structures
  */
-function parseToBlocks(lines) {
-    let lastTextQuoteBlock = { content: '', type: 'text' };
-    let pushed = true;
-    const mdBlocks = [];
+function parseToElements(lines) {
+    let lastFlowElement = null;
+    const mdElements = [];
+    /* Push last flow text element into the return value */
+    const flush = () => {
+        if (lastFlowElement) {
+            mdElements.push(lastFlowElement);
+            lastFlowElement = null;
+        }
+    };
     for (const line of lines) {
-        // Empty line
-        if (!Boolean(line.trim())) {
-            if (!pushed) {
-                mdBlocks.push(lastTextQuoteBlock);
-                pushed = true;
+        // Code End
+        if (lastFlowElement?.type === 'code') {
+            if (codeEndReg.test(line)) {
+                flush();
             }
+            else {
+                lastFlowElement.items.push(line);
+            }
+            continue;
+        }
+        // Empty line
+        if (!line.trim()) {
+            flush();
             continue;
         }
-        // Header
-        const headerM = line.match(headerReg);
-        if (headerM) {
-            mdBlocks.push({
-                type: 'header',
-                level: headerM[1].length,
-                content: headerM[2].trim()
+        // Headings
+        const headingM = line.match(headingReg);
+        if (headingM) {
+            flush();
+            mdElements.push({
+                type: 'heading',
+                level: headingM[1].length,
+                content: headingM[2].trim()
             });
             continue;
         }
+        // Quote
+        const quoteM = line.match(quoteReg);
+        if (quoteM) {
+            /* Last line is quote as well */
+            if (lastFlowElement?.type === 'quote') {
+                lastFlowElement.content += ' ' + quoteM[1].trim();
+            }
+            else {
+                flush();
+                lastFlowElement = {
+                    type: 'quote',
+                    content: quoteM[1].trim()
+                };
+            }
+            continue;
+        }
+        // Unordered List
+        const ulistM = line.match(ulistReg);
+        if (ulistM) {
+            if (lastFlowElement?.type === 'ulist' && lastFlowElement.sign === ulistM[1]) {
+                lastFlowElement.items.push(ulistM[2].trim());
+            }
+            else {
+                flush();
+                lastFlowElement = {
+                    type: 'ulist',
+                    sign: ulistM[1],
+                    items: [ulistM[2].trim()]
+                };
+            }
+            continue;
+        }
+        // Ordered List
+        const olistM = line.match(olistReg);
+        if (olistM) {
+            if (lastFlowElement?.type === 'olist' && lastFlowElement.delimiter === olistM[2]) {
+                lastFlowElement.items.push(olistM[3].trim());
+            }
+            else {
+                flush();
+                lastFlowElement = {
+                    type: 'olist',
+                    start: parseInt(olistM[1]),
+                    delimiter: olistM[2],
+                    items: [olistM[3].trim()]
+                };
+            }
+            continue;
+        }
+        // Code Start
+        const codeStartM = line.match(codeStartReg);
+        if (codeStartM) {
+            flush();
+            lastFlowElement = {
+                type: 'code',
+                lang: codeStartM[1],
+                items: []
+            };
+            continue;
+        }
         // Fall back to plain text
-        if (!pushed) {
-            /* last line is also text */
-            lastTextQuoteBlock.content += ' ' + line.trim();
+        if (lastFlowElement &&
+            ['text', 'quote', 'ulist', 'olist'].includes(lastFlowElement.type)) {
+            if (lastFlowElement.type === 'ulist' || lastFlowElement.type === 'olist') {
+                lastFlowElement.items[lastFlowElement.items.length - 1] += ' ' + line.trim();
+            }
+            else {
+                lastFlowElement.content += ' ' + line.trim();
+            }
         }
         else {
-            lastTextQuoteBlock = {
+            flush();
+            lastFlowElement = {
                 type: 'text',
                 content: line.trim()
             };
-            pushed = false;
         }
     }
-    // Avoid the last block is omitted
-    if (!pushed) {
-        mdBlocks.push(lastTextQuoteBlock);
-        pushed = true;
-    }
-    return mdBlocks;
-}
-/* traverse markdown content blocks and wrap text with tags at proper positions. */
-function handleTags(mdBlocks) {
-    let result = '';
-    for (const block of mdBlocks) {
-        const type = block.type;
-        const content = tagSwtich(block);
-        switch (type) {
-            case "text":
-                result += `<p>${content}</p>` +
-                    '\n';
-                break;
-            case "header":
-                result += `<h${block.level}>${content}</h${block.level}>` +
-                    '\n';
-                break;
-        }
-    }
-    return result;
-}
-function tagSwtich(block) {
-    return block.content;
+    // Avoid the last element is omitted
+    flush();
+    return mdElements;
 }
 export { parse };

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@rokelamen/md2html",
   "type": "module",
-  "version": "0.1.3",
+  "version": "0.1.4",
   "description": "A simple tool to convert markdown content to html",
   "author": "rokelamen <rogerskelamen@gmail.com>",
   "license": "MIT",