@rokelamen/md2html 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,8 +2,14 @@
2
2
 
3
3
  A simple Markdown-to-HTML converter written in TypeScript.
4
4
 
5
+ ## Goal
6
+
5
7
  > I created this project to learn TypeScript and Node.js development, not to build yet another, better Markdown parsing engine.
6
8
 
9
+ Markdown syntax was first introduced with the release of `markdown.pl` by John Gruber. As a result, Markdown has no formal definition, which means that how Markdown is parsed into HTML depends heavily on the implementation of the tool. *I chose the simplest approach (line-by-line parsing).*
10
+
11
+ [CommonMark](https://commonmark.org/) is a great reference for staying as close as possible to 'standard Markdown'.
12
+
7
13
  ## Development Log
8
14
 
9
15
  Why did I choose to use [`rollup`](https://rollupjs.org/)?
package/bin/cli.cjs CHANGED
@@ -4336,7 +4336,96 @@ const {
4336
4336
  Help,
4337
4337
  } = commander;
4338
4338
 
4339
- const headerReg = /^\s*(#{1,6})(?:\s+|$)(.*)$/;
4339
+ /* For markdown line pattern pair */
4340
+ const headingReg = /^\s*(#{1,6})(?:\s+|$)(.*)$/;
4341
+ const quoteReg = /^>\s*(.*)$/;
4342
+ const ulistReg = /^\s*([-+*])(?:\s+|$)(.*)$/;
4343
+ const olistReg = /^\s*(\d+)(.|\))(?:\s+|$)(.*)$/;
4344
+ const codeStartReg = /^```([^`]*)$/;
4345
+ const codeEndReg = /^```\s*$/;
4346
+ /* For text inline pattern pair */
4347
+ const inlineCodeReg = /(`+)([^`]+?)\1/g;
4348
+ const imgReg = /!\[([^\]]+)\]\(([^)\s]+)\)/g;
4349
+ const linkReg = /\[([^\]]+)\]\(([^)\s]+)\)/g;
4350
+ const boldItalicReg = /(\*\*\*|___)([^*_]+)\1/g;
4351
+ const boldReg = /(\*\*|__)([^*_]+)\1/g;
4352
+ const italicReg = /([*_])([^*_]+)\1/g;
4353
+
4354
+ /**
4355
+ * When a scope in Markdown is of `code` type,
4356
+ * the content inside this area must not be parsed as either Markdown or HTML.
4357
+ * It should be treated as pure text content.
4358
+ * Therefore, it can not carry any semantic representation in HTML.
4359
+ * This function is intended to remove all such representations.
4360
+ */
4361
+ function escapeHtml(content) {
4362
+ return content;
4363
+ }
4364
+
4365
+ /* traverse markdown content elements and wrap text with tags at proper positions. */
4366
+ function renderToHtml(mdElements) {
4367
+ let result = '';
4368
+ for (const element of mdElements) {
4369
+ const type = element.type;
4370
+ switch (type) {
4371
+ case "text":
4372
+ result += `<p>${inlineParse(element.content)}</p>\n`;
4373
+ break;
4374
+ case "heading":
4375
+ result += `<h${element.level}>${inlineParse(element.content)}</h${element.level}>\n`;
4376
+ break;
4377
+ case "quote":
4378
+ result += `<quote>${inlineParse(element.content)}</quote>\n`;
4379
+ break;
4380
+ case 'ulist':
4381
+ result += '<ul>\n' +
4382
+ element.items
4383
+ .map(item => ` <li>${inlineParse(item)}</li>`)
4384
+ .join('\n') +
4385
+ '\n</ul>\n';
4386
+ break;
4387
+ case 'olist':
4388
+ result += `<ol start="${element.start}">\n` +
4389
+ element.items
4390
+ .map(item => ` <li>${inlineParse(item)}</li>`)
4391
+ .join('\n') +
4392
+ '\n</ol>\n';
4393
+ break;
4394
+ case "code":
4395
+ result += '<code>\n' +
4396
+ element.items
4397
+ .map(item => ` <p>${escapeHtml(item)}</p>`)
4398
+ .join('\n') +
4399
+ '\n</code>\n';
4400
+ break;
4401
+ }
4402
+ }
4403
+ return result;
4404
+ }
4405
+ function inlineParse(content) {
4406
+ const placeholders = [];
4407
+ let idx = 0;
4408
+ /* Make placeholders for code */
4409
+ const stash = (html) => {
4410
+ const key = `\u0000${idx}\u0000`;
4411
+ placeholders.push(html);
4412
+ idx++;
4413
+ return key;
4414
+ };
4415
+ // 1. code
4416
+ content = content
4417
+ .replace(inlineCodeReg, (_, __, code) => stash(`<code>${code}</code>`));
4418
+ // 2. link and emphasis
4419
+ content = content
4420
+ .replace(imgReg, '<img src="$2" alt="$1">')
4421
+ .replace(linkReg, '<a href="$2">$1</a>')
4422
+ .replace(boldItalicReg, '<strong><em>$2</em></strong>')
4423
+ .replace(boldReg, '<strong>$2</strong>')
4424
+ .replace(italicReg, '<em>$2</em>');
4425
+ // 3. restore codes
4426
+ content = content.replace(/\u0000(\d+)\u0000/g, (_, i) => escapeHtml(placeholders[i]));
4427
+ return content;
4428
+ }
4340
4429
 
4341
4430
  /**
4342
4431
  * Since AST-based parsing is too complex and not
@@ -4368,82 +4457,136 @@ function parse(markdown) {
4368
4457
  const crlfReg = /\r?\n/;
4369
4458
  const lines = markdown.split(crlfReg);
4370
4459
  // console.log(lines);
4371
- const mdBlocks = parseToBlocks(lines);
4372
- // console.log(mdBlocks);
4373
- const html = handleTags(mdBlocks);
4460
+ const mdElements = parseToElements(lines);
4461
+ // console.log(mdElements);
4462
+ const html = renderToHtml(mdElements);
4374
4463
  return html;
4375
4464
  }
4376
4465
  /**
4377
- * Traverse lines to turn to blocks with different types
4466
+ * Traverse lines to turn to markdown elements with different well-designed structures
4378
4467
  */
4379
- function parseToBlocks(lines) {
4380
- let lastTextQuoteBlock = { content: '', type: 'text' };
4381
- let pushed = true;
4382
- const mdBlocks = [];
4468
+ function parseToElements(lines) {
4469
+ let lastFlowElement = null;
4470
+ const mdElements = [];
4471
+ /* Push last flow text element into the return value */
4472
+ const flush = () => {
4473
+ if (lastFlowElement) {
4474
+ mdElements.push(lastFlowElement);
4475
+ lastFlowElement = null;
4476
+ }
4477
+ };
4383
4478
  for (const line of lines) {
4384
- // Empty line
4385
- if (!Boolean(line.trim())) {
4386
- if (!pushed) {
4387
- mdBlocks.push(lastTextQuoteBlock);
4388
- pushed = true;
4479
+ // Code End
4480
+ if (lastFlowElement?.type === 'code') {
4481
+ if (codeEndReg.test(line)) {
4482
+ flush();
4389
4483
  }
4484
+ else {
4485
+ lastFlowElement.items.push(line);
4486
+ }
4487
+ continue;
4488
+ }
4489
+ // Empty line
4490
+ if (!line.trim()) {
4491
+ flush();
4390
4492
  continue;
4391
4493
  }
4392
- // Header
4393
- const headerM = line.match(headerReg);
4394
- if (headerM) {
4395
- mdBlocks.push({
4396
- type: 'header',
4397
- level: headerM[1].length,
4398
- content: headerM[2].trim()
4494
+ // Headings
4495
+ const headingM = line.match(headingReg);
4496
+ if (headingM) {
4497
+ flush();
4498
+ mdElements.push({
4499
+ type: 'heading',
4500
+ level: headingM[1].length,
4501
+ content: headingM[2].trim()
4399
4502
  });
4400
4503
  continue;
4401
4504
  }
4505
+ // Quote
4506
+ const quoteM = line.match(quoteReg);
4507
+ if (quoteM) {
4508
+ /* Last line is quote as well */
4509
+ if (lastFlowElement?.type === 'quote') {
4510
+ lastFlowElement.content += ' ' + quoteM[1].trim();
4511
+ }
4512
+ else {
4513
+ flush();
4514
+ lastFlowElement = {
4515
+ type: 'quote',
4516
+ content: quoteM[1].trim()
4517
+ };
4518
+ }
4519
+ continue;
4520
+ }
4521
+ // Unordered List
4522
+ const ulistM = line.match(ulistReg);
4523
+ if (ulistM) {
4524
+ if (lastFlowElement?.type === 'ulist' && lastFlowElement.sign === ulistM[1]) {
4525
+ lastFlowElement.items.push(ulistM[2].trim());
4526
+ }
4527
+ else {
4528
+ flush();
4529
+ lastFlowElement = {
4530
+ type: 'ulist',
4531
+ sign: ulistM[1],
4532
+ items: [ulistM[2].trim()]
4533
+ };
4534
+ }
4535
+ continue;
4536
+ }
4537
+ // Ordered List
4538
+ const olistM = line.match(olistReg);
4539
+ if (olistM) {
4540
+ if (lastFlowElement?.type === 'olist' && lastFlowElement.delimiter === olistM[2]) {
4541
+ lastFlowElement.items.push(olistM[3].trim());
4542
+ }
4543
+ else {
4544
+ flush();
4545
+ lastFlowElement = {
4546
+ type: 'olist',
4547
+ start: parseInt(olistM[1]),
4548
+ delimiter: olistM[2],
4549
+ items: [olistM[3].trim()]
4550
+ };
4551
+ }
4552
+ continue;
4553
+ }
4554
+ // Code Start
4555
+ const codeStartM = line.match(codeStartReg);
4556
+ if (codeStartM) {
4557
+ flush();
4558
+ lastFlowElement = {
4559
+ type: 'code',
4560
+ lang: codeStartM[1],
4561
+ items: []
4562
+ };
4563
+ continue;
4564
+ }
4402
4565
  // Fall back to plain text
4403
- if (!pushed) {
4404
- /* last line is also text */
4405
- lastTextQuoteBlock.content += ' ' + line.trim();
4566
+ if (lastFlowElement &&
4567
+ ['text', 'quote', 'ulist', 'olist'].includes(lastFlowElement.type)) {
4568
+ if (lastFlowElement.type === 'ulist' || lastFlowElement.type === 'olist') {
4569
+ lastFlowElement.items[lastFlowElement.items.length - 1] += ' ' + line.trim();
4570
+ }
4571
+ else {
4572
+ lastFlowElement.content += ' ' + line.trim();
4573
+ }
4406
4574
  }
4407
4575
  else {
4408
- lastTextQuoteBlock = {
4576
+ flush();
4577
+ lastFlowElement = {
4409
4578
  type: 'text',
4410
4579
  content: line.trim()
4411
4580
  };
4412
- pushed = false;
4413
4581
  }
4414
4582
  }
4415
- // Avoid the last block is omitted
4416
- if (!pushed) {
4417
- mdBlocks.push(lastTextQuoteBlock);
4418
- pushed = true;
4419
- }
4420
- return mdBlocks;
4421
- }
4422
- /* traverse markdown content blocks and wrap text with tags at proper positions. */
4423
- function handleTags(mdBlocks) {
4424
- let result = '';
4425
- for (const block of mdBlocks) {
4426
- const type = block.type;
4427
- const content = tagSwtich(block);
4428
- switch (type) {
4429
- case "text":
4430
- result += `<p>${content}</p>` +
4431
- '\n';
4432
- break;
4433
- case "header":
4434
- result += `<h${block.level}>${content}</h${block.level}>` +
4435
- '\n';
4436
- break;
4437
- }
4438
- }
4439
- return result;
4440
- }
4441
- function tagSwtich(block) {
4442
- return block.content;
4583
+ // Avoid the last element is omitted
4584
+ flush();
4585
+ return mdElements;
4443
4586
  }
4444
4587
 
4445
4588
  var name = "@rokelamen/md2html";
4446
- var version = "0.1.3";
4589
+ var version = "0.1.4";
4447
4590
  var description = "A simple tool to convert markdown content to html";
4448
4591
 
4449
4592
  /* Command-line tool logic */
@@ -4456,6 +4599,7 @@ function command() {
4456
4599
  /* Config arguments info */
4457
4600
  program
4458
4601
  .option('-f, --file <path>', 'source file path')
4602
+ .option('-o, --output <path>', 'output file path')
4459
4603
  .argument('[input]', 'input content');
4460
4604
  /* Parse the cli options */
4461
4605
  program.parse(process.argv);
@@ -4481,6 +4625,11 @@ function command() {
4481
4625
  }
4482
4626
  })()
4483
4627
  : input;
4484
- console.log(parse(content));
4628
+ const html = parse(content);
4629
+ if (typeof options.output === 'string') {
4630
+ fs__namespace.writeFileSync(options.output, html, 'utf-8');
4631
+ return;
4632
+ }
4633
+ console.log(html);
4485
4634
  }
4486
4635
  command();
package/dist/index.js CHANGED
@@ -1,4 +1,93 @@
1
- const headerReg = /^\s*(#{1,6})(?:\s+|$)(.*)$/;
1
+ /* For markdown line pattern pair */
2
+ const headingReg = /^\s*(#{1,6})(?:\s+|$)(.*)$/;
3
+ const quoteReg = /^>\s*(.*)$/;
4
+ const ulistReg = /^\s*([-+*])(?:\s+|$)(.*)$/;
5
+ const olistReg = /^\s*(\d+)(.|\))(?:\s+|$)(.*)$/;
6
+ const codeStartReg = /^```([^`]*)$/;
7
+ const codeEndReg = /^```\s*$/;
8
+ /* For text inline pattern pair */
9
+ const inlineCodeReg = /(`+)([^`]+?)\1/g;
10
+ const imgReg = /!\[([^\]]+)\]\(([^)\s]+)\)/g;
11
+ const linkReg = /\[([^\]]+)\]\(([^)\s]+)\)/g;
12
+ const boldItalicReg = /(\*\*\*|___)([^*_]+)\1/g;
13
+ const boldReg = /(\*\*|__)([^*_]+)\1/g;
14
+ const italicReg = /([*_])([^*_]+)\1/g;
15
+
16
+ /**
17
+ * When a scope in Markdown is of `code` type,
18
+ * the content inside this area must not be parsed as either Markdown or HTML.
19
+ * It should be treated as pure text content.
20
+ * Therefore, it can not carry any semantic representation in HTML.
21
+ * This function is intended to remove all such representations.
22
+ */
23
+ function escapeHtml(content) {
24
+ return content;
25
+ }
26
+
27
+ /* traverse markdown content elements and wrap text with tags at proper positions. */
28
+ function renderToHtml(mdElements) {
29
+ let result = '';
30
+ for (const element of mdElements) {
31
+ const type = element.type;
32
+ switch (type) {
33
+ case "text":
34
+ result += `<p>${inlineParse(element.content)}</p>\n`;
35
+ break;
36
+ case "heading":
37
+ result += `<h${element.level}>${inlineParse(element.content)}</h${element.level}>\n`;
38
+ break;
39
+ case "quote":
40
+ result += `<quote>${inlineParse(element.content)}</quote>\n`;
41
+ break;
42
+ case 'ulist':
43
+ result += '<ul>\n' +
44
+ element.items
45
+ .map(item => ` <li>${inlineParse(item)}</li>`)
46
+ .join('\n') +
47
+ '\n</ul>\n';
48
+ break;
49
+ case 'olist':
50
+ result += `<ol start="${element.start}">\n` +
51
+ element.items
52
+ .map(item => ` <li>${inlineParse(item)}</li>`)
53
+ .join('\n') +
54
+ '\n</ol>\n';
55
+ break;
56
+ case "code":
57
+ result += '<code>\n' +
58
+ element.items
59
+ .map(item => ` <p>${escapeHtml(item)}</p>`)
60
+ .join('\n') +
61
+ '\n</code>\n';
62
+ break;
63
+ }
64
+ }
65
+ return result;
66
+ }
67
+ function inlineParse(content) {
68
+ const placeholders = [];
69
+ let idx = 0;
70
+ /* Make placeholders for code */
71
+ const stash = (html) => {
72
+ const key = `\u0000${idx}\u0000`;
73
+ placeholders.push(html);
74
+ idx++;
75
+ return key;
76
+ };
77
+ // 1. code
78
+ content = content
79
+ .replace(inlineCodeReg, (_, __, code) => stash(`<code>${code}</code>`));
80
+ // 2. link and emphasis
81
+ content = content
82
+ .replace(imgReg, '<img src="$2" alt="$1">')
83
+ .replace(linkReg, '<a href="$2">$1</a>')
84
+ .replace(boldItalicReg, '<strong><em>$2</em></strong>')
85
+ .replace(boldReg, '<strong>$2</strong>')
86
+ .replace(italicReg, '<em>$2</em>');
87
+ // 3. restore codes
88
+ content = content.replace(/\u0000(\d+)\u0000/g, (_, i) => escapeHtml(placeholders[i]));
89
+ return content;
90
+ }
2
91
 
3
92
  /**
4
93
  * Since AST-based parsing is too complex and not
@@ -30,78 +119,132 @@ function parse(markdown) {
30
119
  const crlfReg = /\r?\n/;
31
120
  const lines = markdown.split(crlfReg);
32
121
  // console.log(lines);
33
- const mdBlocks = parseToBlocks(lines);
34
- // console.log(mdBlocks);
35
- const html = handleTags(mdBlocks);
122
+ const mdElements = parseToElements(lines);
123
+ // console.log(mdElements);
124
+ const html = renderToHtml(mdElements);
36
125
  return html;
37
126
  }
38
127
  /**
39
- * Traverse lines to turn to blocks with different types
128
+ * Traverse lines to turn to markdown elements with different well-designed structures
40
129
  */
41
- function parseToBlocks(lines) {
42
- let lastTextQuoteBlock = { content: '', type: 'text' };
43
- let pushed = true;
44
- const mdBlocks = [];
130
+ function parseToElements(lines) {
131
+ let lastFlowElement = null;
132
+ const mdElements = [];
133
+ /* Push last flow text element into the return value */
134
+ const flush = () => {
135
+ if (lastFlowElement) {
136
+ mdElements.push(lastFlowElement);
137
+ lastFlowElement = null;
138
+ }
139
+ };
45
140
  for (const line of lines) {
46
- // Empty line
47
- if (!Boolean(line.trim())) {
48
- if (!pushed) {
49
- mdBlocks.push(lastTextQuoteBlock);
50
- pushed = true;
141
+ // Code End
142
+ if (lastFlowElement?.type === 'code') {
143
+ if (codeEndReg.test(line)) {
144
+ flush();
51
145
  }
146
+ else {
147
+ lastFlowElement.items.push(line);
148
+ }
149
+ continue;
150
+ }
151
+ // Empty line
152
+ if (!line.trim()) {
153
+ flush();
52
154
  continue;
53
155
  }
54
- // Header
55
- const headerM = line.match(headerReg);
56
- if (headerM) {
57
- mdBlocks.push({
58
- type: 'header',
59
- level: headerM[1].length,
60
- content: headerM[2].trim()
156
+ // Headings
157
+ const headingM = line.match(headingReg);
158
+ if (headingM) {
159
+ flush();
160
+ mdElements.push({
161
+ type: 'heading',
162
+ level: headingM[1].length,
163
+ content: headingM[2].trim()
61
164
  });
62
165
  continue;
63
166
  }
167
+ // Quote
168
+ const quoteM = line.match(quoteReg);
169
+ if (quoteM) {
170
+ /* Last line is quote as well */
171
+ if (lastFlowElement?.type === 'quote') {
172
+ lastFlowElement.content += ' ' + quoteM[1].trim();
173
+ }
174
+ else {
175
+ flush();
176
+ lastFlowElement = {
177
+ type: 'quote',
178
+ content: quoteM[1].trim()
179
+ };
180
+ }
181
+ continue;
182
+ }
183
+ // Unordered List
184
+ const ulistM = line.match(ulistReg);
185
+ if (ulistM) {
186
+ if (lastFlowElement?.type === 'ulist' && lastFlowElement.sign === ulistM[1]) {
187
+ lastFlowElement.items.push(ulistM[2].trim());
188
+ }
189
+ else {
190
+ flush();
191
+ lastFlowElement = {
192
+ type: 'ulist',
193
+ sign: ulistM[1],
194
+ items: [ulistM[2].trim()]
195
+ };
196
+ }
197
+ continue;
198
+ }
199
+ // Ordered List
200
+ const olistM = line.match(olistReg);
201
+ if (olistM) {
202
+ if (lastFlowElement?.type === 'olist' && lastFlowElement.delimiter === olistM[2]) {
203
+ lastFlowElement.items.push(olistM[3].trim());
204
+ }
205
+ else {
206
+ flush();
207
+ lastFlowElement = {
208
+ type: 'olist',
209
+ start: parseInt(olistM[1]),
210
+ delimiter: olistM[2],
211
+ items: [olistM[3].trim()]
212
+ };
213
+ }
214
+ continue;
215
+ }
216
+ // Code Start
217
+ const codeStartM = line.match(codeStartReg);
218
+ if (codeStartM) {
219
+ flush();
220
+ lastFlowElement = {
221
+ type: 'code',
222
+ lang: codeStartM[1],
223
+ items: []
224
+ };
225
+ continue;
226
+ }
64
227
  // Fall back to plain text
65
- if (!pushed) {
66
- /* last line is also text */
67
- lastTextQuoteBlock.content += ' ' + line.trim();
228
+ if (lastFlowElement &&
229
+ ['text', 'quote', 'ulist', 'olist'].includes(lastFlowElement.type)) {
230
+ if (lastFlowElement.type === 'ulist' || lastFlowElement.type === 'olist') {
231
+ lastFlowElement.items[lastFlowElement.items.length - 1] += ' ' + line.trim();
232
+ }
233
+ else {
234
+ lastFlowElement.content += ' ' + line.trim();
235
+ }
68
236
  }
69
237
  else {
70
- lastTextQuoteBlock = {
238
+ flush();
239
+ lastFlowElement = {
71
240
  type: 'text',
72
241
  content: line.trim()
73
242
  };
74
- pushed = false;
75
243
  }
76
244
  }
77
- // Avoid the last block is omitted
78
- if (!pushed) {
79
- mdBlocks.push(lastTextQuoteBlock);
80
- pushed = true;
81
- }
82
- return mdBlocks;
83
- }
84
- /* traverse markdown content blocks and wrap text with tags at proper positions. */
85
- function handleTags(mdBlocks) {
86
- let result = '';
87
- for (const block of mdBlocks) {
88
- const type = block.type;
89
- const content = tagSwtich(block);
90
- switch (type) {
91
- case "text":
92
- result += `<p>${content}</p>` +
93
- '\n';
94
- break;
95
- case "header":
96
- result += `<h${block.level}>${content}</h${block.level}>` +
97
- '\n';
98
- break;
99
- }
100
- }
101
- return result;
102
- }
103
- function tagSwtich(block) {
104
- return block.content;
245
+ // Avoid the last element is omitted
246
+ flush();
247
+ return mdElements;
105
248
  }
106
249
 
107
250
  export { parse };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@rokelamen/md2html",
3
3
  "type": "module",
4
- "version": "0.1.3",
4
+ "version": "0.1.4",
5
5
  "description": "A simple tool to convert markdown content to html",
6
6
  "author": "rokelamen <rogerskelamen@gmail.com>",
7
7
  "license": "MIT",