red_quilt 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/README.md +38 -0
- data/docs/api.md +11 -0
- data/lib/red_quilt/arena.rb +96 -0
- data/lib/red_quilt/block_parser.rb +22 -384
- data/lib/red_quilt/cli.rb +8 -2
- data/lib/red_quilt/code_block.rb +139 -0
- data/lib/red_quilt/document.rb +18 -4
- data/lib/red_quilt/footnote_anchors.rb +24 -0
- data/lib/red_quilt/footnote_pass.rb +6 -2
- data/lib/red_quilt/frontmatter.rb +54 -0
- data/lib/red_quilt/html_block.rb +161 -0
- data/lib/red_quilt/indentation.rb +35 -0
- data/lib/red_quilt/inline/builder.rb +9 -186
- data/lib/red_quilt/inline/emphasis_resolver.rb +184 -0
- data/lib/red_quilt/inline/url_sanitizer.rb +64 -0
- data/lib/red_quilt/line.rb +6 -1
- data/lib/red_quilt/lint_pass.rb +2 -2
- data/lib/red_quilt/node_ref.rb +20 -11
- data/lib/red_quilt/renderer/html.rb +32 -20
- data/lib/red_quilt/renderer/mdast.rb +11 -11
- data/lib/red_quilt/table.rb +97 -0
- data/lib/red_quilt/version.rb +1 -1
- data/lib/red_quilt.rb +19 -4
- data/sig/red_quilt.rbs +18 -0
- metadata +9 -2
|
data/lib/red_quilt/block_parser.rb
CHANGED
|
@@ -14,6 +14,9 @@ module RedQuilt
|
|
|
14
14
|
@list_parser = List::Parser.new(self)
|
|
15
15
|
@blockquote_parser = Blockquote::Parser.new(self)
|
|
16
16
|
@footnote_parser = FootnoteDefinition::Parser.new(self)
|
|
17
|
+
@code_block_parser = CodeBlock::Parser.new(self)
|
|
18
|
+
@html_block_parser = HtmlBlock::Parser.new(self)
|
|
19
|
+
@table_parser = Table::Parser.new(self)
|
|
17
20
|
end
|
|
18
21
|
|
|
19
22
|
attr_reader :references, :arena, :diagnostics
|
|
@@ -60,29 +63,29 @@ module RedQuilt
|
|
|
60
63
|
next
|
|
61
64
|
end
|
|
62
65
|
|
|
63
|
-
if (fence = fenced_code_start(content))
|
|
64
|
-
index = parse_fenced_code(parent_id, lines, index, fence)
|
|
66
|
+
if (fence = CodeBlock.fenced_start(content))
|
|
67
|
+
index = @code_block_parser.parse_fenced(parent_id, lines, index, fence)
|
|
65
68
|
elsif (heading = atx_heading(content))
|
|
66
69
|
append_heading(parent_id, line, heading, transformed)
|
|
67
70
|
index += 1
|
|
68
71
|
elsif thematic_break?(content)
|
|
69
|
-
@arena.append_child(parent_id, @arena.add_node(NodeType::THEMATIC_BREAK, source_start: line.start_byte, source_len: span_len(line)))
|
|
72
|
+
@arena.append_child(parent_id, @arena.add_node(NodeType::THEMATIC_BREAK, source_start: line.start_byte, source_len: line.span_len))
|
|
70
73
|
index += 1
|
|
71
74
|
elsif @footnotes && (footnote = FootnoteDefinition.match(content))
|
|
72
75
|
index = @footnote_parser.parse(lines, index, footnote, @footnotes, @root_id)
|
|
73
76
|
elsif (reference = ReferenceDefinition.consume(lines, index))
|
|
74
77
|
store_reference(reference[:reference], reference[:source_span])
|
|
75
78
|
index += reference[:consumed]
|
|
76
|
-
elsif table_start?(lines, index)
|
|
77
|
-
index = parse_table(parent_id, lines, index)
|
|
78
|
-
elsif html_block_start?(content)
|
|
79
|
-
index = parse_html_block(parent_id, lines, index)
|
|
79
|
+
elsif Table.start?(lines, index)
|
|
80
|
+
index = @table_parser.parse(parent_id, lines, index)
|
|
81
|
+
elsif HtmlBlock.start?(content)
|
|
82
|
+
index = @html_block_parser.parse(parent_id, lines, index)
|
|
80
83
|
elsif Blockquote.match?(content)
|
|
81
84
|
index = @blockquote_parser.parse(parent_id, lines, index)
|
|
82
85
|
elsif List.match(content)
|
|
83
86
|
index = @list_parser.parse(parent_id, lines, index)
|
|
84
|
-
elsif indented_code_line?(content)
|
|
85
|
-
index = parse_indented_code(parent_id, lines, index)
|
|
87
|
+
elsif CodeBlock.indented_line?(content)
|
|
88
|
+
index = @code_block_parser.parse_indented(parent_id, lines, index)
|
|
86
89
|
else
|
|
87
90
|
index = parse_paragraph(parent_id, lines, index, transformed)
|
|
88
91
|
end
|
|
@@ -101,16 +104,16 @@ module RedQuilt
|
|
|
101
104
|
line = lines[index]
|
|
102
105
|
return true if atx_heading(line.content)
|
|
103
106
|
return true if thematic_break?(line.content)
|
|
104
|
-
return true if fenced_code_start(line.content)
|
|
107
|
+
return true if CodeBlock.fenced_start(line.content)
|
|
105
108
|
# HTML type 7 doesn't break lazy continuation either.
|
|
106
|
-
if (type = html_block_type(line.content)) && type != 7
|
|
109
|
+
if (type = HtmlBlock.type(line.content)) && type != 7
|
|
107
110
|
return true
|
|
108
111
|
end
|
|
109
112
|
return true if Blockquote.match?(line.content)
|
|
110
113
|
if (li = List.match(line.content)) && List.interrupts_paragraph?(li)
|
|
111
114
|
return true
|
|
112
115
|
end
|
|
113
|
-
return true if table_start?(lines, index)
|
|
116
|
+
return true if Table.start?(lines, index)
|
|
114
117
|
|
|
115
118
|
false
|
|
116
119
|
end
|
|
@@ -128,11 +131,11 @@ module RedQuilt
|
|
|
128
131
|
end
|
|
129
132
|
|
|
130
133
|
def paragraph_eligible_line?(content)
|
|
131
|
-
return false if indented_code_line?(content)
|
|
132
|
-
return false if fenced_code_start(content)
|
|
134
|
+
return false if CodeBlock.indented_line?(content)
|
|
135
|
+
return false if CodeBlock.fenced_start(content)
|
|
133
136
|
return false if atx_heading(content)
|
|
134
137
|
return false if thematic_break?(content)
|
|
135
|
-
return false if html_block_start?(content)
|
|
138
|
+
return false if HtmlBlock.start?(content)
|
|
136
139
|
return false if List.match(content)
|
|
137
140
|
return false if Blockquote.match?(content)
|
|
138
141
|
|
|
@@ -183,173 +186,6 @@ module RedQuilt
|
|
|
183
186
|
true
|
|
184
187
|
end
|
|
185
188
|
|
|
186
|
-
def parse_fenced_code(parent_id, lines, index, fence)
|
|
187
|
-
start_line = lines[index]
|
|
188
|
-
content_lines = []
|
|
189
|
-
index += 1
|
|
190
|
-
while index < lines.length
|
|
191
|
-
break if fenced_code_close?(lines[index].content, fence[:char], fence[:count])
|
|
192
|
-
|
|
193
|
-
content_lines << lines[index]
|
|
194
|
-
index += 1
|
|
195
|
-
end
|
|
196
|
-
index += 1 if index < lines.length
|
|
197
|
-
|
|
198
|
-
# Each content line is stripped of up to the fence's own leading
|
|
199
|
-
# indent (CommonMark spec: a fence indented by N spaces strips up
|
|
200
|
-
# to N spaces from every content line, but never more). Manual
|
|
201
|
-
# byte scan beats compiling an interpolated regex per block and
|
|
202
|
-
# short-circuits when the fence had no indent (the common case).
|
|
203
|
-
indent_n = fence[:indent] || 0
|
|
204
|
-
code = content_lines.map { |l| strip_leading_spaces(l.content, indent_n) }.join("\n")
|
|
205
|
-
code << "\n" unless content_lines.empty?
|
|
206
|
-
source_start = content_lines.empty? ? start_line.start_byte : content_lines.first.start_byte
|
|
207
|
-
source_end = content_lines.empty? ? start_line.end_byte : content_lines.last.end_byte
|
|
208
|
-
code_id = @arena.add_node(NodeType::CODE_BLOCK,
|
|
209
|
-
source_start: source_start,
|
|
210
|
-
source_len: source_end - source_start,
|
|
211
|
-
str1: code,
|
|
212
|
-
str2: fence[:info])
|
|
213
|
-
@arena.append_child(parent_id, code_id)
|
|
214
|
-
index
|
|
215
|
-
end
|
|
216
|
-
|
|
217
|
-
def parse_indented_code(parent_id, lines, index)
|
|
218
|
-
start_index = index
|
|
219
|
-
code_lines = []
|
|
220
|
-
while index < lines.length
|
|
221
|
-
line = lines[index]
|
|
222
|
-
break unless line.blank || indented_code_line?(line.content)
|
|
223
|
-
|
|
224
|
-
# CommonMark: strip up to 4 columns of leading whitespace
|
|
225
|
-
# (tab-aware) from every line, including blank lines whose
|
|
226
|
-
# content beyond column 4 must be preserved verbatim.
|
|
227
|
-
code_lines << Indentation.strip_columns(line.content, 4)
|
|
228
|
-
index += 1
|
|
229
|
-
end
|
|
230
|
-
|
|
231
|
-
# Trailing blank lines are not part of the code block.
|
|
232
|
-
while !code_lines.empty? && code_lines.last.strip.empty?
|
|
233
|
-
code_lines.pop
|
|
234
|
-
index -= 1
|
|
235
|
-
end
|
|
236
|
-
|
|
237
|
-
start_byte = lines[start_index].start_byte
|
|
238
|
-
end_byte = lines[index - 1].end_byte
|
|
239
|
-
code = code_lines.empty? ? "" : code_lines.join("\n") + "\n"
|
|
240
|
-
|
|
241
|
-
code_id = @arena.add_node(NodeType::CODE_BLOCK,
|
|
242
|
-
source_start: start_byte,
|
|
243
|
-
source_len: end_byte - start_byte,
|
|
244
|
-
str1: code)
|
|
245
|
-
@arena.append_child(parent_id, code_id)
|
|
246
|
-
index
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
HTML_BLOCK_FIXED_TERMINATORS = {
|
|
250
|
-
2 => "-->",
|
|
251
|
-
3 => "?>",
|
|
252
|
-
4 => ">",
|
|
253
|
-
5 => "]]>",
|
|
254
|
-
}.freeze
|
|
255
|
-
|
|
256
|
-
private_constant :HTML_BLOCK_FIXED_TERMINATORS
|
|
257
|
-
|
|
258
|
-
def parse_html_block(parent_id, lines, index)
|
|
259
|
-
start_index = index
|
|
260
|
-
type = html_block_type(lines[index].content)
|
|
261
|
-
end_index = locate_html_block_end(lines, index, type)
|
|
262
|
-
|
|
263
|
-
start_byte = lines[start_index].start_byte
|
|
264
|
-
end_byte = lines[end_index].end_byte
|
|
265
|
-
html_lines = (start_index..end_index).map { |i| lines[i].content }
|
|
266
|
-
html_id = @arena.add_node(NodeType::HTML_BLOCK,
|
|
267
|
-
source_start: start_byte,
|
|
268
|
-
source_len: end_byte - start_byte,
|
|
269
|
-
str1: html_lines.join("\n"))
|
|
270
|
-
@arena.append_child(parent_id, html_id)
|
|
271
|
-
end_index + 1
|
|
272
|
-
end
|
|
273
|
-
|
|
274
|
-
def locate_html_block_end(lines, index, type)
|
|
275
|
-
terminator = html_block_terminator(type, lines[index].content)
|
|
276
|
-
|
|
277
|
-
if terminator
|
|
278
|
-
case_insensitive = (type == 1)
|
|
279
|
-
while index < lines.length
|
|
280
|
-
line = lines[index].content
|
|
281
|
-
haystack = case_insensitive ? line.downcase : line
|
|
282
|
-
return index if haystack.include?(terminator)
|
|
283
|
-
|
|
284
|
-
index += 1
|
|
285
|
-
end
|
|
286
|
-
lines.length - 1
|
|
287
|
-
else
|
|
288
|
-
# Types 6 & 7: terminated by blank line (or end of input)
|
|
289
|
-
index += 1 while index < lines.length && !lines[index].blank
|
|
290
|
-
index - 1
|
|
291
|
-
end
|
|
292
|
-
end
|
|
293
|
-
|
|
294
|
-
def html_block_terminator(type, first_line)
|
|
295
|
-
case type
|
|
296
|
-
when 1
|
|
297
|
-
"</#{extract_closing_tag_name(first_line)}>"
|
|
298
|
-
when 2..5
|
|
299
|
-
HTML_BLOCK_FIXED_TERMINATORS[type]
|
|
300
|
-
end
|
|
301
|
-
end
|
|
302
|
-
|
|
303
|
-
def extract_closing_tag_name(text)
|
|
304
|
-
match = /\A<(script|pre|style|textarea)/i.match(text)
|
|
305
|
-
match ? match[1].downcase : "script"
|
|
306
|
-
end
|
|
307
|
-
|
|
308
|
-
def parse_table(parent_id, lines, index)
|
|
309
|
-
# Caller must have verified table_start?(lines, index), which validates
|
|
310
|
-
# both the delimiter pattern and the header/separator column count match.
|
|
311
|
-
start_index = index
|
|
312
|
-
header_cells = split_table_row(lines[index].content)
|
|
313
|
-
row_lines = [lines[index]]
|
|
314
|
-
index += 2
|
|
315
|
-
while index < lines.length
|
|
316
|
-
break if lines[index].blank
|
|
317
|
-
break unless table_row?(lines[index].content)
|
|
318
|
-
|
|
319
|
-
row_lines << lines[index]
|
|
320
|
-
index += 1
|
|
321
|
-
end
|
|
322
|
-
|
|
323
|
-
table_id = @arena.add_node(NodeType::TABLE,
|
|
324
|
-
source_start: lines[start_index].start_byte,
|
|
325
|
-
source_len: row_lines.last.end_byte - lines[start_index].start_byte)
|
|
326
|
-
@arena.append_child(parent_id, table_id)
|
|
327
|
-
|
|
328
|
-
append_table_row(table_id, lines[start_index], header_cells, true)
|
|
329
|
-
row_lines.drop(1).each do |row_line|
|
|
330
|
-
append_table_row(table_id, row_line, split_table_row(row_line.content), false)
|
|
331
|
-
end
|
|
332
|
-
|
|
333
|
-
index
|
|
334
|
-
end
|
|
335
|
-
|
|
336
|
-
def append_table_row(table_id, line, cells, header)
|
|
337
|
-
row_id = @arena.add_node(NodeType::TABLE_ROW,
|
|
338
|
-
source_start: line.start_byte,
|
|
339
|
-
source_len: span_len(line),
|
|
340
|
-
int1: header ? 1 : 0)
|
|
341
|
-
@arena.append_child(table_id, row_id)
|
|
342
|
-
cells.each do |cell_text|
|
|
343
|
-
stripped = cell_text.strip
|
|
344
|
-
cell_id = @arena.add_node(NodeType::TABLE_CELL,
|
|
345
|
-
source_start: line.start_byte,
|
|
346
|
-
source_len: span_len(line),
|
|
347
|
-
int1: header ? 1 : 0,
|
|
348
|
-
str1: stripped)
|
|
349
|
-
@arena.append_child(row_id, cell_id)
|
|
350
|
-
end
|
|
351
|
-
end
|
|
352
|
-
|
|
353
189
|
def append_heading(parent_id, line, heading, transformed)
|
|
354
190
|
content = heading[:content].to_s.rstrip
|
|
355
191
|
source_start = line.start_byte + heading[:content_start]
|
|
@@ -403,7 +239,7 @@ module RedQuilt
|
|
|
403
239
|
# reaches this branch). Continuation lines have no fixed indent
|
|
404
240
|
# cap — all leading whitespace is stripped before joining.
|
|
405
241
|
stripped = paragraph_lines.map.with_index do |l, i|
|
|
406
|
-
i.zero? ? strip_leading_spaces(l.content, 3) : strip_leading_whitespace(l.content)
|
|
242
|
+
i.zero? ? Indentation.strip_leading_spaces(l.content, 3) : Indentation.strip_leading_whitespace(l.content)
|
|
407
243
|
end
|
|
408
244
|
# Trailing whitespace on the last line is dropped (no hard-break
|
|
409
245
|
# without a following content line).
|
|
@@ -454,54 +290,21 @@ module RedQuilt
|
|
|
454
290
|
return false unless index > 0
|
|
455
291
|
return true if atx_heading(line.content)
|
|
456
292
|
return true if thematic_break?(line.content)
|
|
457
|
-
return true if fenced_code_start(line.content)
|
|
293
|
+
return true if CodeBlock.fenced_start(line.content)
|
|
458
294
|
# CommonMark: HTML block types 1–6 interrupt paragraphs; type 7
|
|
459
295
|
# (a bare valid tag on its own line) does not.
|
|
460
|
-
if (type = html_block_type(line.content)) && type != 7
|
|
296
|
+
if (type = HtmlBlock.type(line.content)) && type != 7
|
|
461
297
|
return true
|
|
462
298
|
end
|
|
463
299
|
return true if Blockquote.match?(line.content)
|
|
464
300
|
if (li = List.match(line.content)) && List.interrupts_paragraph?(li)
|
|
465
301
|
return true
|
|
466
302
|
end
|
|
467
|
-
return true if table_start?(lines, index)
|
|
303
|
+
return true if Table.start?(lines, index)
|
|
468
304
|
|
|
469
305
|
false
|
|
470
306
|
end
|
|
471
307
|
|
|
472
|
-
# Strips up to `max` leading 0x20 bytes from `text`. Returns the
|
|
473
|
-
# original string when nothing changed, so callers avoid an
|
|
474
|
-
# allocation in the common no-indent case.
|
|
475
|
-
def strip_leading_spaces(text, max)
|
|
476
|
-
return text if max <= 0
|
|
477
|
-
|
|
478
|
-
bytes = text.bytesize
|
|
479
|
-
i = 0
|
|
480
|
-
while i < max && i < bytes && text.getbyte(i) == 0x20
|
|
481
|
-
i += 1
|
|
482
|
-
end
|
|
483
|
-
return text if i.zero?
|
|
484
|
-
|
|
485
|
-
text.byteslice(i..)
|
|
486
|
-
end
|
|
487
|
-
|
|
488
|
-
# Strips all leading 0x20 / 0x09 bytes from `text`. Same no-alloc
|
|
489
|
-
# return as `strip_leading_spaces` when the string already starts
|
|
490
|
-
# at a non-whitespace byte.
|
|
491
|
-
def strip_leading_whitespace(text)
|
|
492
|
-
bytes = text.bytesize
|
|
493
|
-
i = 0
|
|
494
|
-
while i < bytes
|
|
495
|
-
b = text.getbyte(i)
|
|
496
|
-
break unless b == 0x20 || b == 0x09
|
|
497
|
-
|
|
498
|
-
i += 1
|
|
499
|
-
end
|
|
500
|
-
return text if i.zero?
|
|
501
|
-
|
|
502
|
-
text.byteslice(i..)
|
|
503
|
-
end
|
|
504
|
-
|
|
505
308
|
def build_lines(source)
|
|
506
309
|
# split("\n", -1) avoids the extra slice/allocation that
|
|
507
310
|
# each_line + chomp incurs per line. The blank-line check uses
|
|
@@ -540,167 +343,6 @@ module RedQuilt
|
|
|
540
343
|
{ level: match[1].length, content: content, content_start: content_index }
|
|
541
344
|
end
|
|
542
345
|
|
|
543
|
-
def fenced_code_start(text)
|
|
544
|
-
match = /\A( {0,3})(`{3,}|~{3,})[ \t]*(.*?)\s*\z/.match(text)
|
|
545
|
-
return unless match
|
|
546
|
-
|
|
547
|
-
info = match[3]
|
|
548
|
-
# CommonMark: a backtick-style fence cannot have backticks in its
|
|
549
|
-
# info string (they'd be ambiguous with the fence itself).
|
|
550
|
-
return if match[2].start_with?("`") && info.include?("`")
|
|
551
|
-
|
|
552
|
-
{
|
|
553
|
-
char: match[2][0],
|
|
554
|
-
count: match[2].length,
|
|
555
|
-
info: ReferenceDefinition.unescape_text(info),
|
|
556
|
-
indent: match[1].length,
|
|
557
|
-
}
|
|
558
|
-
end
|
|
559
|
-
|
|
560
|
-
def fenced_code_close?(text, char, count)
|
|
561
|
-
# Manual byte scan beats compiling a per-(char,count) regex on
|
|
562
|
-
# every line of a fenced block. Pattern: 0-3 spaces, >=count of
|
|
563
|
-
# `char`, optional trailing spaces/tabs, end-of-line.
|
|
564
|
-
bytes = text.bytesize
|
|
565
|
-
i = 0
|
|
566
|
-
# CommonMark spec: at most 3 spaces of indent.
|
|
567
|
-
while i < 3 && i < bytes && text.getbyte(i) == 0x20
|
|
568
|
-
i += 1
|
|
569
|
-
end
|
|
570
|
-
char_byte = char.getbyte(0)
|
|
571
|
-
fence_start = i
|
|
572
|
-
while i < bytes && text.getbyte(i) == char_byte
|
|
573
|
-
i += 1
|
|
574
|
-
end
|
|
575
|
-
return false if i - fence_start < count
|
|
576
|
-
|
|
577
|
-
while i < bytes
|
|
578
|
-
b = text.getbyte(i)
|
|
579
|
-
return false unless b == 0x20 || b == 0x09
|
|
580
|
-
|
|
581
|
-
i += 1
|
|
582
|
-
end
|
|
583
|
-
true
|
|
584
|
-
end
|
|
585
|
-
|
|
586
|
-
def indented_code_line?(text)
|
|
587
|
-
# CommonMark: 4+ columns of leading whitespace, where tabs expand
|
|
588
|
-
# virtually to a tab stop of 4 columns.
|
|
589
|
-
Indentation.leading_columns(text) >= 4
|
|
590
|
-
end
|
|
591
|
-
|
|
592
|
-
# Returns the column count of leading whitespace, treating tabs as
|
|
593
|
-
# advancing to the next multiple-of-4 column.
|
|
594
|
-
def html_block_start?(text)
|
|
595
|
-
# Indented code block takes precedence (4+ spaces)
|
|
596
|
-
return false if text.start_with?(" ")
|
|
597
|
-
|
|
598
|
-
!html_block_type(text).nil?
|
|
599
|
-
end
|
|
600
|
-
|
|
601
|
-
def html_block_type(text)
|
|
602
|
-
# Fast reject: every HTML block starts with `<`. lstrip strips
|
|
603
|
-
# 0-3 indent spaces (more would already be indented code), so peek
|
|
604
|
-
# the leading non-space byte before doing any allocations.
|
|
605
|
-
i = 0
|
|
606
|
-
# CommonMark: HTML block lines may have 0-3 spaces of indent.
|
|
607
|
-
while i < 3 && i < text.length && text.getbyte(i) == 0x20
|
|
608
|
-
i += 1
|
|
609
|
-
end
|
|
610
|
-
return nil unless i < text.length && text.getbyte(i) == 0x3C
|
|
611
|
-
|
|
612
|
-
stripped = i.zero? ? text : text[i..]
|
|
613
|
-
|
|
614
|
-
# Type 1: <script|pre|style|textarea (case-insensitive) followed by
|
|
615
|
-
# space/tab/end-of-line or `>`. CommonMark restricts the separator
|
|
616
|
-
# to space, tab, or a line ending (not any whitespace class).
|
|
617
|
-
return 1 if stripped.match?(%r{\A<(script|pre|style|textarea)(?:[ \t]|>|$)}i)
|
|
618
|
-
|
|
619
|
-
# Type 2: <!--
|
|
620
|
-
return 2 if stripped.start_with?("<!--")
|
|
621
|
-
|
|
622
|
-
# Type 3: <?
|
|
623
|
-
return 3 if stripped.start_with?("<?")
|
|
624
|
-
|
|
625
|
-
# Type 4: <! followed by uppercase ASCII letter
|
|
626
|
-
return 4 if stripped.match?(%r{\A<![A-Z]})
|
|
627
|
-
|
|
628
|
-
# Type 5: <![CDATA[
|
|
629
|
-
return 5 if stripped.start_with?("<![CDATA[")
|
|
630
|
-
|
|
631
|
-
# Type 6: line opens with one of the listed block-level tags.
|
|
632
|
-
return 6 if stripped.match?(HTML_BLOCK_TYPE_6_RE)
|
|
633
|
-
|
|
634
|
-
# Type 7: a complete open or closing tag spanning the line.
|
|
635
|
-
return 7 if valid_html_tag?(stripped)
|
|
636
|
-
|
|
637
|
-
nil
|
|
638
|
-
end
|
|
639
|
-
|
|
640
|
-
HTML_BLOCK_TYPE_6_NAMES = %w[
|
|
641
|
-
address article aside base basefont blockquote body caption center
|
|
642
|
-
col colgroup dd details dialog dir div dl dt fieldset figcaption
|
|
643
|
-
figure footer form frame frameset h1 h2 h3 h4 h5 h6 head header
|
|
644
|
-
hr html iframe legend li link main menu menuitem nav noframes ol
|
|
645
|
-
optgroup option p param search section summary table tbody td
|
|
646
|
-
tfoot th thead title tr track ul
|
|
647
|
-
].freeze
|
|
648
|
-
HTML_BLOCK_TYPE_6_RE =
|
|
649
|
-
%r{\A</?(?:#{HTML_BLOCK_TYPE_6_NAMES.join('|')})(?:[ \t]|>|/>|\z)}i
|
|
650
|
-
|
|
651
|
-
private_constant :HTML_BLOCK_TYPE_6_NAMES, :HTML_BLOCK_TYPE_6_RE
|
|
652
|
-
|
|
653
|
-
def table_start?(lines, index)
|
|
654
|
-
return false if index + 1 >= lines.length
|
|
655
|
-
return false unless table_row?(lines[index].content)
|
|
656
|
-
|
|
657
|
-
header_cells = split_table_row(lines[index].content)
|
|
658
|
-
separators = split_table_row(lines[index + 1].content)
|
|
659
|
-
return false if separators.empty?
|
|
660
|
-
|
|
661
|
-
# GFM spec: separator row must have valid delimiters AND match header column count.
|
|
662
|
-
# "The header row must match the delimiter row in the number of cells.
|
|
663
|
-
# If not, a table will not be recognized."
|
|
664
|
-
return false unless header_cells.length == separators.length
|
|
665
|
-
|
|
666
|
-
separators.all? { |cell| cell.strip.match?(/\A:?-+:?\z/) }
|
|
667
|
-
end
|
|
668
|
-
|
|
669
|
-
def table_row?(text)
|
|
670
|
-
text.include?("|")
|
|
671
|
-
end
|
|
672
|
-
|
|
673
|
-
def split_table_row(text)
|
|
674
|
-
body = text.strip
|
|
675
|
-
body = body[1..] if body.start_with?("|")
|
|
676
|
-
body = body[0...-1] if body.end_with?("|")
|
|
677
|
-
body.split("|", -1)
|
|
678
|
-
end
|
|
679
|
-
|
|
680
|
-
# Type 7: a complete open or closing tag on its own line.
|
|
681
|
-
# Closing tags must not have attributes.
|
|
682
|
-
#
|
|
683
|
-
# HTML tag separators per CommonMark 6.6 are space, tab, or up to one
|
|
684
|
-
# line ending -- not the broader \s class (which would include form
|
|
685
|
-
# feed and vertical tab).
|
|
686
|
-
HTML_TYPE_7_OPEN_TAG_RE = %r{
|
|
687
|
-
\A
|
|
688
|
-
<[A-Za-z][A-Za-z0-9-]*
|
|
689
|
-
(?:[ \t\r\n]+[A-Za-z_:][A-Za-z0-9_.:-]*(?:[ \t\r\n]*=[ \t\r\n]*(?:"[^"\n]*"|'[^'\n]*'|[^ \t\r\n"'=<>`]+))?)*
|
|
690
|
-
[ \t\r\n]*/?>
|
|
691
|
-
\z
|
|
692
|
-
}x
|
|
693
|
-
HTML_TYPE_7_CLOSING_TAG_RE = %r{\A</[A-Za-z][A-Za-z0-9-]*[ \t\r\n]*>\z}
|
|
694
|
-
|
|
695
|
-
private_constant :HTML_TYPE_7_OPEN_TAG_RE, :HTML_TYPE_7_CLOSING_TAG_RE
|
|
696
|
-
|
|
697
|
-
def valid_html_tag?(text)
|
|
698
|
-
# Fast reject: every type-7 tag must begin with `<`.
|
|
699
|
-
return false unless text.start_with?("<")
|
|
700
|
-
|
|
701
|
-
HTML_TYPE_7_OPEN_TAG_RE.match?(text) || HTML_TYPE_7_CLOSING_TAG_RE.match?(text)
|
|
702
|
-
end
|
|
703
|
-
|
|
704
346
|
def store_reference(reference, source_span)
|
|
705
347
|
if @references.key?(reference[:label])
|
|
706
348
|
@diagnostics << Diagnostic.new(
|
|
@@ -716,9 +358,5 @@ module RedQuilt
|
|
|
716
358
|
title: reference[:title],
|
|
717
359
|
}
|
|
718
360
|
end
|
|
719
|
-
|
|
720
|
-
def span_len(line)
|
|
721
|
-
line.end_byte - line.start_byte
|
|
722
|
-
end
|
|
723
361
|
end
|
|
724
362
|
end
|
data/lib/red_quilt/cli.rb
CHANGED
|
@@ -33,12 +33,13 @@ module RedQuilt
|
|
|
33
33
|
standalone: true,
|
|
34
34
|
auto_title: false,
|
|
35
35
|
title: nil,
|
|
36
|
-
lang:
|
|
36
|
+
lang: nil,
|
|
37
37
|
css: nil,
|
|
38
38
|
theme: :default,
|
|
39
39
|
output: nil,
|
|
40
40
|
open: false,
|
|
41
41
|
mermaid: false,
|
|
42
|
+
frontmatter: false,
|
|
42
43
|
}.freeze
|
|
43
44
|
|
|
44
45
|
THEMES = %i[none default].freeze
|
|
@@ -63,7 +64,8 @@ module RedQuilt
|
|
|
63
64
|
allow_html: options[:allow_html],
|
|
64
65
|
disallow_raw_html: options[:disallow_raw_html],
|
|
65
66
|
extended_autolinks: options[:extended_autolinks],
|
|
66
|
-
lint: options[:lint])
|
|
67
|
+
lint: options[:lint],
|
|
68
|
+
frontmatter: options[:frontmatter])
|
|
67
69
|
|
|
68
70
|
unless options[:diagnostics_only]
|
|
69
71
|
emit_output(doc, options, source_path: source_path, stdout: stdout, stderr: stderr)
|
|
@@ -159,6 +161,10 @@ module RedQuilt
|
|
|
159
161
|
"Render `mermaid` code blocks as diagrams (loads mermaid.js from a CDN in standalone output)") do
|
|
160
162
|
options[:mermaid] = true
|
|
161
163
|
end
|
|
164
|
+
opts.on("--frontmatter",
|
|
165
|
+
"Parse leading YAML frontmatter (---) as metadata; fills <title>/lang in standalone output") do
|
|
166
|
+
options[:frontmatter] = true
|
|
167
|
+
end
|
|
162
168
|
opts.on("--diagnostics", "Also print diagnostics to stderr") do
|
|
163
169
|
options[:diagnostics] = true
|
|
164
170
|
end
|
|
data/lib/red_quilt/code_block.rb
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RedQuilt
|
|
4
|
+
# Fenced and indented code blocks (CommonMark 4.4 / 4.5). The module
|
|
5
|
+
# functions detect a code-block start; the nested Parser builds the arena
|
|
6
|
+
# node, mirroring the List / Blockquote split (detection used by the
|
|
7
|
+
# block dispatch, construction by a cached collaborator).
|
|
8
|
+
module CodeBlock
|
|
9
|
+
module_function
|
|
10
|
+
|
|
11
|
+
# Detects a fenced code opener. Returns a Hash describing the fence
|
|
12
|
+
# ({ char:, count:, info:, indent: }) or nil.
|
|
13
|
+
def fenced_start(text)
|
|
14
|
+
match = /\A( {0,3})(`{3,}|~{3,})[ \t]*(.*?)\s*\z/.match(text)
|
|
15
|
+
return unless match
|
|
16
|
+
|
|
17
|
+
info = match[3]
|
|
18
|
+
# CommonMark: a backtick-style fence cannot have backticks in its
|
|
19
|
+
# info string (they'd be ambiguous with the fence itself).
|
|
20
|
+
return if match[2].start_with?("`") && info.include?("`")
|
|
21
|
+
|
|
22
|
+
{
|
|
23
|
+
char: match[2][0],
|
|
24
|
+
count: match[2].length,
|
|
25
|
+
info: ReferenceDefinition.unescape_text(info),
|
|
26
|
+
indent: match[1].length,
|
|
27
|
+
}
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# True when `text` is an indented code line: 4+ columns of leading
|
|
31
|
+
# whitespace (tabs expand to a 4-column tab stop).
|
|
32
|
+
def indented_line?(text)
|
|
33
|
+
Indentation.leading_columns(text) >= 4
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Cached collaborator for BlockParser. A single instance is created in
|
|
37
|
+
# BlockParser#initialize and reused; per-call state lives in method
|
|
38
|
+
# locals so reentrant calls are safe.
|
|
39
|
+
class Parser
|
|
40
|
+
def initialize(block_parser)
|
|
41
|
+
@arena = block_parser.arena
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Parses a fenced block. `fence` is CodeBlock.fenced_start's result
|
|
45
|
+
# for lines[index]. Returns the index past the block.
|
|
46
|
+
def parse_fenced(parent_id, lines, index, fence)
|
|
47
|
+
start_line = lines[index]
|
|
48
|
+
content_lines = []
|
|
49
|
+
index += 1
|
|
50
|
+
while index < lines.length
|
|
51
|
+
break if fence_close?(lines[index].content, fence[:char], fence[:count])
|
|
52
|
+
|
|
53
|
+
content_lines << lines[index]
|
|
54
|
+
index += 1
|
|
55
|
+
end
|
|
56
|
+
index += 1 if index < lines.length
|
|
57
|
+
|
|
58
|
+
# Each content line is stripped of up to the fence's own leading
|
|
59
|
+
# indent (CommonMark spec: a fence indented by N spaces strips up
|
|
60
|
+
# to N spaces from every content line, but never more). Manual
|
|
61
|
+
# byte scan beats compiling an interpolated regex per block and
|
|
62
|
+
# short-circuits when the fence had no indent (the common case).
|
|
63
|
+
indent_n = fence[:indent] || 0
|
|
64
|
+
code = content_lines.map { |l| Indentation.strip_leading_spaces(l.content, indent_n) }.join("\n")
|
|
65
|
+
code << "\n" unless content_lines.empty?
|
|
66
|
+
source_start = content_lines.empty? ? start_line.start_byte : content_lines.first.start_byte
|
|
67
|
+
source_end = content_lines.empty? ? start_line.end_byte : content_lines.last.end_byte
|
|
68
|
+
code_id = @arena.add_node(NodeType::CODE_BLOCK,
|
|
69
|
+
source_start: source_start,
|
|
70
|
+
source_len: source_end - source_start,
|
|
71
|
+
str1: code,
|
|
72
|
+
str2: fence[:info])
|
|
73
|
+
@arena.append_child(parent_id, code_id)
|
|
74
|
+
index
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Parses an indented code block. Returns the index past the block.
|
|
78
|
+
def parse_indented(parent_id, lines, index)
|
|
79
|
+
start_index = index
|
|
80
|
+
code_lines = []
|
|
81
|
+
while index < lines.length
|
|
82
|
+
line = lines[index]
|
|
83
|
+
break unless line.blank || CodeBlock.indented_line?(line.content)
|
|
84
|
+
|
|
85
|
+
# CommonMark: strip up to 4 columns of leading whitespace
|
|
86
|
+
# (tab-aware) from every line, including blank lines whose
|
|
87
|
+
# content beyond column 4 must be preserved verbatim.
|
|
88
|
+
code_lines << Indentation.strip_columns(line.content, 4)
|
|
89
|
+
index += 1
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Trailing blank lines are not part of the code block.
|
|
93
|
+
while !code_lines.empty? && code_lines.last.strip.empty?
|
|
94
|
+
code_lines.pop
|
|
95
|
+
index -= 1
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
start_byte = lines[start_index].start_byte
|
|
99
|
+
end_byte = lines[index - 1].end_byte
|
|
100
|
+
code = code_lines.empty? ? "" : code_lines.join("\n") + "\n"
|
|
101
|
+
|
|
102
|
+
code_id = @arena.add_node(NodeType::CODE_BLOCK,
|
|
103
|
+
source_start: start_byte,
|
|
104
|
+
source_len: end_byte - start_byte,
|
|
105
|
+
str1: code)
|
|
106
|
+
@arena.append_child(parent_id, code_id)
|
|
107
|
+
index
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
private
|
|
111
|
+
|
|
112
|
+
def fence_close?(text, char, count)
|
|
113
|
+
# Manual byte scan beats compiling a per-(char,count) regex on
|
|
114
|
+
# every line of a fenced block. Pattern: 0-3 spaces, >=count of
|
|
115
|
+
# `char`, optional trailing spaces/tabs, end-of-line.
|
|
116
|
+
bytes = text.bytesize
|
|
117
|
+
i = 0
|
|
118
|
+
# CommonMark spec: at most 3 spaces of indent.
|
|
119
|
+
while i < 3 && i < bytes && text.getbyte(i) == 0x20
|
|
120
|
+
i += 1
|
|
121
|
+
end
|
|
122
|
+
char_byte = char.getbyte(0)
|
|
123
|
+
fence_start = i
|
|
124
|
+
while i < bytes && text.getbyte(i) == char_byte
|
|
125
|
+
i += 1
|
|
126
|
+
end
|
|
127
|
+
return false if i - fence_start < count
|
|
128
|
+
|
|
129
|
+
while i < bytes
|
|
130
|
+
b = text.getbyte(i)
|
|
131
|
+
return false unless b == 0x20 || b == 0x09
|
|
132
|
+
|
|
133
|
+
i += 1
|
|
134
|
+
end
|
|
135
|
+
true
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|