red_quilt 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,9 @@ module RedQuilt
14
14
  @list_parser = List::Parser.new(self)
15
15
  @blockquote_parser = Blockquote::Parser.new(self)
16
16
  @footnote_parser = FootnoteDefinition::Parser.new(self)
17
+ @code_block_parser = CodeBlock::Parser.new(self)
18
+ @html_block_parser = HtmlBlock::Parser.new(self)
19
+ @table_parser = Table::Parser.new(self)
17
20
  end
18
21
 
19
22
  attr_reader :references, :arena, :diagnostics
@@ -60,29 +63,29 @@ module RedQuilt
60
63
  next
61
64
  end
62
65
 
63
- if (fence = fenced_code_start(content))
64
- index = parse_fenced_code(parent_id, lines, index, fence)
66
+ if (fence = CodeBlock.fenced_start(content))
67
+ index = @code_block_parser.parse_fenced(parent_id, lines, index, fence)
65
68
  elsif (heading = atx_heading(content))
66
69
  append_heading(parent_id, line, heading, transformed)
67
70
  index += 1
68
71
  elsif thematic_break?(content)
69
- @arena.append_child(parent_id, @arena.add_node(NodeType::THEMATIC_BREAK, source_start: line.start_byte, source_len: span_len(line)))
72
+ @arena.append_child(parent_id, @arena.add_node(NodeType::THEMATIC_BREAK, source_start: line.start_byte, source_len: line.span_len))
70
73
  index += 1
71
74
  elsif @footnotes && (footnote = FootnoteDefinition.match(content))
72
75
  index = @footnote_parser.parse(lines, index, footnote, @footnotes, @root_id)
73
76
  elsif (reference = ReferenceDefinition.consume(lines, index))
74
77
  store_reference(reference[:reference], reference[:source_span])
75
78
  index += reference[:consumed]
76
- elsif table_start?(lines, index)
77
- index = parse_table(parent_id, lines, index)
78
- elsif html_block_start?(content)
79
- index = parse_html_block(parent_id, lines, index)
79
+ elsif Table.start?(lines, index)
80
+ index = @table_parser.parse(parent_id, lines, index)
81
+ elsif HtmlBlock.start?(content)
82
+ index = @html_block_parser.parse(parent_id, lines, index)
80
83
  elsif Blockquote.match?(content)
81
84
  index = @blockquote_parser.parse(parent_id, lines, index)
82
85
  elsif List.match(content)
83
86
  index = @list_parser.parse(parent_id, lines, index)
84
- elsif indented_code_line?(content)
85
- index = parse_indented_code(parent_id, lines, index)
87
+ elsif CodeBlock.indented_line?(content)
88
+ index = @code_block_parser.parse_indented(parent_id, lines, index)
86
89
  else
87
90
  index = parse_paragraph(parent_id, lines, index, transformed)
88
91
  end
@@ -101,16 +104,16 @@ module RedQuilt
101
104
  line = lines[index]
102
105
  return true if atx_heading(line.content)
103
106
  return true if thematic_break?(line.content)
104
- return true if fenced_code_start(line.content)
107
+ return true if CodeBlock.fenced_start(line.content)
105
108
  # HTML type 7 doesn't break lazy continuation either.
106
- if (type = html_block_type(line.content)) && type != 7
109
+ if (type = HtmlBlock.type(line.content)) && type != 7
107
110
  return true
108
111
  end
109
112
  return true if Blockquote.match?(line.content)
110
113
  if (li = List.match(line.content)) && List.interrupts_paragraph?(li)
111
114
  return true
112
115
  end
113
- return true if table_start?(lines, index)
116
+ return true if Table.start?(lines, index)
114
117
 
115
118
  false
116
119
  end
@@ -128,11 +131,11 @@ module RedQuilt
128
131
  end
129
132
 
130
133
  def paragraph_eligible_line?(content)
131
- return false if indented_code_line?(content)
132
- return false if fenced_code_start(content)
134
+ return false if CodeBlock.indented_line?(content)
135
+ return false if CodeBlock.fenced_start(content)
133
136
  return false if atx_heading(content)
134
137
  return false if thematic_break?(content)
135
- return false if html_block_start?(content)
138
+ return false if HtmlBlock.start?(content)
136
139
  return false if List.match(content)
137
140
  return false if Blockquote.match?(content)
138
141
 
@@ -183,173 +186,6 @@ module RedQuilt
183
186
  true
184
187
  end
185
188
 
186
- def parse_fenced_code(parent_id, lines, index, fence)
187
- start_line = lines[index]
188
- content_lines = []
189
- index += 1
190
- while index < lines.length
191
- break if fenced_code_close?(lines[index].content, fence[:char], fence[:count])
192
-
193
- content_lines << lines[index]
194
- index += 1
195
- end
196
- index += 1 if index < lines.length
197
-
198
- # Each content line is stripped of up to the fence's own leading
199
- # indent (CommonMark spec: a fence indented by N spaces strips up
200
- # to N spaces from every content line, but never more). Manual
201
- # byte scan beats compiling an interpolated regex per block and
202
- # short-circuits when the fence had no indent (the common case).
203
- indent_n = fence[:indent] || 0
204
- code = content_lines.map { |l| strip_leading_spaces(l.content, indent_n) }.join("\n")
205
- code << "\n" unless content_lines.empty?
206
- source_start = content_lines.empty? ? start_line.start_byte : content_lines.first.start_byte
207
- source_end = content_lines.empty? ? start_line.end_byte : content_lines.last.end_byte
208
- code_id = @arena.add_node(NodeType::CODE_BLOCK,
209
- source_start: source_start,
210
- source_len: source_end - source_start,
211
- str1: code,
212
- str2: fence[:info])
213
- @arena.append_child(parent_id, code_id)
214
- index
215
- end
216
-
217
- def parse_indented_code(parent_id, lines, index)
218
- start_index = index
219
- code_lines = []
220
- while index < lines.length
221
- line = lines[index]
222
- break unless line.blank || indented_code_line?(line.content)
223
-
224
- # CommonMark: strip up to 4 columns of leading whitespace
225
- # (tab-aware) from every line, including blank lines whose
226
- # content beyond column 4 must be preserved verbatim.
227
- code_lines << Indentation.strip_columns(line.content, 4)
228
- index += 1
229
- end
230
-
231
- # Trailing blank lines are not part of the code block.
232
- while !code_lines.empty? && code_lines.last.strip.empty?
233
- code_lines.pop
234
- index -= 1
235
- end
236
-
237
- start_byte = lines[start_index].start_byte
238
- end_byte = lines[index - 1].end_byte
239
- code = code_lines.empty? ? "" : code_lines.join("\n") + "\n"
240
-
241
- code_id = @arena.add_node(NodeType::CODE_BLOCK,
242
- source_start: start_byte,
243
- source_len: end_byte - start_byte,
244
- str1: code)
245
- @arena.append_child(parent_id, code_id)
246
- index
247
- end
248
-
249
- HTML_BLOCK_FIXED_TERMINATORS = {
250
- 2 => "-->",
251
- 3 => "?>",
252
- 4 => ">",
253
- 5 => "]]>",
254
- }.freeze
255
-
256
- private_constant :HTML_BLOCK_FIXED_TERMINATORS
257
-
258
- def parse_html_block(parent_id, lines, index)
259
- start_index = index
260
- type = html_block_type(lines[index].content)
261
- end_index = locate_html_block_end(lines, index, type)
262
-
263
- start_byte = lines[start_index].start_byte
264
- end_byte = lines[end_index].end_byte
265
- html_lines = (start_index..end_index).map { |i| lines[i].content }
266
- html_id = @arena.add_node(NodeType::HTML_BLOCK,
267
- source_start: start_byte,
268
- source_len: end_byte - start_byte,
269
- str1: html_lines.join("\n"))
270
- @arena.append_child(parent_id, html_id)
271
- end_index + 1
272
- end
273
-
274
- def locate_html_block_end(lines, index, type)
275
- terminator = html_block_terminator(type, lines[index].content)
276
-
277
- if terminator
278
- case_insensitive = (type == 1)
279
- while index < lines.length
280
- line = lines[index].content
281
- haystack = case_insensitive ? line.downcase : line
282
- return index if haystack.include?(terminator)
283
-
284
- index += 1
285
- end
286
- lines.length - 1
287
- else
288
- # Types 6 & 7: terminated by blank line (or end of input)
289
- index += 1 while index < lines.length && !lines[index].blank
290
- index - 1
291
- end
292
- end
293
-
294
- def html_block_terminator(type, first_line)
295
- case type
296
- when 1
297
- "</#{extract_closing_tag_name(first_line)}>"
298
- when 2..5
299
- HTML_BLOCK_FIXED_TERMINATORS[type]
300
- end
301
- end
302
-
303
- def extract_closing_tag_name(text)
304
- match = /\A<(script|pre|style|textarea)/i.match(text)
305
- match ? match[1].downcase : "script"
306
- end
307
-
308
- def parse_table(parent_id, lines, index)
309
- # Caller must have verified table_start?(lines, index), which validates
310
- # both the delimiter pattern and the header/separator column count match.
311
- start_index = index
312
- header_cells = split_table_row(lines[index].content)
313
- row_lines = [lines[index]]
314
- index += 2
315
- while index < lines.length
316
- break if lines[index].blank
317
- break unless table_row?(lines[index].content)
318
-
319
- row_lines << lines[index]
320
- index += 1
321
- end
322
-
323
- table_id = @arena.add_node(NodeType::TABLE,
324
- source_start: lines[start_index].start_byte,
325
- source_len: row_lines.last.end_byte - lines[start_index].start_byte)
326
- @arena.append_child(parent_id, table_id)
327
-
328
- append_table_row(table_id, lines[start_index], header_cells, true)
329
- row_lines.drop(1).each do |row_line|
330
- append_table_row(table_id, row_line, split_table_row(row_line.content), false)
331
- end
332
-
333
- index
334
- end
335
-
336
- def append_table_row(table_id, line, cells, header)
337
- row_id = @arena.add_node(NodeType::TABLE_ROW,
338
- source_start: line.start_byte,
339
- source_len: span_len(line),
340
- int1: header ? 1 : 0)
341
- @arena.append_child(table_id, row_id)
342
- cells.each do |cell_text|
343
- stripped = cell_text.strip
344
- cell_id = @arena.add_node(NodeType::TABLE_CELL,
345
- source_start: line.start_byte,
346
- source_len: span_len(line),
347
- int1: header ? 1 : 0,
348
- str1: stripped)
349
- @arena.append_child(row_id, cell_id)
350
- end
351
- end
352
-
353
189
  def append_heading(parent_id, line, heading, transformed)
354
190
  content = heading[:content].to_s.rstrip
355
191
  source_start = line.start_byte + heading[:content_start]
@@ -403,7 +239,7 @@ module RedQuilt
403
239
  # reaches this branch). Continuation lines have no fixed indent
404
240
  # cap — all leading whitespace is stripped before joining.
405
241
  stripped = paragraph_lines.map.with_index do |l, i|
406
- i.zero? ? strip_leading_spaces(l.content, 3) : strip_leading_whitespace(l.content)
242
+ i.zero? ? Indentation.strip_leading_spaces(l.content, 3) : Indentation.strip_leading_whitespace(l.content)
407
243
  end
408
244
  # Trailing whitespace on the last line is dropped (no hard-break
409
245
  # without a following content line).
@@ -454,54 +290,21 @@ module RedQuilt
454
290
  return false unless index > 0
455
291
  return true if atx_heading(line.content)
456
292
  return true if thematic_break?(line.content)
457
- return true if fenced_code_start(line.content)
293
+ return true if CodeBlock.fenced_start(line.content)
458
294
  # CommonMark: HTML block types 1–6 interrupt paragraphs; type 7
459
295
  # (a bare valid tag on its own line) does not.
460
- if (type = html_block_type(line.content)) && type != 7
296
+ if (type = HtmlBlock.type(line.content)) && type != 7
461
297
  return true
462
298
  end
463
299
  return true if Blockquote.match?(line.content)
464
300
  if (li = List.match(line.content)) && List.interrupts_paragraph?(li)
465
301
  return true
466
302
  end
467
- return true if table_start?(lines, index)
303
+ return true if Table.start?(lines, index)
468
304
 
469
305
  false
470
306
  end
471
307
 
472
- # Strips up to `max` leading 0x20 bytes from `text`. Returns the
473
- # original string when nothing changed, so callers avoid an
474
- # allocation in the common no-indent case.
475
- def strip_leading_spaces(text, max)
476
- return text if max <= 0
477
-
478
- bytes = text.bytesize
479
- i = 0
480
- while i < max && i < bytes && text.getbyte(i) == 0x20
481
- i += 1
482
- end
483
- return text if i.zero?
484
-
485
- text.byteslice(i..)
486
- end
487
-
488
- # Strips all leading 0x20 / 0x09 bytes from `text`. Same no-alloc
489
- # return as `strip_leading_spaces` when the string already starts
490
- # at a non-whitespace byte.
491
- def strip_leading_whitespace(text)
492
- bytes = text.bytesize
493
- i = 0
494
- while i < bytes
495
- b = text.getbyte(i)
496
- break unless b == 0x20 || b == 0x09
497
-
498
- i += 1
499
- end
500
- return text if i.zero?
501
-
502
- text.byteslice(i..)
503
- end
504
-
505
308
  def build_lines(source)
506
309
  # split("\n", -1) avoids the extra slice/allocation that
507
310
  # each_line + chomp incurs per line. The blank-line check uses
@@ -540,167 +343,6 @@ module RedQuilt
540
343
  { level: match[1].length, content: content, content_start: content_index }
541
344
  end
542
345
 
543
- def fenced_code_start(text)
544
- match = /\A( {0,3})(`{3,}|~{3,})[ \t]*(.*?)\s*\z/.match(text)
545
- return unless match
546
-
547
- info = match[3]
548
- # CommonMark: a backtick-style fence cannot have backticks in its
549
- # info string (they'd be ambiguous with the fence itself).
550
- return if match[2].start_with?("`") && info.include?("`")
551
-
552
- {
553
- char: match[2][0],
554
- count: match[2].length,
555
- info: ReferenceDefinition.unescape_text(info),
556
- indent: match[1].length,
557
- }
558
- end
559
-
560
- def fenced_code_close?(text, char, count)
561
- # Manual byte scan beats compiling a per-(char,count) regex on
562
- # every line of a fenced block. Pattern: 0-3 spaces, >=count of
563
- # `char`, optional trailing spaces/tabs, end-of-line.
564
- bytes = text.bytesize
565
- i = 0
566
- # CommonMark spec: at most 3 spaces of indent.
567
- while i < 3 && i < bytes && text.getbyte(i) == 0x20
568
- i += 1
569
- end
570
- char_byte = char.getbyte(0)
571
- fence_start = i
572
- while i < bytes && text.getbyte(i) == char_byte
573
- i += 1
574
- end
575
- return false if i - fence_start < count
576
-
577
- while i < bytes
578
- b = text.getbyte(i)
579
- return false unless b == 0x20 || b == 0x09
580
-
581
- i += 1
582
- end
583
- true
584
- end
585
-
586
- def indented_code_line?(text)
587
- # CommonMark: 4+ columns of leading whitespace, where tabs expand
588
- # virtually to a tab stop of 4 columns.
589
- Indentation.leading_columns(text) >= 4
590
- end
591
-
592
- # Returns the column count of leading whitespace, treating tabs as
593
- # advancing to the next multiple-of-4 column.
594
- def html_block_start?(text)
595
- # Indented code block takes precedence (4+ spaces)
596
- return false if text.start_with?(" ")
597
-
598
- !html_block_type(text).nil?
599
- end
600
-
601
- def html_block_type(text)
602
- # Fast reject: every HTML block starts with `<`. lstrip strips
603
- # 0-3 indent spaces (more would already be indented code), so peek
604
- # the leading non-space byte before doing any allocations.
605
- i = 0
606
- # CommonMark: HTML block lines may have 0-3 spaces of indent.
607
- while i < 3 && i < text.length && text.getbyte(i) == 0x20
608
- i += 1
609
- end
610
- return nil unless i < text.length && text.getbyte(i) == 0x3C
611
-
612
- stripped = i.zero? ? text : text[i..]
613
-
614
- # Type 1: <script|pre|style|textarea (case-insensitive) followed by
615
- # space/tab/end-of-line or `>`. CommonMark restricts the separator
616
- # to space, tab, or a line ending (not any whitespace class).
617
- return 1 if stripped.match?(%r{\A<(script|pre|style|textarea)(?:[ \t]|>|$)}i)
618
-
619
- # Type 2: <!--
620
- return 2 if stripped.start_with?("<!--")
621
-
622
- # Type 3: <?
623
- return 3 if stripped.start_with?("<?")
624
-
625
- # Type 4: <! followed by uppercase ASCII letter
626
- return 4 if stripped.match?(%r{\A<![A-Z]})
627
-
628
- # Type 5: <![CDATA[
629
- return 5 if stripped.start_with?("<![CDATA[")
630
-
631
- # Type 6: line opens with one of the listed block-level tags.
632
- return 6 if stripped.match?(HTML_BLOCK_TYPE_6_RE)
633
-
634
- # Type 7: a complete open or closing tag spanning the line.
635
- return 7 if valid_html_tag?(stripped)
636
-
637
- nil
638
- end
639
-
640
- HTML_BLOCK_TYPE_6_NAMES = %w[
641
- address article aside base basefont blockquote body caption center
642
- col colgroup dd details dialog dir div dl dt fieldset figcaption
643
- figure footer form frame frameset h1 h2 h3 h4 h5 h6 head header
644
- hr html iframe legend li link main menu menuitem nav noframes ol
645
- optgroup option p param search section summary table tbody td
646
- tfoot th thead title tr track ul
647
- ].freeze
648
- HTML_BLOCK_TYPE_6_RE =
649
- %r{\A</?(?:#{HTML_BLOCK_TYPE_6_NAMES.join('|')})(?:[ \t]|>|/>|\z)}i
650
-
651
- private_constant :HTML_BLOCK_TYPE_6_NAMES, :HTML_BLOCK_TYPE_6_RE
652
-
653
- def table_start?(lines, index)
654
- return false if index + 1 >= lines.length
655
- return false unless table_row?(lines[index].content)
656
-
657
- header_cells = split_table_row(lines[index].content)
658
- separators = split_table_row(lines[index + 1].content)
659
- return false if separators.empty?
660
-
661
- # GFM spec: separator row must have valid delimiters AND match header column count.
662
- # "The header row must match the delimiter row in the number of cells.
663
- # If not, a table will not be recognized."
664
- return false unless header_cells.length == separators.length
665
-
666
- separators.all? { |cell| cell.strip.match?(/\A:?-+:?\z/) }
667
- end
668
-
669
- def table_row?(text)
670
- text.include?("|")
671
- end
672
-
673
- def split_table_row(text)
674
- body = text.strip
675
- body = body[1..] if body.start_with?("|")
676
- body = body[0...-1] if body.end_with?("|")
677
- body.split("|", -1)
678
- end
679
-
680
- # Type 7: a complete open or closing tag on its own line.
681
- # Closing tags must not have attributes.
682
- #
683
- # HTML tag separators per CommonMark 6.6 are space, tab, or up to one
684
- # line ending -- not the broader \s class (which would include form
685
- # feed and vertical tab).
686
- HTML_TYPE_7_OPEN_TAG_RE = %r{
687
- \A
688
- <[A-Za-z][A-Za-z0-9-]*
689
- (?:[ \t\r\n]+[A-Za-z_:][A-Za-z0-9_.:-]*(?:[ \t\r\n]*=[ \t\r\n]*(?:"[^"\n]*"|'[^'\n]*'|[^ \t\r\n"'=<>`]+))?)*
690
- [ \t\r\n]*/?>
691
- \z
692
- }x
693
- HTML_TYPE_7_CLOSING_TAG_RE = %r{\A</[A-Za-z][A-Za-z0-9-]*[ \t\r\n]*>\z}
694
-
695
- private_constant :HTML_TYPE_7_OPEN_TAG_RE, :HTML_TYPE_7_CLOSING_TAG_RE
696
-
697
- def valid_html_tag?(text)
698
- # Fast reject: every type-7 tag must begin with `<`.
699
- return false unless text.start_with?("<")
700
-
701
- HTML_TYPE_7_OPEN_TAG_RE.match?(text) || HTML_TYPE_7_CLOSING_TAG_RE.match?(text)
702
- end
703
-
704
346
  def store_reference(reference, source_span)
705
347
  if @references.key?(reference[:label])
706
348
  @diagnostics << Diagnostic.new(
@@ -716,9 +358,5 @@ module RedQuilt
716
358
  title: reference[:title],
717
359
  }
718
360
  end
719
-
720
- def span_len(line)
721
- line.end_byte - line.start_byte
722
- end
723
361
  end
724
362
  end
data/lib/red_quilt/cli.rb CHANGED
@@ -33,11 +33,13 @@ module RedQuilt
33
33
  standalone: true,
34
34
  auto_title: false,
35
35
  title: nil,
36
- lang: "en",
36
+ lang: nil,
37
37
  css: nil,
38
38
  theme: :default,
39
39
  output: nil,
40
40
  open: false,
41
+ mermaid: false,
42
+ frontmatter: false,
41
43
  }.freeze
42
44
 
43
45
  THEMES = %i[none default].freeze
@@ -62,7 +64,8 @@ module RedQuilt
62
64
  allow_html: options[:allow_html],
63
65
  disallow_raw_html: options[:disallow_raw_html],
64
66
  extended_autolinks: options[:extended_autolinks],
65
- lint: options[:lint])
67
+ lint: options[:lint],
68
+ frontmatter: options[:frontmatter])
66
69
 
67
70
  unless options[:diagnostics_only]
68
71
  emit_output(doc, options, source_path: source_path, stdout: stdout, stderr: stderr)
@@ -154,6 +157,14 @@ module RedQuilt
154
157
  "Write HTML to a file and open it in the default browser (forces --standalone)") do
155
158
  options[:open] = true
156
159
  end
160
+ opts.on("--mermaid",
161
+ "Render `mermaid` code blocks as diagrams (loads mermaid.js from a CDN in standalone output)") do
162
+ options[:mermaid] = true
163
+ end
164
+ opts.on("--frontmatter",
165
+ "Parse leading YAML frontmatter (---) as metadata; fills <title>/lang in standalone output") do
166
+ options[:frontmatter] = true
167
+ end
157
168
  opts.on("--diagnostics", "Also print diagnostics to stderr") do
158
169
  options[:diagnostics] = true
159
170
  end
@@ -206,6 +217,7 @@ module RedQuilt
206
217
  lang: options[:lang],
207
218
  css: options[:css],
208
219
  theme: options[:theme],
220
+ mermaid: options[:mermaid],
209
221
  )
210
222
  end
211
223
 
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedQuilt
4
+ # Fenced and indented code blocks (CommonMark 4.4 / 4.5). The module
5
+ # functions detect a code-block start; the nested Parser builds the arena
6
+ # node, mirroring the List / Blockquote split (detection used by the
7
+ # block dispatch, construction by a cached collaborator).
8
+ module CodeBlock
9
+ module_function
10
+
11
+ # Detects a fenced code opener. Returns a Hash describing the fence
12
+ # ({ char:, count:, info:, indent: }) or nil.
13
+ def fenced_start(text)
14
+ match = /\A( {0,3})(`{3,}|~{3,})[ \t]*(.*?)\s*\z/.match(text)
15
+ return unless match
16
+
17
+ info = match[3]
18
+ # CommonMark: a backtick-style fence cannot have backticks in its
19
+ # info string (they'd be ambiguous with the fence itself).
20
+ return if match[2].start_with?("`") && info.include?("`")
21
+
22
+ {
23
+ char: match[2][0],
24
+ count: match[2].length,
25
+ info: ReferenceDefinition.unescape_text(info),
26
+ indent: match[1].length,
27
+ }
28
+ end
29
+
30
+ # True when `text` is an indented code line: 4+ columns of leading
31
+ # whitespace (tabs expand to a 4-column tab stop).
32
+ def indented_line?(text)
33
+ Indentation.leading_columns(text) >= 4
34
+ end
35
+
36
+ # Cached collaborator for BlockParser. A single instance is created in
37
+ # BlockParser#initialize and reused; per-call state lives in method
38
+ # locals so reentrant calls are safe.
39
+ class Parser
40
+ def initialize(block_parser)
41
+ @arena = block_parser.arena
42
+ end
43
+
44
+ # Parses a fenced block. `fence` is CodeBlock.fenced_start's result
45
+ # for lines[index]. Returns the index past the block.
46
+ def parse_fenced(parent_id, lines, index, fence)
47
+ start_line = lines[index]
48
+ content_lines = []
49
+ index += 1
50
+ while index < lines.length
51
+ break if fence_close?(lines[index].content, fence[:char], fence[:count])
52
+
53
+ content_lines << lines[index]
54
+ index += 1
55
+ end
56
+ index += 1 if index < lines.length
57
+
58
+ # Each content line is stripped of up to the fence's own leading
59
+ # indent (CommonMark spec: a fence indented by N spaces strips up
60
+ # to N spaces from every content line, but never more). Manual
61
+ # byte scan beats compiling an interpolated regex per block and
62
+ # short-circuits when the fence had no indent (the common case).
63
+ indent_n = fence[:indent] || 0
64
+ code = content_lines.map { |l| Indentation.strip_leading_spaces(l.content, indent_n) }.join("\n")
65
+ code << "\n" unless content_lines.empty?
66
+ source_start = content_lines.empty? ? start_line.start_byte : content_lines.first.start_byte
67
+ source_end = content_lines.empty? ? start_line.end_byte : content_lines.last.end_byte
68
+ code_id = @arena.add_node(NodeType::CODE_BLOCK,
69
+ source_start: source_start,
70
+ source_len: source_end - source_start,
71
+ str1: code,
72
+ str2: fence[:info])
73
+ @arena.append_child(parent_id, code_id)
74
+ index
75
+ end
76
+
77
+ # Parses an indented code block. Returns the index past the block.
78
+ def parse_indented(parent_id, lines, index)
79
+ start_index = index
80
+ code_lines = []
81
+ while index < lines.length
82
+ line = lines[index]
83
+ break unless line.blank || CodeBlock.indented_line?(line.content)
84
+
85
+ # CommonMark: strip up to 4 columns of leading whitespace
86
+ # (tab-aware) from every line, including blank lines whose
87
+ # content beyond column 4 must be preserved verbatim.
88
+ code_lines << Indentation.strip_columns(line.content, 4)
89
+ index += 1
90
+ end
91
+
92
+ # Trailing blank lines are not part of the code block.
93
+ while !code_lines.empty? && code_lines.last.strip.empty?
94
+ code_lines.pop
95
+ index -= 1
96
+ end
97
+
98
+ start_byte = lines[start_index].start_byte
99
+ end_byte = lines[index - 1].end_byte
100
+ code = code_lines.empty? ? "" : code_lines.join("\n") + "\n"
101
+
102
+ code_id = @arena.add_node(NodeType::CODE_BLOCK,
103
+ source_start: start_byte,
104
+ source_len: end_byte - start_byte,
105
+ str1: code)
106
+ @arena.append_child(parent_id, code_id)
107
+ index
108
+ end
109
+
110
+ private
111
+
112
+ def fence_close?(text, char, count)
113
+ # Manual byte scan beats compiling a per-(char,count) regex on
114
+ # every line of a fenced block. Pattern: 0-3 spaces, >=count of
115
+ # `char`, optional trailing spaces/tabs, end-of-line.
116
+ bytes = text.bytesize
117
+ i = 0
118
+ # CommonMark spec: at most 3 spaces of indent.
119
+ while i < 3 && i < bytes && text.getbyte(i) == 0x20
120
+ i += 1
121
+ end
122
+ char_byte = char.getbyte(0)
123
+ fence_start = i
124
+ while i < bytes && text.getbyte(i) == char_byte
125
+ i += 1
126
+ end
127
+ return false if i - fence_start < count
128
+
129
+ while i < bytes
130
+ b = text.getbyte(i)
131
+ return false unless b == 0x20 || b == 0x09
132
+
133
+ i += 1
134
+ end
135
+ true
136
+ end
137
+ end
138
+ end
139
+ end