red_quilt 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@
2
2
 
3
3
  module RedQuilt
4
4
  class Document
5
- attr_reader :source, :arena, :root_id, :references, :footnotes
5
+ attr_reader :source, :arena, :root_id, :references, :footnotes, :frontmatter
6
6
 
7
- def initialize(source, arena, root_id, allow_html: false, disallow_raw_html: false, references: {}, footnotes: nil)
7
+ def initialize(source, arena, root_id, allow_html: false, disallow_raw_html: false, references: {}, footnotes: nil, frontmatter: nil)
8
8
  @source = source
9
9
  @arena = arena
10
10
  @root_id = root_id
@@ -12,6 +12,7 @@ module RedQuilt
12
12
  @disallow_raw_html = disallow_raw_html
13
13
  @references = references
14
14
  @footnotes = footnotes
15
+ @frontmatter = frontmatter
15
16
  end
16
17
 
17
18
  def allow_html?
@@ -47,11 +48,22 @@ module RedQuilt
47
48
  # (an external stylesheet link) is independent and may be combined.
48
49
  # heading_ids: when true, every heading gets a slugified `id` (Unicode
49
50
  # preserving, deduplicated within the document) for anchor links.
50
- def to_html(standalone: false, title: nil, lang: "en", css: nil, theme: :none, heading_ids: false)
51
- body = Renderer::HTML.new(self, heading_ids: heading_ids).render
51
+ # mermaid: when true, fenced code blocks tagged `mermaid` render as
52
+ # `<pre class="mermaid">` containers instead of `<pre><code>`. In
53
+ # standalone mode the mermaid.js runtime is also loaded from a CDN so
54
+ # the diagrams render in the browser without further setup.
55
+ #
56
+ # When standalone and the document was parsed with `frontmatter: true`,
57
+ # the frontmatter's `title` / `lang` keys fill in the corresponding
58
+ # `<title>` / `<html lang>` if no explicit argument was given
59
+ # (explicit argument > frontmatter > default).
60
+ def to_html(standalone: false, title: nil, lang: nil, css: nil, theme: :none, heading_ids: false, mermaid: false)
61
+ body = Renderer::HTML.new(self, heading_ids: heading_ids, mermaid: mermaid).render
52
62
  return body unless standalone
53
63
 
54
- wrap_standalone_html(body, title: title.to_s, lang: lang.to_s, css: css, theme: Theme.css(theme))
64
+ effective_title = title || frontmatter_value("title")
65
+ effective_lang = lang || frontmatter_value("lang") || "en"
66
+ wrap_standalone_html(body, title: effective_title.to_s, lang: effective_lang.to_s, css: css, theme: Theme.css(theme), mermaid: mermaid)
55
67
  end
56
68
 
57
69
  def to_ast
@@ -87,7 +99,74 @@ module RedQuilt
87
99
 
88
100
  private
89
101
 
90
- def wrap_standalone_html(body, title:, lang:, css:, theme:)
102
# Looks up +key+ in the parsed frontmatter. Yields nil whenever the
# stored frontmatter is not a Hash (absent, or some other parsed value),
# so callers can chain it with `||` fallbacks.
def frontmatter_value(key)
  @frontmatter[key] if @frontmatter.is_a?(Hash)
end
107
+
108
+ # Self-contained assets embedded in standalone output when mermaid
109
+ # support is enabled. Loads the mermaid.js runtime from a CDN as an ES
110
+ # module, renders every `<pre class="mermaid">` container, then makes
111
+ # each diagram interactive with svg-pan-zoom (also from a CDN): mouse
112
+ # wheel zooms, drag pans, and a small control panel offers +/-/reset.
113
+ MERMAID_SCRIPT = <<~HTML
114
+ <style>
115
+ .rq-mermaid-pz {
116
+ /* Break out of the body's max-width column so the viewport isn't a
117
+ narrow peephole: span most of the viewport width, centered. */
118
+ width: 80vw;
119
+ margin-left: calc(50% - 40vw);
120
+ height: 80vh;
121
+ border: 1px solid #d0d7de;
122
+ border-radius: 6px;
123
+ overflow: hidden;
124
+ }
125
+ .rq-mermaid-pz svg {
126
+ width: 100%;
127
+ height: 100%;
128
+ max-width: none;
129
+ display: block;
130
+ cursor: grab;
131
+ }
132
+ @media (prefers-color-scheme: dark) {
133
+ .rq-mermaid-pz { border-color: #30363d; }
134
+ }
135
+ </style>
136
+ <script type="module">
137
+ import mermaid from "https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.esm.min.mjs";
138
+ import svgPanZoom from "https://cdn.jsdelivr.net/npm/svg-pan-zoom@3.6.1/+esm";
139
+ mermaid.initialize({ startOnLoad: false });
140
+ await mermaid.run();
141
+
142
+ for (const pre of document.querySelectorAll("pre.mermaid")) {
143
+ const svg = pre.querySelector("svg");
144
+ if (!svg) continue;
145
+ // Drop mermaid's inline max-width and let the SVG fill a sized box so
146
+ // svg-pan-zoom has room to zoom/pan. The whole viewBox scales as one,
147
+ // so every element stays aligned.
148
+ svg.removeAttribute("style");
149
+ svg.setAttribute("width", "100%");
150
+ svg.setAttribute("height", "100%");
151
+ const box = document.createElement("div");
152
+ box.className = "rq-mermaid-pz";
153
+ pre.replaceWith(box);
154
+ box.appendChild(svg);
155
+ svgPanZoom(svg, {
156
+ zoomEnabled: true,
157
+ controlIconsEnabled: true,
158
+ fit: true,
159
+ center: true,
160
+ zoomScaleSensitivity: 0.3,
161
+ minZoom: 0.2,
162
+ maxZoom: 20,
163
+ });
164
+ }
165
+ </script>
166
+ HTML
167
+ private_constant :MERMAID_SCRIPT
168
+
169
+ def wrap_standalone_html(body, title:, lang:, css:, theme:, mermaid: false)
91
170
  out = +"<!DOCTYPE html>\n"
92
171
  out << %(<html lang="#{html_escape_attr(lang)}">\n)
93
172
  out << "<head>\n"
@@ -97,6 +176,7 @@ module RedQuilt
97
176
  out << "<style>\n#{theme}</style>\n" if theme
98
177
  out << "</head>\n<body>\n"
99
178
  out << body
179
+ out << MERMAID_SCRIPT if mermaid
100
180
  out << "</body>\n</html>\n"
101
181
  out
102
182
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  # Central definition of the HTML element ids that connect a footnote
  # reference (`<sup>`), its definition (`<li>`), and the back-reference
  # links. Every producer and consumer of these ids calls into this module
  # so the strings cannot drift apart between call sites.
  module FootnoteAnchors
    module_function

    # Id placed on a footnote definition `<li>`; reference links target it.
    def definition_id(number)
      "fn-#{number}"
    end

    # Id placed on a reference `<sup>`; back-reference links target it.
    # The first occurrence uses the bare form, while any later occurrence
    # (occurrence > 1) appends `-N` so each back-reference stays unique.
    def reference_id(number, occurrence)
      return "fnref-#{number}" unless occurrence > 1

      "fnref-#{number}-#{occurrence}"
    end
  end
end
@@ -29,10 +29,14 @@ module RedQuilt
29
29
 
30
30
  # Re-append referenced definitions in first-reference order; detaching
31
31
  # all current children first means unreferenced definitions are left
32
- # orphaned (and so never rendered).
32
+ # orphaned (and so never rendered). The number and reference count are
33
+ # materialized onto each definition node so the renderer reads them off
34
+ # the arena rather than consulting the registry.
33
35
  @arena.child_ids(section_id).to_a.each { |child| @arena.detach(child) }
34
36
  @registry.referenced_labels.each do |label|
35
- @arena.append_child(section_id, @registry.definition_node(label))
37
+ def_id = @registry.definition_node(label)
38
+ @arena.resolve_footnote_definition(def_id, @registry.number(label), @registry.occurrences(label))
39
+ @arena.append_child(section_id, def_id)
36
40
  end
37
41
  end
38
42
  end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "psych"
4
+ require "date"
5
+
6
module RedQuilt
  # Extracts a leading YAML frontmatter block from a Markdown source.
  module Frontmatter
    # Matches a frontmatter block at the very start of the document: an
    # opening `---` line, a lazily-matched body (capture 1), and a closing
    # `---` or `...` line optionally followed by spaces/tabs.
    PATTERN = /\A---\n(.*?)\n(?:---|\.\.\.)[ \t]*(?:\n|\z)/m
    private_constant :PATTERN

    module_function

    # Extracts frontmatter from +source+, returning a two-element array:
    # [data, body]. +data+ is whatever the YAML body parses to (a Hash for
    # the usual key/value frontmatter), or nil when there is no frontmatter
    # or the YAML could not be loaded. +body+ is the source with the
    # frontmatter region blanked out; when no frontmatter is found it is
    # +source+ itself.
    #
    # +diagnostics+ is an optional array; when the YAML cannot be loaded a
    # warning Diagnostic is appended and +data+ is returned as nil.
    def extract(source, diagnostics: nil)
      match = PATTERN.match(source)
      return [nil, source] unless match

      data = parse_yaml(match[1], diagnostics: diagnostics)
      body = blank_out(source, match.end(0))
      [data, body]
    end

    # Parses the YAML body with a restricted loader (no arbitrary object
    # instantiation; Date / Time permitted for common frontmatter dates;
    # aliases disabled).
    #
    # Returns the parsed value, or nil when loading fails. Every Psych
    # failure is handled, not just syntax errors: with the restricted loader
    # a symbol value (`kind: :draft`), a custom tag, or an anchor/alias
    # raises Psych::DisallowedClass / Psych::BadAlias, and none of those
    # should abort the parse of the surrounding Markdown document.
    def parse_yaml(yaml, diagnostics: nil)
      Psych.safe_load(yaml, permitted_classes: [Date, Time], aliases: false)
    rescue Psych::Exception => e
      # Safe navigation also skips building the Diagnostic when the caller
      # did not ask for diagnostics.
      diagnostics&.push(
        Diagnostic.new(
          severity: :warning,
          rule: :frontmatter,
          message: "invalid YAML frontmatter: #{e.message}",
        ),
      )
      nil
    end

    # Replaces everything before +offset+ with one newline per consumed
    # source line, so line numbers of the remaining text are unchanged.
    def blank_out(source, offset)
      consumed = source[0, offset]
      ("\n" * consumed.count("\n")) + source[offset..]
    end
  end
end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  # CommonMark HTML-block classification (spec 4.6). The module-level
  # functions are pure functions over a line's text: given the raw line they
  # decide whether it opens an HTML block and of which of the seven types.
  # The nested Parser turns a classified run of lines into an arena node.
  module HtmlBlock
    module_function

    # Four literal spaces: the indent at which a line is indented code.
    # Built with `*` so the width is explicit and cannot be silently altered
    # by whitespace normalization.
    CODE_INDENT = (" " * 4).freeze
    private_constant :CODE_INDENT

    # True when `text` opens an HTML block (any of the 7 types). Indented
    # code (4+ leading spaces) takes precedence and is never an HTML block;
    # 0-3 leading spaces are allowed and handled by #type.
    def start?(text)
      return false if text.start_with?(CODE_INDENT)

      !type(text).nil?
    end

    # The HTML block type (1..7) opened by `text`, or nil if it opens none.
    def type(text)
      # Fast reject: every HTML block starts with `<` after at most three
      # spaces of indent, so peek at the first non-space byte before doing
      # any allocation.
      i = 0
      while i < 3 && i < text.length && text.getbyte(i) == 0x20
        i += 1
      end
      return nil unless i < text.length && text.getbyte(i) == 0x3C

      # Only slice when there actually was indent to drop.
      stripped = i.zero? ? text : text[i..]

      # Type 1: <script|pre|style|textarea (case-insensitive) followed by
      # space/tab/end-of-line or `>`. CommonMark restricts the separator
      # to space, tab, or a line ending (not any whitespace class).
      return 1 if stripped.match?(%r{\A<(script|pre|style|textarea)(?:[ \t]|>|$)}i)

      # Type 2: <!--
      return 2 if stripped.start_with?("<!--")

      # Type 3: <?
      return 3 if stripped.start_with?("<?")

      # Type 4: <! followed by uppercase ASCII letter
      return 4 if stripped.match?(%r{\A<![A-Z]})

      # Type 5: <![CDATA[
      return 5 if stripped.start_with?("<![CDATA[")

      # Type 6: line opens with one of the listed block-level tags.
      return 6 if stripped.match?(TYPE_6_RE)

      # Type 7: a complete open or closing tag spanning the line.
      return 7 if valid_tag?(stripped)

      nil
    end

    TYPE_6_NAMES = %w[
      address article aside base basefont blockquote body caption center
      col colgroup dd details dialog dir div dl dt fieldset figcaption
      figure footer form frame frameset h1 h2 h3 h4 h5 h6 head header
      hr html iframe legend li link main menu menuitem nav noframes ol
      optgroup option p param search section summary table tbody td
      tfoot th thead title tr track ul
    ].freeze
    TYPE_6_RE = %r{\A</?(?:#{TYPE_6_NAMES.join('|')})(?:[ \t]|>|/>|\z)}i
    private_constant :TYPE_6_NAMES, :TYPE_6_RE

    # Type 7: a complete open or closing tag on its own line.
    # Closing tags must not have attributes.
    #
    # HTML tag separators per CommonMark 6.6 are space, tab, or up to one
    # line ending -- not the broader \s class (which would include form
    # feed and vertical tab).
    TYPE_7_OPEN_TAG_RE = %r{
      \A
      <[A-Za-z][A-Za-z0-9-]*
      (?:[ \t\r\n]+[A-Za-z_:][A-Za-z0-9_.:-]*(?:[ \t\r\n]*=[ \t\r\n]*(?:"[^"\n]*"|'[^'\n]*'|[^ \t\r\n"'=<>`]+))?)*
      [ \t\r\n]*/?>
      \z
    }x
    TYPE_7_CLOSING_TAG_RE = %r{\A</[A-Za-z][A-Za-z0-9-]*[ \t\r\n]*>\z}
    private_constant :TYPE_7_OPEN_TAG_RE, :TYPE_7_CLOSING_TAG_RE

    # True when `text` is exactly one complete open tag or closing tag.
    def valid_tag?(text)
      # Fast reject: every type-7 tag must begin with `<`.
      return false unless text.start_with?("<")

      TYPE_7_OPEN_TAG_RE.match?(text) || TYPE_7_CLOSING_TAG_RE.match?(text)
    end

    # End condition for a type-1 block: per CommonMark 4.6 a line containing
    # any of these end tags closes the block, case-insensitively, and the
    # end tag need not match the tag that opened it.
    TYPE_1_END_RE = %r{</(?:script|pre|style|textarea)>}i
    private_constant :TYPE_1_END_RE

    # Closing-condition strings for HTML block types 2-5 (type 1 uses
    # TYPE_1_END_RE; types 6 and 7 use blank-line termination).
    FIXED_TERMINATORS = { 2 => "-->", 3 => "?>", 4 => ">", 5 => "]]>" }.freeze
    private_constant :FIXED_TERMINATORS

    # Collaborator that builds HTML_BLOCK nodes. It keeps only the arena
    # taken from the block parser; everything else lives in method locals.
    class Parser
      # block_parser: any object exposing `arena`.
      def initialize(block_parser)
        @arena = block_parser.arena
      end

      # Parses the HTML block starting at lines[index] (expected to have
      # been confirmed by HtmlBlock.start?), appends one HTML_BLOCK node
      # under parent_id whose str1 is the block's lines joined by "\n", and
      # returns the index of the first line after the block.
      def parse(parent_id, lines, index)
        start_index = index
        type = HtmlBlock.type(lines[index].content)
        end_index = locate_end(lines, index, type)

        start_byte = lines[start_index].start_byte
        end_byte = lines[end_index].end_byte
        html_lines = (start_index..end_index).map { |i| lines[i].content }
        html_id = @arena.add_node(NodeType::HTML_BLOCK,
                                  source_start: start_byte,
                                  source_len: end_byte - start_byte,
                                  str1: html_lines.join("\n"))
        @arena.append_child(parent_id, html_id)
        end_index + 1
      end

      private

      # Index of the last line belonging to the block that starts at
      # `index`. Types 1-5 end on the first line (the opening line
      # included) that satisfies their end condition, or at the last line
      # of input when none does. Types 6 and 7 (and an unclassified line)
      # end just before the next blank line or at end of input.
      def locate_end(lines, index, type)
        if type && type <= 5
          index += 1 until index >= lines.length || closes?(type, lines[index].content)
          index < lines.length ? index : lines.length - 1
        else
          index += 1 while index < lines.length && !lines[index].blank
          index - 1
        end
      end

      # True when `line` satisfies the end condition of a type 1-5 block.
      def closes?(type, line)
        return TYPE_1_END_RE.match?(line) if type == 1

        line.include?(FIXED_TERMINATORS.fetch(type))
      end
    end
  end
end
@@ -57,6 +57,41 @@ module RedQuilt
57
57
  end
58
58
  end
59
59
 
60
+ # Strips up to `max` leading 0x20 (space) bytes from `text`, returning
61
+ # the rest. Unlike #strip_columns this is a plain byte strip (tabs are
62
+ # not expanded); used where the spec counts literal spaces, e.g. a
63
+ # fenced code block stripping its own opening indent. No-alloc return
64
+ # when `text` already starts at a non-space byte.
65
def strip_leading_spaces(text, max)
  return text unless max.positive?

  # Never look past the end of the string or past the caller's cap.
  limit = [max, text.bytesize].min
  cut = 0
  cut += 1 while cut < limit && text.getbyte(cut) == 0x20

  # Hand back the very same object when nothing was stripped.
  cut.zero? ? text : text.byteslice(cut..)
end
77
+
78
+ # Strips all leading 0x20 / 0x09 bytes from `text` (spaces and tabs,
79
+ # no column cap). Same no-alloc return as #strip_leading_spaces when
80
+ # `text` already starts at a non-whitespace byte.
81
def strip_leading_whitespace(text)
  total = text.bytesize
  skip = 0
  while skip < total
    # Advance over spaces (0x20) and tabs (0x09); stop at anything else.
    case text.getbyte(skip)
    when 0x20, 0x09 then skip += 1
    else break
    end
  end

  # Hand back the very same object when nothing was stripped.
  skip.zero? ? text : text.byteslice(skip..)
end
94
+
60
95
  # Bytes of literal leading 0x20 / 0x09 in `text`.
61
96
  def leading_ws_bytes(text)
62
97
  i = 0
@@ -9,22 +9,10 @@ module RedQuilt
9
9
  # 1. linear_pass — code spans, brackets (link/image), autolinks,
10
10
  # HTML, simple inlines. Emphasis delimiter runs are added as
11
11
  # provisional TEXT nodes and pushed onto a delimiter stack.
12
- # 2. process_emphasis — CommonMark spec 6.2 algorithm pairs up
13
- # delimiter stack entries into EMPHASIS / STRONG nodes.
12
+ # 2. EmphasisResolver#resolve — CommonMark spec 6.2 algorithm pairs
13
+ # up delimiter stack entries into EMPHASIS / STRONG nodes
14
+ # (delegated to Inline::EmphasisResolver).
14
15
  class Builder
15
- SAFE_SCHEMES = %w[http https mailto ftp tel ssh].freeze
16
- # Autolinks (`<scheme:...>`) are not run through the SAFE_SCHEMES
17
- # allowlist: CommonMark permits arbitrary schemes there (e.g.
18
- # `<made-up-scheme://x>`), and an allowlist would break that
19
- # conformance. Only the schemes that execute script when the link
20
- # is navigated are denied.
21
- UNSAFE_AUTOLINK_SCHEMES = %w[javascript vbscript data].freeze
22
-
23
- # `count` is the CommonMark delimiter-run length; a Delimiter is
24
- # never enumerated, so shadowing Struct#count (from Enumerable) is
25
- # intentional rather than a footgun.
26
- Delimiter = Struct.new(:node_id, :char, :count, :can_open, :can_close) # rubocop:disable Lint/StructNewOverride
27
-
28
16
  Bracket = Struct.new(:token_id, :node_id, :image, :active, :delim_stack_size)
29
17
 
30
18
  # track_source: when true, arena nodes carry the byte ranges supplied
@@ -49,6 +37,7 @@ module RedQuilt
49
37
  @diagnostics = diagnostics
50
38
  @footnotes = footnotes
51
39
  @link_scanner = LinkScanner.new(source)
40
+ @emphasis = EmphasisResolver.new(arena, track_source: track_source)
52
41
  end
53
42
 
54
43
  def build(parent_id, tokens)
@@ -58,7 +47,7 @@ module RedQuilt
58
47
  @bracket_stack = []
59
48
  @provisional_nodes = {}
60
49
  linear_pass
61
- process_emphasis(@delimiter_stack)
50
+ @emphasis.resolve(@delimiter_stack, @provisional_nodes)
62
51
  end
63
52
 
64
53
  private
@@ -228,29 +217,12 @@ module RedQuilt
228
217
  link_id = add_arena_node(
229
218
  NodeType::LINK,
230
219
  @tokens.start_byte(id), @tokens.end_byte(id),
231
- str1: block_unsafe_autolink(@link_scanner.normalize_uri(destination)),
220
+ str1: UrlSanitizer.block_unsafe_autolink(@link_scanner.normalize_uri(destination), @diagnostics),
232
221
  )
233
222
  @arena.append_child(@parent_id, link_id)
234
223
  @arena.append_child(link_id, @arena.add_node(NodeType::TEXT, str1: label))
235
224
  end
236
225
 
237
- # Returns "" (blocking the href) for autolink destinations whose
238
- # scheme executes script on navigation; otherwise the destination
239
- # is returned unchanged. Unlike sanitize_destination this is a
240
- # denylist, to stay CommonMark-conformant for benign custom schemes.
241
- def block_unsafe_autolink(destination)
242
- scheme = destination[%r{\A([a-zA-Z][a-zA-Z0-9+\-.]*):}, 1]
243
- return destination if scheme.nil?
244
- return destination unless UNSAFE_AUTOLINK_SCHEMES.include?(scheme.downcase)
245
-
246
- report_diagnostic(
247
- severity: :warning,
248
- rule: :unsafe_url,
249
- message: "Unsafe URL scheme #{scheme.downcase.inspect} blocked",
250
- )
251
- ""
252
- end
253
-
254
226
  # --------------------------- code spans -----------------------------
255
227
 
256
228
  # Find the closing backtick run for a code span by scanning the
@@ -400,7 +372,7 @@ module RedQuilt
400
372
  link_kind = opener.image ? NodeType::IMAGE : NodeType::LINK
401
373
  link_id = add_arena_node(
402
374
  link_kind, opener_start, match[:end_byte],
403
- str1: sanitize_destination(match[:destination]),
375
+ str1: UrlSanitizer.sanitize_destination(match[:destination], @diagnostics),
404
376
  str2: match[:title],
405
377
  )
406
378
 
@@ -416,7 +388,7 @@ module RedQuilt
416
388
  @arena.detach(opener.node_id)
417
389
 
418
390
  inner_delims = @delimiter_stack.slice!(opener.delim_stack_size..) || []
419
- process_emphasis(inner_delims)
391
+ @emphasis.resolve(inner_delims, @provisional_nodes)
420
392
 
421
393
  @bracket_stack.delete_at(opener_index)
422
394
 
@@ -489,22 +461,6 @@ module RedQuilt
489
461
  last
490
462
  end
491
463
 
492
- def sanitize_destination(destination)
493
- return "" if destination.nil?
494
- return destination if destination.start_with?("/", "#")
495
-
496
- scheme = destination[%r{\A([a-zA-Z][a-zA-Z0-9+\-.]*):}, 1]
497
- return destination if scheme.nil?
498
- return destination if SAFE_SCHEMES.include?(scheme.downcase)
499
-
500
- report_diagnostic(
501
- severity: :warning,
502
- rule: :unsafe_url,
503
- message: "Unsafe URL scheme #{scheme.downcase.inspect} blocked",
504
- )
505
- ""
506
- end
507
-
508
464
  def report_diagnostic(severity:, rule:, message:, source_span: nil)
509
465
  return unless @diagnostics
510
466
 
@@ -530,145 +486,12 @@ module RedQuilt
530
486
  @arena.append_child(@parent_id, node_id)
531
487
  @provisional_nodes[node_id] = true
532
488
 
533
- @delimiter_stack << Delimiter.new(
489
+ @delimiter_stack << EmphasisResolver::Delimiter.new(
534
490
  node_id, char, count,
535
491
  (flags & 0b10) != 0,
536
492
  (flags & 0b01) != 0,
537
493
  )
538
494
  end
539
-
540
- def process_emphasis(stack)
541
- # NB: the CommonMark spec describes an `openers_bottom`
542
- # optimization keyed by closer character / length / flanking
543
- # flags. Implementing that correctly is subtle (a single
544
- # per-character bottom blocks valid matches like
545
- # `*foo**bar**baz*`), so the implementation here just walks
546
- # back to the start of the stack for every closer. This is
547
- # O(stack^2) in the worst case but stacks are tiny in practice.
548
- closer_idx = 0
549
-
550
- while closer_idx < stack.length
551
- closer = stack[closer_idx]
552
- unless closer.can_close
553
- closer_idx += 1
554
- next
555
- end
556
-
557
- opener_idx = closer_idx - 1
558
- found = false
559
- while opener_idx >= 0
560
- opener = stack[opener_idx]
561
- if opener.can_open && opener.char == closer.char
562
- skip = false
563
- if (opener.can_close || closer.can_open) &&
564
- ((opener.count + closer.count) % 3).zero? &&
565
- !((opener.count % 3).zero? && (closer.count % 3).zero?)
566
- skip = true
567
- end
568
- unless skip
569
- found = true
570
- break
571
- end
572
- end
573
- opener_idx -= 1
574
- end
575
-
576
- unless found
577
- unless closer.can_open
578
- @provisional_nodes.delete(closer.node_id)
579
- stack.delete_at(closer_idx)
580
- end
581
- closer_idx += 1
582
- next
583
- end
584
-
585
- opener = stack[opener_idx]
586
- strength = [opener.count, closer.count].min >= 2 ? 2 : 1
587
- if closer.char == "~"
588
- # GFM strikethrough only forms on `~~` runs. A single `~`
589
- # leaves the delimiter as text; advance the cursor so future
590
- # `~~` pairs can still match.
591
- if strength < 2
592
- closer_idx += 1
593
- next
594
- end
595
- kind = NodeType::STRIKETHROUGH
596
- else
597
- kind = strength == 2 ? NodeType::STRONG : NodeType::EMPHASIS
598
- end
599
-
600
- # CommonMark spec: any delimiters strictly between this opener and
601
- # closer can't open or close anything in this scope, so drop them
602
- # from the stack before we rebuild the tree. Their arena nodes
603
- # stay where they are (they'll be reparented into the new emphasis
604
- # alongside the surrounding content), but they must no longer be
605
- # candidates for future iterations. Without this, the next
606
- # iteration would try to pair stranded delimiters that have
607
- # already been moved into a different parent, which corrupts the
608
- # sibling chain (Arena#reparent walks into @parent[-1]).
609
- if closer_idx > opener_idx + 1
610
- removed = stack.slice!((opener_idx + 1)...closer_idx)
611
- removed.each { |e| @provisional_nodes.delete(e.node_id) }
612
- closer_idx = opener_idx + 1
613
- closer = stack[closer_idx]
614
- end
615
-
616
- opener_node = opener.node_id
617
- closer_node = closer.node_id
618
-
619
- if @track_source
620
- opener_match_start = @arena.source_end(opener_node) - strength
621
- closer_match_end = @arena.source_start(closer_node) + strength
622
- else
623
- opener_match_start = -1
624
- closer_match_end = 0
625
- end
626
- emphasis_id = add_arena_node(kind, opener_match_start, closer_match_end)
627
-
628
- first_inside = @arena.raw_next_sibling_id(opener_node)
629
- last_inside = @arena.raw_prev_sibling_id(closer_node)
630
- if first_inside != -1 && last_inside != -1 &&
631
- first_inside != closer_node && last_inside != opener_node
632
- @arena.reparent(emphasis_id, first_inside, last_inside)
633
- end
634
-
635
- parent_id = @arena.raw_parent_id(opener_node)
636
- @arena.insert_before(parent_id, closer_node, emphasis_id)
637
-
638
- if opener.count == strength
639
- @provisional_nodes.delete(opener_node)
640
- @arena.detach(opener_node)
641
- stack.delete_at(opener_idx)
642
- closer_idx -= 1
643
- else
644
- opener.count -= strength
645
- str = @arena.str1(opener_node)
646
- @arena.update_str1(opener_node, str[0...-strength])
647
- if @track_source
648
- new_end = @arena.source_end(opener_node) - strength
649
- @arena.update_span(opener_node, @arena.source_start(opener_node), new_end)
650
- end
651
- end
652
-
653
- if closer.count == strength
654
- @provisional_nodes.delete(closer_node)
655
- @arena.detach(closer_node)
656
- stack.delete_at(closer_idx)
657
- else
658
- closer.count -= strength
659
- str = @arena.str1(closer_node)
660
- @arena.update_str1(closer_node, str[strength..])
661
- if @track_source
662
- new_start = @arena.source_start(closer_node) + strength
663
- new_end = @arena.source_end(closer_node)
664
- @arena.update_span(closer_node, new_start, new_end)
665
- end
666
- end
667
- end
668
-
669
- stack.each { |e| @provisional_nodes.delete(e.node_id) }
670
- stack.clear
671
- end
672
495
  end
673
496
  end
674
497
  end