red_quilt 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  module Inline
    # CommonMark emphasis algorithm (spec 6.2). Phase 2 of inline parsing:
    # given the delimiter stack the linear pass collected (provisional TEXT
    # nodes for each `*` / `_` / `~` run), it pairs openers with closers and
    # rebuilds the arena subtree into EMPHASIS / STRONG / STRIKETHROUGH
    # nodes.
    #
    # Kept separate from Builder because it is a closed algorithm with a
    # narrow interface: it only needs the arena, the set of still-provisional
    # nodes (so consumed delimiters can be unmarked), and whether source
    # spans are tracked. Builder owns the linear pass and bracket handling;
    # it hands this resolver a delimiter stack to collapse.
    class EmphasisResolver
      # One delimiter run on the stack: the arena node holding the run's
      # text, the delimiter character, the run length, and the two flanking
      # flags.
      #
      # `count` is the CommonMark delimiter-run length; a Delimiter is
      # never enumerated, so shadowing Struct#count (from Enumerable) is
      # intentional rather than a footgun.
      Delimiter = Struct.new(:node_id, :char, :count, :can_open, :can_close) # rubocop:disable Lint/StructNewOverride

      # `arena` is the node store that is rewritten in place;
      # `track_source` selects whether wrapper nodes and trimmed delimiters
      # get real byte spans or the (-1, 0) "no span" placeholder.
      def initialize(arena, track_source:)
        @arena = arena
        @track_source = track_source
      end

      # Collapses `stack` (an Array of Delimiter) in place, removing
      # consumed entries from `provisional_nodes`. Used both for the
      # document-level stack and for the inner delimiters of a resolved
      # link/image (see Builder#finalize_link).
      #
      # On return the stack is empty and every delimiter that was on it has
      # been removed from `provisional_nodes`.
      def resolve(stack, provisional_nodes)
        # NB: the CommonMark spec describes an `openers_bottom`
        # optimization keyed by closer character / length / flanking
        # flags. Implementing that correctly is subtle (a single
        # per-character bottom blocks valid matches like
        # `*foo**bar**baz*`), so the implementation here just walks
        # back to the start of the stack for every closer. This is
        # O(stack^2) in the worst case but stacks are tiny in practice.
        closer_idx = 0

        while closer_idx < stack.length
          closer = stack[closer_idx]
          unless closer.can_close
            closer_idx += 1
            next
          end

          opener_idx = find_opener_index(stack, closer_idx)
          if opener_idx.nil?
            if closer.can_open
              # Still usable as an opener for a later closer: keep it and
              # move on.
              closer_idx += 1
            else
              # Can neither close (no opener) nor open: drop it. The cursor
              # is deliberately NOT advanced, because delete_at slides the
              # following entry into `closer_idx` and that entry still has
              # to be examined as a potential closer.
              provisional_nodes.delete(closer.node_id)
              stack.delete_at(closer_idx)
            end
            next
          end

          opener = stack[opener_idx]
          strength = [opener.count, closer.count].min >= 2 ? 2 : 1
          if closer.char == "~"
            # GFM strikethrough only forms on `~~` runs. A single `~`
            # leaves the delimiter as text; advance the cursor so future
            # `~~` pairs can still match.
            if strength < 2
              closer_idx += 1
              next
            end
            kind = NodeType::STRIKETHROUGH
          else
            kind = strength == 2 ? NodeType::STRONG : NodeType::EMPHASIS
          end

          # CommonMark spec: any delimiters strictly between this opener and
          # closer can't open or close anything in this scope, so drop them
          # from the stack before we rebuild the tree. Their arena nodes
          # stay where they are (they'll be reparented into the new emphasis
          # alongside the surrounding content), but they must no longer be
          # candidates for future iterations. Without this, the next
          # iteration would try to pair stranded delimiters that have
          # already been moved into a different parent, which corrupts the
          # sibling chain (Arena#reparent walks into @parent[-1]).
          if closer_idx > opener_idx + 1
            removed = stack.slice!((opener_idx + 1)...closer_idx)
            removed.each { |e| provisional_nodes.delete(e.node_id) }
            # The closer itself is unchanged; only its index moved.
            closer_idx = opener_idx + 1
          end

          opener_node = opener.node_id
          closer_node = closer.node_id

          if @track_source
            # The wrapper spans from the last `strength` characters of the
            # opener run to the first `strength` characters of the closer.
            opener_match_start = @arena.source_end(opener_node) - strength
            closer_match_end = @arena.source_start(closer_node) + strength
          else
            opener_match_start = -1
            closer_match_end = 0
          end
          emphasis_id = add_node(kind, opener_match_start, closer_match_end)

          # Move everything between the two delimiter nodes under the new
          # wrapper; skipped when the delimiters are adjacent (nothing
          # in between).
          first_inside = @arena.raw_next_sibling_id(opener_node)
          last_inside = @arena.raw_prev_sibling_id(closer_node)
          if first_inside != -1 && last_inside != -1 &&
             first_inside != closer_node && last_inside != opener_node
            @arena.reparent(emphasis_id, first_inside, last_inside)
          end

          parent_id = @arena.raw_parent_id(opener_node)
          @arena.insert_before(parent_id, closer_node, emphasis_id)

          # Consume `strength` characters from the inner end of each
          # delimiter. The opener is trimmed on its right (trailing) end,
          # the closer on its left (leading) end; removing the opener from
          # the stack shifts the closer one slot left.
          closer_idx -= 1 if consume_delimiter(opener, opener_idx, stack, strength, provisional_nodes, from_start: false)
          consume_delimiter(closer, closer_idx, stack, strength, provisional_nodes, from_start: true)
        end

        # Whatever is left never paired up: it stays as literal text.
        stack.each { |e| provisional_nodes.delete(e.node_id) }
        stack.clear
      end

      private

      # Walks back from the closer at `closer_idx` and returns the index of
      # the nearest entry that can open, uses the same delimiter character,
      # and is not excluded by the rule of three; nil when there is none.
      def find_opener_index(stack, closer_idx)
        closer = stack[closer_idx]
        (closer_idx - 1).downto(0) do |idx|
          candidate = stack[idx]
          next unless candidate.can_open && candidate.char == closer.char
          return idx unless rule_of_three_blocks?(candidate, closer)
        end
        nil
      end

      # CommonMark "rule of three": when either delimiter can both open and
      # close, the pair is rejected if the two run lengths sum to a multiple
      # of 3 unless both lengths are themselves multiples of 3.
      def rule_of_three_blocks?(opener, closer)
        return false unless opener.can_close || closer.can_open

        sum_is_multiple = ((opener.count + closer.count) % 3).zero?
        both_are_multiples = (opener.count % 3).zero? && (closer.count % 3).zero?
        sum_is_multiple && !both_are_multiples
      end

      # Mirrors Builder#add_arena_node for the nodes this resolver creates
      # (emphasis wrappers only ever take a type and a span).
      def add_node(type, start_byte, end_byte)
        if @track_source
          @arena.add_node(type, source_start: start_byte, source_len: end_byte - start_byte)
        else
          @arena.add_node(type, source_start: -1, source_len: 0)
        end
      end

      # Removes `strength` characters from one end of a delimiter run. When
      # the whole run is consumed the node is detached and dropped from the
      # stack (returns true); otherwise its count, str1, and — in
      # source-tracking mode — its span are trimmed on the requested side
      # (`from_start` trims the leading end, used for closers; trailing for
      # openers) and it stays on the stack (returns false).
      def consume_delimiter(entry, index, stack, strength, provisional_nodes, from_start:)
        node = entry.node_id
        if entry.count == strength
          provisional_nodes.delete(node)
          @arena.detach(node)
          stack.delete_at(index)
          return true
        end

        entry.count -= strength
        str = @arena.str1(node)
        @arena.update_str1(node, from_start ? str[strength..] : str[0...-strength])
        if @track_source
          start_byte = @arena.source_start(node)
          end_byte = @arena.source_end(node)
          if from_start
            @arena.update_span(node, start_byte + strength, end_byte)
          else
            @arena.update_span(node, start_byte, end_byte - strength)
          end
        end
        false
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  module Inline
    # URL-scheme security policy for inline link / image / autolink
    # destinations. Kept separate from Builder so the "which schemes are
    # safe and how blocking is reported" concern has a single home and can
    # change without touching the inline construction logic.
    #
    # Stateless (module_function); diagnostics are appended to the caller's
    # array (or skipped when it is nil), and a Diagnostic is only built when
    # a destination is actually blocked.
    module UrlSanitizer
      module_function

      SAFE_SCHEMES = %w[http https mailto ftp tel ssh].freeze

      # Autolinks (`<scheme:...>`) are not run through the SAFE_SCHEMES
      # allowlist: CommonMark permits arbitrary schemes there (e.g.
      # `<made-up-scheme://x>`), and an allowlist would break that
      # conformance. Only the schemes that execute script when the link
      # is navigated are denied.
      UNSAFE_AUTOLINK_SCHEMES = %w[javascript vbscript data].freeze

      SCHEME_RE = /\A([a-zA-Z][a-zA-Z0-9+\-.]*):/

      # Characters a browser URL parser discards before it looks for the
      # scheme: leading C0 control / space characters, and ASCII tab or
      # newline anywhere in the string. Scheme detection must ignore them
      # too, otherwise ` javascript:` or `java<TAB>script:` would look
      # scheme-less here yet still run as `javascript:` when navigated.
      URL_PARSER_IGNORED_RE = /\A[\x00-\x20]+|[\t\n\r]/

      # Link / image destinations: allowlist. Relative URLs (starting `/`
      # or `#`) and scheme-less URLs pass; an unknown scheme is blocked
      # (href emptied) and a diagnostic is recorded.
      #
      # Returns "" for a nil or blocked destination; otherwise returns the
      # destination unchanged (it is never rewritten, only accepted or
      # rejected).
      def sanitize_destination(destination, diagnostics)
        return "" if destination.nil?
        return destination if destination.start_with?("/", "#")

        scheme = scheme_of(destination)
        return destination if scheme.nil?
        return destination if SAFE_SCHEMES.include?(scheme.downcase)

        report_blocked(diagnostics, scheme)
        ""
      end

      # Autolink destinations: denylist. The destination is returned
      # unchanged unless its scheme executes script on navigation, in which
      # case the href is emptied and a diagnostic is recorded.
      def block_unsafe_autolink(destination, diagnostics)
        scheme = scheme_of(destination)
        return destination if scheme.nil?
        return destination unless UNSAFE_AUTOLINK_SCHEMES.include?(scheme.downcase)

        report_blocked(diagnostics, scheme)
        ""
      end

      # Returns the URL scheme of `destination` as a browser would see it
      # (without the trailing colon, original letter case), or nil when
      # there is none. The copy with ignorable characters removed is only
      # made when the destination actually contains some.
      def scheme_of(destination)
        probe = destination
        probe = destination.gsub(URL_PARSER_IGNORED_RE, "") if destination.match?(URL_PARSER_IGNORED_RE)
        probe[SCHEME_RE, 1]
      end

      # Appends an :unsafe_url warning naming the (downcased) scheme to
      # `diagnostics`; does nothing when `diagnostics` is nil.
      def report_blocked(diagnostics, scheme)
        return unless diagnostics

        diagnostics << Diagnostic.new(
          severity: :warning,
          rule: :unsafe_url,
          message: "Unsafe URL scheme #{scheme.downcase.inspect} blocked",
        )
      end
    end
  end
end
@@ -10,5 +10,10 @@ module RedQuilt
10
10
  # Positional (not keyword_init): one Line is built per source line, so
11
11
  # the ~2.5x faster positional constructor matters on large documents.
12
12
  # Argument order: content, start_byte, end_byte, blank, lazy_continuation.
13
- Line = Struct.new(:content, :start_byte, :end_byte, :blank, :lazy_continuation)
13
+ Line = Struct.new(:content, :start_byte, :end_byte, :blank, :lazy_continuation) do
14
+ # Byte length of the line's span in the original source.
15
+ def span_len
16
+ end_byte - start_byte
17
+ end
18
+ end
14
19
  end
@@ -22,7 +22,7 @@ module RedQuilt
22
22
  walk(@document.root_id) do |id|
23
23
  case @arena.type(id)
24
24
  when NodeType::HEADING
25
- level = @arena.int1(id)
25
+ level = @arena.heading_level(id)
26
26
  if last_heading_level.positive? && level > last_heading_level + 1
27
27
  push(:info, :heading_level_skip,
28
28
  "Heading jumps from h#{last_heading_level} to h#{level}",
@@ -45,7 +45,7 @@ module RedQuilt
45
45
  end
46
46
 
47
47
  def check_empty_link(node_id)
48
- return unless @arena.str1(node_id).to_s.empty?
48
+ return unless @arena.link_destination(node_id).to_s.empty?
49
49
 
50
50
  push(:warning, :empty_link,
51
51
  "Link has no destination",
@@ -46,6 +46,13 @@ module RedQuilt
46
46
  text
47
47
  end
48
48
 
49
+ # Returns the fence info string of a CODE_BLOCK node.
50
+ def info
51
+ return "" unless @arena.type(@node_id) == NodeType::CODE_BLOCK
52
+
53
+ @arena.code_block_info(@node_id).to_s
54
+ end
55
+
49
56
  def source_span
50
57
  @arena.source_span(@node_id)
51
58
  end
@@ -86,26 +93,28 @@ module RedQuilt
86
93
  def ast_attributes
87
94
  case @arena.type(@node_id)
88
95
  when NodeType::HEADING
89
- { level: @arena.int1(@node_id), text: text }
96
+ { level: @arena.heading_level(@node_id), text: text }
90
97
  when NodeType::LIST
91
98
  {
92
- ordered: @arena.int1(@node_id) == 1,
93
- start_number: @arena.int2(@node_id),
94
- tight: @arena.int3(@node_id) == 1,
95
- delimiter: @arena.str1(@node_id),
99
+ ordered: @arena.list_ordered?(@node_id),
100
+ start_number: @arena.list_start(@node_id),
101
+ tight: @arena.list_tight?(@node_id),
102
+ delimiter: @arena.list_delimiter(@node_id),
96
103
  }
97
- when NodeType::TABLE_ROW, NodeType::TABLE_CELL
98
- { header: @arena.int1(@node_id) == 1, text: text }
104
+ when NodeType::TABLE_ROW
105
+ { header: @arena.table_row_header?(@node_id), text: text }
106
+ when NodeType::TABLE_CELL
107
+ { header: @arena.table_cell_header?(@node_id), text: text }
99
108
  when NodeType::TEXT, NodeType::CODE_SPAN, NodeType::HTML_BLOCK, NodeType::HTML_INLINE, NodeType::PARAGRAPH
100
109
  { text: text }
101
110
  when NodeType::CODE_BLOCK
102
- { text: @arena.text(@node_id), info: @arena.str2(@node_id) }
111
+ { text: @arena.text(@node_id), info: @arena.code_block_info(@node_id) }
103
112
  when NodeType::LINK, NodeType::IMAGE
104
- { destination: @arena.str1(@node_id), title: @arena.str2(@node_id), text: text }
113
+ { destination: @arena.link_destination(@node_id), title: @arena.link_title(@node_id), text: text }
105
114
  when NodeType::FOOTNOTE_REFERENCE
106
- { label: @arena.str1(@node_id), number: @arena.int1(@node_id) }
115
+ { label: @arena.footnote_label(@node_id), number: @arena.footnote_number(@node_id) }
107
116
  when NodeType::FOOTNOTE_DEFINITION
108
- { label: @arena.str1(@node_id) }
117
+ { label: @arena.footnote_label(@node_id) }
109
118
  else
110
119
  {}
111
120
  end
@@ -3,11 +3,12 @@
3
3
  module RedQuilt
4
4
  module Renderer
5
5
  class HTML
6
- def initialize(document, heading_ids: false)
6
+ def initialize(document, heading_ids: false, mermaid: false)
7
7
  @document = document
8
8
  @arena = document.arena
9
9
  @out = +""
10
10
  @slugger = Slug::Counter.new if heading_ids
11
+ @mermaid = mermaid
11
12
  end
12
13
 
13
14
  def render
@@ -15,6 +16,18 @@ module RedQuilt
15
16
  @out
16
17
  end
17
18
 
19
+ # Renders the given nodes (an Array of NodeRef) in order and returns
20
+ # the resulting HTML fragment, without affecting the main render
21
+ # output.
22
+ def render_fragment(nodes)
23
+ saved = @out
24
+ @out = +""
25
+ nodes.each { |node| render_node(node.node_id) }
26
+ @out
27
+ ensure
28
+ @out = saved
29
+ end
30
+
18
31
  private
19
32
 
20
33
  # CommonMark-compliant HTML escape: only `&`, `<`, `>`, `"` are
@@ -45,7 +58,7 @@ module RedQuilt
45
58
  render_children(node_id)
46
59
  @out << "</p>\n"
47
60
  when NodeType::HEADING
48
- level = @arena.int1(node_id)
61
+ level = @arena.heading_level(node_id)
49
62
  if @slugger
50
63
  id = @slugger.generate(PlainText.from(@arena, node_id))
51
64
  @out << %(<h#{level} id="#{escape_html(id)}">)
@@ -61,9 +74,9 @@ module RedQuilt
61
74
  render_children(node_id)
62
75
  @out << "</blockquote>\n"
63
76
  when NodeType::LIST
64
- ordered = @arena.int1(node_id) == 1
77
+ ordered = @arena.list_ordered?(node_id)
65
78
  tag = ordered ? "ol" : "ul"
66
- start_number = @arena.int2(node_id)
79
+ start_number = @arena.list_start(node_id)
67
80
  attrs = ordered && start_number != 1 ? %( start="#{start_number}") : ""
68
81
  @out << "<#{tag}#{attrs}>\n"
69
82
  render_children(node_id)
@@ -73,12 +86,21 @@ module RedQuilt
73
86
  render_list_item(node_id)
74
87
  @out << "</li>\n"
75
88
  when NodeType::CODE_BLOCK
76
- @out << "<pre><code"
77
- info_word = @arena.str2(node_id).to_s.split.first.to_s
78
- @out << %( class="language-#{escape_html(info_word)}") unless info_word.empty?
79
- @out << ">"
80
- @out << escape_html(@arena.text(node_id).to_s)
81
- @out << "</code></pre>\n"
89
+ info_word = @arena.code_block_info(node_id).to_s.split.first.to_s
90
+ if @mermaid && info_word == "mermaid"
91
+ # Emit a container mermaid.js recognizes via class="mermaid".
92
+ # The diagram source is still HTML-escaped; the browser decodes
93
+ # the entities back into textContent, which is what mermaid reads.
94
+ @out << %(<pre class="mermaid">)
95
+ @out << escape_html(@arena.text(node_id).to_s)
96
+ @out << "</pre>\n"
97
+ else
98
+ @out << "<pre><code"
99
+ @out << %( class="language-#{escape_html(info_word)}") unless info_word.empty?
100
+ @out << ">"
101
+ @out << escape_html(@arena.text(node_id).to_s)
102
+ @out << "</code></pre>\n"
103
+ end
82
104
  when NodeType::HTML_BLOCK
83
105
  render_raw_html(@arena.text(node_id).to_s, block: true)
84
106
  when NodeType::TABLE
@@ -106,7 +128,7 @@ module RedQuilt
106
128
  when NodeType::CODE_SPAN
107
129
  @out << "<code>#{escape_html(@arena.text(node_id).to_s)}</code>"
108
130
  when NodeType::LINK
109
- dest = escape_html(@arena.str1(node_id).to_s)
131
+ dest = escape_html(@arena.link_destination(node_id).to_s)
110
132
  @out << %(<a href="#{dest}")
111
133
  append_title_attribute(node_id)
112
134
  @out << ">"
@@ -114,7 +136,7 @@ module RedQuilt
114
136
  @out << "</a>"
115
137
  when NodeType::IMAGE
116
138
  alt = PlainText.from(@arena, node_id)
117
- dest = escape_html(@arena.str1(node_id).to_s)
139
+ dest = escape_html(@arena.link_destination(node_id).to_s)
118
140
  @out << %(<img src="#{dest}" alt="#{escape_html(alt)}")
119
141
  append_title_attribute(node_id)
120
142
  @out << " />"
@@ -131,10 +153,11 @@ module RedQuilt
131
153
  # element ids use the footnote number; a second+ reference to the
132
154
  # same footnote gets a `-M` suffix so each backref has a unique target.
133
155
  def render_footnote_reference(node_id)
134
- number = @arena.int1(node_id)
135
- occurrence = @arena.int2(node_id)
136
- ref_id = occurrence > 1 ? "fnref-#{number}-#{occurrence}" : "fnref-#{number}"
137
- @out << %(<sup><a href="#fn-#{number}" id="#{ref_id}">#{number}</a></sup>)
156
+ number = @arena.footnote_number(node_id)
157
+ occurrence = @arena.footnote_occurrence(node_id)
158
+ ref_id = FootnoteAnchors.reference_id(number, occurrence)
159
+ def_id = FootnoteAnchors.definition_id(number)
160
+ @out << %(<sup><a href="##{def_id}" id="#{ref_id}">#{number}</a></sup>)
138
161
  end
139
162
 
140
163
  def render_footnotes_section(node_id)
@@ -144,10 +167,9 @@ module RedQuilt
144
167
  end
145
168
 
146
169
  def render_footnote_definition(def_id)
147
- label = @arena.str1(def_id).to_s
148
- number = @document.footnotes.number(label)
149
- occurrences = @document.footnotes.occurrences(label)
150
- @out << %(<li id="fn-#{number}">\n)
170
+ number = @arena.footnote_number(def_id)
171
+ occurrences = @arena.footnote_total_references(def_id)
172
+ @out << %(<li id="#{FootnoteAnchors.definition_id(number)}">\n)
151
173
 
152
174
  # Append the backref(s) inside the definition's last paragraph (GFM);
153
175
  # if the last block isn't a paragraph, emit a standalone one.
@@ -174,7 +196,7 @@ module RedQuilt
174
196
  def footnote_backrefs(number, occurrences)
175
197
  out = +""
176
198
  (1..occurrences).each do |occ|
177
- ref_id = occ > 1 ? "fnref-#{number}-#{occ}" : "fnref-#{number}"
199
+ ref_id = FootnoteAnchors.reference_id(number, occ)
178
200
  suffix = occ > 1 ? "<sup>#{occ}</sup>" : ""
179
201
  out << %( <a href="##{ref_id}">&#8617;#{suffix}</a>)
180
202
  end
@@ -183,8 +205,8 @@ module RedQuilt
183
205
 
184
206
  def render_table(table_id)
185
207
  rows = @arena.child_ids(table_id).to_a
186
- header_rows = rows.select { |row_id| @arena.int1(row_id) == 1 }
187
- body_rows = rows.reject { |row_id| @arena.int1(row_id) == 1 }
208
+ header_rows = rows.select { |row_id| @arena.table_row_header?(row_id) }
209
+ body_rows = rows.reject { |row_id| @arena.table_row_header?(row_id) }
188
210
 
189
211
  unless header_rows.empty?
190
212
  @out << "<thead>\n"
@@ -200,7 +222,7 @@ module RedQuilt
200
222
 
201
223
  def render_list_item(node_id)
202
224
  parent_id = @arena.raw_parent_id(node_id)
203
- tight = parent_id != -1 && @arena.type(parent_id) == NodeType::LIST && @arena.int3(parent_id) == 1
225
+ tight = parent_id != -1 && @arena.type(parent_id) == NodeType::LIST && @arena.list_tight?(parent_id)
204
226
 
205
227
  first_child_id = @arena.raw_first_child_id(node_id)
206
228
  first_is_para = first_child_id != -1 &&
@@ -239,7 +261,7 @@ module RedQuilt
239
261
  def render_table_row(row_id)
240
262
  @out << "<tr>"
241
263
  @arena.each_child(row_id) do |cell_id|
242
- tag = @arena.int1(cell_id) == 1 ? "th" : "td"
264
+ tag = @arena.table_cell_header?(cell_id) ? "th" : "td"
243
265
  @out << "<#{tag}>"
244
266
  render_children(cell_id)
245
267
  @out << "</#{tag}>"
@@ -275,7 +297,7 @@ module RedQuilt
275
297
  end
276
298
 
277
299
  def append_title_attribute(node_id)
278
- title = @arena.str2(node_id).to_s
300
+ title = @arena.link_title(node_id).to_s
279
301
  return if title.empty?
280
302
 
281
303
  @out << %( title="#{escape_html(title)}")
@@ -57,19 +57,19 @@ module RedQuilt
57
57
 
58
58
  case type_int
59
59
  when NodeType::HEADING
60
- result["depth"] = @arena.int1(node_id)
60
+ result["depth"] = @arena.heading_level(node_id)
61
61
  result["children"] = children(node_id)
62
62
  when NodeType::LIST
63
- result["ordered"] = @arena.int1(node_id) == 1
64
- tight = @arena.int3(node_id) == 1
65
- result["start"] = @arena.int2(node_id) if result["ordered"]
63
+ result["ordered"] = @arena.list_ordered?(node_id)
64
+ tight = @arena.list_tight?(node_id)
65
+ result["start"] = @arena.list_start(node_id) if result["ordered"]
66
66
  result["spread"] = !tight
67
67
  result["children"] = children(node_id, parent_spread: !tight)
68
68
  when NodeType::LIST_ITEM
69
69
  result["spread"] = parent_spread
70
70
  result["children"] = children(node_id)
71
71
  when NodeType::CODE_BLOCK
72
- info = @arena.str2(node_id)
72
+ info = @arena.code_block_info(node_id)
73
73
  lang = info && !info.empty? ? info.split.first : nil
74
74
  result["lang"] = lang
75
75
  result["value"] = @arena.text(node_id).to_s
@@ -81,21 +81,21 @@ module RedQuilt
81
81
  when NodeType::CODE_SPAN
82
82
  result["value"] = @arena.text(node_id).to_s
83
83
  when NodeType::LINK
84
- result["url"] = @arena.str1(node_id).to_s
85
- title = @arena.str2(node_id)
84
+ result["url"] = @arena.link_destination(node_id).to_s
85
+ title = @arena.link_title(node_id)
86
86
  result["title"] = title && !title.empty? ? title : nil
87
87
  result["children"] = children(node_id)
88
88
  when NodeType::IMAGE
89
- result["url"] = @arena.str1(node_id).to_s
90
- title = @arena.str2(node_id)
89
+ result["url"] = @arena.link_destination(node_id).to_s
90
+ title = @arena.link_title(node_id)
91
91
  result["title"] = title && !title.empty? ? title : nil
92
92
  result["alt"] = NodeRef.new(@document, node_id).text.to_s
93
93
  when NodeType::FOOTNOTE_REFERENCE
94
- label = @arena.str1(node_id).to_s
94
+ label = @arena.footnote_label(node_id).to_s
95
95
  result["identifier"] = label
96
96
  result["label"] = label
97
97
  when NodeType::FOOTNOTE_DEFINITION
98
- label = @arena.str1(node_id).to_s
98
+ label = @arena.footnote_label(node_id).to_s
99
99
  result["identifier"] = label
100
100
  result["label"] = label
101
101
  result["children"] = children(node_id)
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  # GFM table detection (spec 4.10). Pure functions over line text: whether
  # a line could be a table row and whether a header+delimiter pair starts a
  # table. Cell splitting lives here too so the recognition rules and the
  # splitting rules they depend on stay together. Node construction for the
  # table itself is done by Table::Parser on behalf of BlockParser.
  module Table
    module_function

    # True when lines[index] / lines[index+1] form a header + delimiter pair
    # that starts a GFM table. `lines` is an Array of objects responding to
    # `content`.
    def start?(lines, index)
      return false if index + 1 >= lines.length
      return false unless row?(lines[index].content)

      header_cells = split_row(lines[index].content)
      separators = split_row(lines[index + 1].content)
      return false if separators.empty?

      # GFM spec: separator row must have valid delimiters AND match header column count.
      # "The header row must match the delimiter row in the number of cells.
      # If not, a table will not be recognized."
      return false unless header_cells.length == separators.length

      separators.all? { |cell| cell.strip.match?(/\A:?-+:?\z/) }
    end

    # A line is a candidate table row when it contains at least one pipe.
    def row?(text)
      text.include?("|")
    end

    # Splits a row into its raw (unstripped) cell strings. Surrounding
    # whitespace and one optional leading / trailing pipe are removed first;
    # the -1 limit keeps trailing empty cells.
    def split_row(text)
      body = text.strip
      body = body[1..] if body.start_with?("|")
      body = body[0...-1] if body.end_with?("|")
      body.split("|", -1)
    end

    # Cached collaborator for BlockParser. A single instance is created in
    # BlockParser#initialize and reused; per-call state lives in method
    # locals so reentrant calls are safe.
    class Parser
      def initialize(block_parser)
        @arena = block_parser.arena
      end

      # Parses the table starting at lines[index] (already confirmed by
      # Table.start?). Appends a TABLE node under `parent_id` with one
      # TABLE_ROW per header/body line, and returns the index past the
      # table. The delimiter line produces no node. The table ends at the
      # first blank line or the first line without a pipe.
      def parse(parent_id, lines, index)
        start_index = index
        header_cells = Table.split_row(lines[index].content)
        column_count = header_cells.length
        row_lines = [lines[index]]
        index += 2 # skip the header and the delimiter row
        while index < lines.length
          break if lines[index].blank
          break unless Table.row?(lines[index].content)

          row_lines << lines[index]
          index += 1
        end

        table_id = @arena.add_node(NodeType::TABLE,
                                   source_start: lines[start_index].start_byte,
                                   source_len: row_lines.last.end_byte - lines[start_index].start_byte)
        @arena.append_child(parent_id, table_id)

        append_row(table_id, lines[start_index], header_cells, true)
        row_lines.drop(1).each do |row_line|
          cells = fit_to_columns(Table.split_row(row_line.content), column_count)
          append_row(table_id, row_line, cells, false)
        end

        index
      end

      private

      # GFM: body rows may vary in cell count; rows with fewer cells than
      # the header are padded with empty cells and excess cells are ignored,
      # so every row ends up with exactly `column_count` cells.
      def fit_to_columns(cells, column_count)
        kept = cells.first(column_count)
        kept + Array.new(column_count - kept.length, "")
      end

      # Adds one TABLE_ROW under `table_id` with a TABLE_CELL per entry of
      # `cells`. Row and cells carry the header flag in int1; each cell
      # stores its stripped text in str1. Cells are given the whole line's
      # span rather than a per-cell span.
      def append_row(table_id, line, cells, header)
        row_id = @arena.add_node(NodeType::TABLE_ROW,
                                 source_start: line.start_byte,
                                 source_len: line.span_len,
                                 int1: header ? 1 : 0)
        @arena.append_child(table_id, row_id)
        cells.each do |cell_text|
          stripped = cell_text.strip
          cell_id = @arena.add_node(NodeType::TABLE_CELL,
                                    source_start: line.start_byte,
                                    source_len: line.span_len,
                                    int1: header ? 1 : 0,
                                    str1: stripped)
          @arena.append_child(row_id, cell_id)
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedQuilt
4
- VERSION = "0.7.1"
4
+ VERSION = "0.8.0"
5
5
  end