red_quilt 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  module Inline
    # CommonMark emphasis algorithm (spec 6.2). Phase 2 of inline parsing:
    # given the delimiter stack the linear pass collected (provisional TEXT
    # nodes for each `*` / `_` / `~` run), it pairs openers with closers and
    # rebuilds the arena subtree into EMPHASIS / STRONG / STRIKETHROUGH
    # nodes.
    #
    # Kept separate from Builder because it is a closed algorithm with a
    # narrow interface: it only needs the arena, the set of still-provisional
    # nodes (so consumed delimiters can be unmarked), and whether source
    # spans are tracked. Builder owns the linear pass and bracket handling;
    # it hands this resolver a delimiter stack to collapse.
    class EmphasisResolver
      # `count` is the CommonMark delimiter-run length; a Delimiter is
      # never enumerated, so shadowing Struct#count (from Enumerable) is
      # intentional rather than a footgun.
      Delimiter = Struct.new(:node_id, :char, :count, :can_open, :can_close) # rubocop:disable Lint/StructNewOverride

      # arena        - node arena the delimiter nodes live in; emphasis
      #                wrappers are created and linked through it.
      # track_source - when true, wrapper spans are computed from the
      #                delimiter nodes' source offsets; otherwise wrappers
      #                get the (-1, 0) placeholder span.
      def initialize(arena, track_source:)
        @arena = arena
        @track_source = track_source
      end

      # Collapses `stack` (an Array of Delimiter) in place, removing
      # consumed entries from `provisional_nodes`. Used both for the
      # document-level stack and for the inner delimiters of a resolved
      # link/image (see Builder#finalize_link). On return the stack is
      # empty and none of its node ids remain in `provisional_nodes`.
      def resolve(stack, provisional_nodes)
        # NB: the CommonMark spec describes an `openers_bottom`
        # optimization keyed by closer character / length / flanking
        # flags. Implementing that correctly is subtle (a single
        # per-character bottom blocks valid matches like
        # `*foo**bar**baz*`), so the implementation here just walks
        # back to the start of the stack for every closer. This is
        # O(stack^2) in the worst case but stacks are tiny in practice.
        closer_idx = 0

        while closer_idx < stack.length
          closer = stack[closer_idx]
          unless closer.can_close
            closer_idx += 1
            next
          end

          opener_idx = find_opener_index(stack, closer_idx)

          if opener_idx.nil?
            if closer.can_open
              # Still usable as an opener for a later closer: keep it.
              closer_idx += 1
            else
              # Dead delimiter: drop it. Removing the entry shifts the next
              # delimiter into `closer_idx`, so the cursor must NOT advance
              # here or that delimiter would never be tried as a closer
              # (e.g. the final `*` in `*a_ b*`).
              provisional_nodes.delete(closer.node_id)
              stack.delete_at(closer_idx)
            end
            next
          end

          opener = stack[opener_idx]
          strength = [opener.count, closer.count].min >= 2 ? 2 : 1
          if closer.char == "~"
            # GFM strikethrough only forms on `~~` runs. A single `~`
            # leaves the delimiter as text; advance the cursor so future
            # `~~` pairs can still match.
            if strength < 2
              closer_idx += 1
              next
            end
            kind = NodeType::STRIKETHROUGH
          else
            kind = strength == 2 ? NodeType::STRONG : NodeType::EMPHASIS
          end

          # CommonMark spec: any delimiters strictly between this opener and
          # closer can't open or close anything in this scope, so drop them
          # from the stack before we rebuild the tree. Their arena nodes
          # stay where they are (they'll be reparented into the new emphasis
          # alongside the surrounding content), but they must no longer be
          # candidates for future iterations. Without this, the next
          # iteration would try to pair stranded delimiters that have
          # already been moved into a different parent, which corrupts the
          # sibling chain (Arena#reparent walks into @parent[-1]).
          if closer_idx > opener_idx + 1
            removed = stack.slice!((opener_idx + 1)...closer_idx)
            removed.each { |e| provisional_nodes.delete(e.node_id) }
            closer_idx = opener_idx + 1
            closer = stack[closer_idx]
          end

          opener_node = opener.node_id
          closer_node = closer.node_id

          if @track_source
            # The wrapper spans the innermost `strength` characters of each
            # run plus everything between them.
            opener_match_start = @arena.source_end(opener_node) - strength
            closer_match_end = @arena.source_start(closer_node) + strength
          else
            opener_match_start = -1
            closer_match_end = 0
          end
          emphasis_id = add_node(kind, opener_match_start, closer_match_end)

          # Move everything between the two delimiter nodes under the new
          # wrapper; skipped when the delimiters are adjacent siblings.
          first_inside = @arena.raw_next_sibling_id(opener_node)
          last_inside = @arena.raw_prev_sibling_id(closer_node)
          if first_inside != -1 && last_inside != -1 &&
             first_inside != closer_node && last_inside != opener_node
            @arena.reparent(emphasis_id, first_inside, last_inside)
          end

          parent_id = @arena.raw_parent_id(opener_node)
          @arena.insert_before(parent_id, closer_node, emphasis_id)

          # Consume `strength` characters from the inner end of each
          # delimiter. The opener is trimmed on its right (trailing) end,
          # the closer on its left (leading) end; removing the opener from
          # the stack shifts the closer one slot left.
          closer_idx -= 1 if consume_delimiter(opener, opener_idx, stack, strength, provisional_nodes, from_start: false)
          consume_delimiter(closer, closer_idx, stack, strength, provisional_nodes, from_start: true)
        end

        stack.each { |e| provisional_nodes.delete(e.node_id) }
        stack.clear
      end

      private

      # Walks back from the closer at `closer_idx` and returns the index of
      # the nearest delimiter that can open, uses the same character, and is
      # not excluded by the "multiple of 3" rule; nil when there is none.
      def find_opener_index(stack, closer_idx)
        closer = stack[closer_idx]
        (closer_idx - 1).downto(0) do |idx|
          candidate = stack[idx]
          next unless candidate.can_open && candidate.char == closer.char
          next if rule_of_three_blocks?(candidate, closer)

          return idx
        end
        nil
      end

      # CommonMark "multiple of 3" rule: when either delimiter can both open
      # and close, the pair may not match if the sum of the run lengths is a
      # multiple of 3, unless both lengths are themselves multiples of 3.
      def rule_of_three_blocks?(opener, closer)
        (opener.can_close || closer.can_open) &&
          ((opener.count + closer.count) % 3).zero? &&
          !((opener.count % 3).zero? && (closer.count % 3).zero?)
      end

      # Mirrors Builder#add_arena_node for the nodes this resolver creates
      # (emphasis wrappers only ever take a type and a span).
      def add_node(type, start_byte, end_byte)
        if @track_source
          @arena.add_node(type, source_start: start_byte, source_len: end_byte - start_byte)
        else
          @arena.add_node(type, source_start: -1, source_len: 0)
        end
      end

      # Removes `strength` characters from one end of a delimiter run. When
      # the whole run is consumed the node is detached and dropped from the
      # stack (returns true); otherwise its count, str1, and — in
      # source-tracking mode — its span are trimmed on the requested side
      # (`from_start` trims the leading end, used for closers; trailing for
      # openers) and it stays on the stack (returns false).
      def consume_delimiter(entry, index, stack, strength, provisional_nodes, from_start:)
        node = entry.node_id
        if entry.count == strength
          provisional_nodes.delete(node)
          @arena.detach(node)
          stack.delete_at(index)
          return true
        end

        entry.count -= strength
        str = @arena.str1(node)
        @arena.update_str1(node, from_start ? str[strength..] : str[0...-strength])
        if @track_source
          start_byte = @arena.source_start(node)
          end_byte = @arena.source_end(node)
          if from_start
            @arena.update_span(node, start_byte + strength, end_byte)
          else
            @arena.update_span(node, start_byte, end_byte - strength)
          end
        end
        false
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  module Inline
    # URL-scheme security policy for inline link / image / autolink
    # destinations. Kept separate from Builder so the "which schemes are
    # safe and how blocking is reported" concern has a single home and can
    # change without touching the inline construction logic.
    #
    # Stateless (module_function); diagnostics are appended to the caller's
    # array (or skipped when it is nil).
    module UrlSanitizer
      module_function

      SAFE_SCHEMES = %w[http https mailto ftp tel ssh].freeze

      # Autolinks (`<scheme:...>`) are not run through the SAFE_SCHEMES
      # allowlist: CommonMark permits arbitrary schemes there (e.g.
      # `<made-up-scheme://x>`), and an allowlist would break that
      # conformance. Only the schemes that execute script when the link
      # is navigated are denied.
      UNSAFE_AUTOLINK_SCHEMES = %w[javascript vbscript data].freeze

      SCHEME_RE = /\A([a-zA-Z][a-zA-Z0-9+\-.]*):/

      # C0 control characters and space. URL parsers strip these from the
      # ends of a URL (and drop tab / newline anywhere) before looking for
      # the scheme, so they must not be allowed to hide one from SCHEME_RE.
      URL_NOISE_RE = /[\x00-\x20]/
      LEADING_URL_NOISE_RE = /\A[\x00-\x20]+/

      # Link / image destinations: allowlist. Relative URLs (starting `/`
      # or `#`) and scheme-less URLs pass; an unknown scheme is blocked
      # (href emptied) and a diagnostic is recorded. A nil destination
      # yields "". Accepted destinations are returned unchanged.
      def sanitize_destination(destination, diagnostics)
        return "" if destination.nil?
        return destination if destination.start_with?("/", "#")

        scheme = scheme_of(destination)
        return destination if scheme.nil?
        return destination if SAFE_SCHEMES.include?(scheme.downcase)

        report_blocked(diagnostics, scheme)
        ""
      end

      # Autolink destinations: denylist. The destination is returned
      # unchanged unless its scheme executes script on navigation, in which
      # case the href is emptied and a diagnostic is recorded.
      def block_unsafe_autolink(destination, diagnostics)
        scheme = scheme_of(destination)
        return destination if scheme.nil?
        return destination unless UNSAFE_AUTOLINK_SCHEMES.include?(scheme.downcase)

        report_blocked(diagnostics, scheme)
        ""
      end

      # Returns the scheme a URL parser would see for `destination` (original
      # case preserved), or nil when there is none. Detection runs on a copy
      # with tab / CR / LF removed and leading C0-control-or-space characters
      # stripped, so `" javascript:..."` and `"java\tscript:..."` are both
      # recognised as `javascript`. The copy is only built when such a
      # character is actually present.
      def scheme_of(destination)
        candidate = destination
        if destination.match?(URL_NOISE_RE)
          candidate = destination.delete("\t\n\r").sub(LEADING_URL_NOISE_RE, "")
        end
        candidate[SCHEME_RE, 1]
      end

      # Appends an :unsafe_url warning for `scheme` to `diagnostics`; a nil
      # `diagnostics` means the caller is not collecting them.
      def report_blocked(diagnostics, scheme)
        return unless diagnostics

        diagnostics << Diagnostic.new(
          severity: :warning,
          rule: :unsafe_url,
          message: "Unsafe URL scheme #{scheme.downcase.inspect} blocked",
        )
      end
    end
  end
end
@@ -10,5 +10,10 @@ module RedQuilt
10
10
  # Positional (not keyword_init): one Line is built per source line, so
11
11
  # the ~2.5x faster positional constructor matters on large documents.
12
12
  # Argument order: content, start_byte, end_byte, blank, lazy_continuation.
13
- Line = Struct.new(:content, :start_byte, :end_byte, :blank, :lazy_continuation)
13
Line = Struct.new(:content, :start_byte, :end_byte, :blank, :lazy_continuation) do
  # Number of source bytes this line covers: the distance from its start
  # offset to its end offset.
  def span_len
    first_byte = start_byte
    last_byte = end_byte
    last_byte - first_byte
  end
end
14
19
  end
@@ -22,7 +22,7 @@ module RedQuilt
22
22
  walk(@document.root_id) do |id|
23
23
  case @arena.type(id)
24
24
  when NodeType::HEADING
25
- level = @arena.int1(id)
25
+ level = @arena.heading_level(id)
26
26
  if last_heading_level.positive? && level > last_heading_level + 1
27
27
  push(:info, :heading_level_skip,
28
28
  "Heading jumps from h#{last_heading_level} to h#{level}",
@@ -45,7 +45,7 @@ module RedQuilt
45
45
  end
46
46
 
47
47
  def check_empty_link(node_id)
48
- return unless @arena.str1(node_id).to_s.empty?
48
+ return unless @arena.link_destination(node_id).to_s.empty?
49
49
 
50
50
  push(:warning, :empty_link,
51
51
  "Link has no destination",
@@ -46,6 +46,13 @@ module RedQuilt
46
46
  text
47
47
  end
48
48
 
49
# Fence info string for this node. Only CODE_BLOCK nodes carry one; every
# other node type yields "". The stored value is coerced with `to_s`.
def info
  code_block = @arena.type(@node_id) == NodeType::CODE_BLOCK
  code_block ? @arena.code_block_info(@node_id).to_s : ""
end
55
+
49
56
  def source_span
50
57
  @arena.source_span(@node_id)
51
58
  end
@@ -86,26 +93,28 @@ module RedQuilt
86
93
  def ast_attributes
87
94
  case @arena.type(@node_id)
88
95
  when NodeType::HEADING
89
- { level: @arena.int1(@node_id), text: text }
96
+ { level: @arena.heading_level(@node_id), text: text }
90
97
  when NodeType::LIST
91
98
  {
92
- ordered: @arena.int1(@node_id) == 1,
93
- start_number: @arena.int2(@node_id),
94
- tight: @arena.int3(@node_id) == 1,
95
- delimiter: @arena.str1(@node_id),
99
+ ordered: @arena.list_ordered?(@node_id),
100
+ start_number: @arena.list_start(@node_id),
101
+ tight: @arena.list_tight?(@node_id),
102
+ delimiter: @arena.list_delimiter(@node_id),
96
103
  }
97
- when NodeType::TABLE_ROW, NodeType::TABLE_CELL
98
- { header: @arena.int1(@node_id) == 1, text: text }
104
+ when NodeType::TABLE_ROW
105
+ { header: @arena.table_row_header?(@node_id), text: text }
106
+ when NodeType::TABLE_CELL
107
+ { header: @arena.table_cell_header?(@node_id), text: text }
99
108
  when NodeType::TEXT, NodeType::CODE_SPAN, NodeType::HTML_BLOCK, NodeType::HTML_INLINE, NodeType::PARAGRAPH
100
109
  { text: text }
101
110
  when NodeType::CODE_BLOCK
102
- { text: @arena.text(@node_id), info: @arena.str2(@node_id) }
111
+ { text: @arena.text(@node_id), info: @arena.code_block_info(@node_id) }
103
112
  when NodeType::LINK, NodeType::IMAGE
104
- { destination: @arena.str1(@node_id), title: @arena.str2(@node_id), text: text }
113
+ { destination: @arena.link_destination(@node_id), title: @arena.link_title(@node_id), text: text }
105
114
  when NodeType::FOOTNOTE_REFERENCE
106
- { label: @arena.str1(@node_id), number: @arena.int1(@node_id) }
115
+ { label: @arena.footnote_label(@node_id), number: @arena.footnote_number(@node_id) }
107
116
  when NodeType::FOOTNOTE_DEFINITION
108
- { label: @arena.str1(@node_id) }
117
+ { label: @arena.footnote_label(@node_id) }
109
118
  else
110
119
  {}
111
120
  end
@@ -16,6 +16,18 @@ module RedQuilt
16
16
  @out
17
17
  end
18
18
 
19
# Renders `nodes` (an Array of NodeRef) in order into a scratch buffer and
# returns that HTML fragment. The main output buffer is swapped out for the
# duration of the call and put back afterwards, even if rendering raises.
def render_fragment(nodes)
  main_buffer = @out
  begin
    @out = +""
    nodes.each { |ref| render_node(ref.node_id) }
    @out
  ensure
    @out = main_buffer
  end
end
30
+
19
31
  private
20
32
 
21
33
  # CommonMark-compliant HTML escape: only `&`, `<`, `>`, `"` are
@@ -46,7 +58,7 @@ module RedQuilt
46
58
  render_children(node_id)
47
59
  @out << "</p>\n"
48
60
  when NodeType::HEADING
49
- level = @arena.int1(node_id)
61
+ level = @arena.heading_level(node_id)
50
62
  if @slugger
51
63
  id = @slugger.generate(PlainText.from(@arena, node_id))
52
64
  @out << %(<h#{level} id="#{escape_html(id)}">)
@@ -62,9 +74,9 @@ module RedQuilt
62
74
  render_children(node_id)
63
75
  @out << "</blockquote>\n"
64
76
  when NodeType::LIST
65
- ordered = @arena.int1(node_id) == 1
77
+ ordered = @arena.list_ordered?(node_id)
66
78
  tag = ordered ? "ol" : "ul"
67
- start_number = @arena.int2(node_id)
79
+ start_number = @arena.list_start(node_id)
68
80
  attrs = ordered && start_number != 1 ? %( start="#{start_number}") : ""
69
81
  @out << "<#{tag}#{attrs}>\n"
70
82
  render_children(node_id)
@@ -74,7 +86,7 @@ module RedQuilt
74
86
  render_list_item(node_id)
75
87
  @out << "</li>\n"
76
88
  when NodeType::CODE_BLOCK
77
- info_word = @arena.str2(node_id).to_s.split.first.to_s
89
+ info_word = @arena.code_block_info(node_id).to_s.split.first.to_s
78
90
  if @mermaid && info_word == "mermaid"
79
91
  # Emit a container mermaid.js recognizes via class="mermaid".
80
92
  # The diagram source is still HTML-escaped; the browser decodes
@@ -116,7 +128,7 @@ module RedQuilt
116
128
  when NodeType::CODE_SPAN
117
129
  @out << "<code>#{escape_html(@arena.text(node_id).to_s)}</code>"
118
130
  when NodeType::LINK
119
- dest = escape_html(@arena.str1(node_id).to_s)
131
+ dest = escape_html(@arena.link_destination(node_id).to_s)
120
132
  @out << %(<a href="#{dest}")
121
133
  append_title_attribute(node_id)
122
134
  @out << ">"
@@ -124,7 +136,7 @@ module RedQuilt
124
136
  @out << "</a>"
125
137
  when NodeType::IMAGE
126
138
  alt = PlainText.from(@arena, node_id)
127
- dest = escape_html(@arena.str1(node_id).to_s)
139
+ dest = escape_html(@arena.link_destination(node_id).to_s)
128
140
  @out << %(<img src="#{dest}" alt="#{escape_html(alt)}")
129
141
  append_title_attribute(node_id)
130
142
  @out << " />"
@@ -141,10 +153,11 @@ module RedQuilt
141
153
  # element ids use the footnote number; a second+ reference to the
142
154
  # same footnote gets a `-M` suffix so each backref has a unique target.
143
155
  def render_footnote_reference(node_id)
144
- number = @arena.int1(node_id)
145
- occurrence = @arena.int2(node_id)
146
- ref_id = occurrence > 1 ? "fnref-#{number}-#{occurrence}" : "fnref-#{number}"
147
- @out << %(<sup><a href="#fn-#{number}" id="#{ref_id}">#{number}</a></sup>)
156
+ number = @arena.footnote_number(node_id)
157
+ occurrence = @arena.footnote_occurrence(node_id)
158
+ ref_id = FootnoteAnchors.reference_id(number, occurrence)
159
+ def_id = FootnoteAnchors.definition_id(number)
160
+ @out << %(<sup><a href="##{def_id}" id="#{ref_id}">#{number}</a></sup>)
148
161
  end
149
162
 
150
163
  def render_footnotes_section(node_id)
@@ -154,10 +167,9 @@ module RedQuilt
154
167
  end
155
168
 
156
169
  def render_footnote_definition(def_id)
157
- label = @arena.str1(def_id).to_s
158
- number = @document.footnotes.number(label)
159
- occurrences = @document.footnotes.occurrences(label)
160
- @out << %(<li id="fn-#{number}">\n)
170
+ number = @arena.footnote_number(def_id)
171
+ occurrences = @arena.footnote_total_references(def_id)
172
+ @out << %(<li id="#{FootnoteAnchors.definition_id(number)}">\n)
161
173
 
162
174
  # Append the backref(s) inside the definition's last paragraph (GFM);
163
175
  # if the last block isn't a paragraph, emit a standalone one.
@@ -184,7 +196,7 @@ module RedQuilt
184
196
  def footnote_backrefs(number, occurrences)
185
197
  out = +""
186
198
  (1..occurrences).each do |occ|
187
- ref_id = occ > 1 ? "fnref-#{number}-#{occ}" : "fnref-#{number}"
199
+ ref_id = FootnoteAnchors.reference_id(number, occ)
188
200
  suffix = occ > 1 ? "<sup>#{occ}</sup>" : ""
189
201
  out << %( <a href="##{ref_id}">&#8617;#{suffix}</a>)
190
202
  end
@@ -193,8 +205,8 @@ module RedQuilt
193
205
 
194
206
  def render_table(table_id)
195
207
  rows = @arena.child_ids(table_id).to_a
196
- header_rows = rows.select { |row_id| @arena.int1(row_id) == 1 }
197
- body_rows = rows.reject { |row_id| @arena.int1(row_id) == 1 }
208
+ header_rows = rows.select { |row_id| @arena.table_row_header?(row_id) }
209
+ body_rows = rows.reject { |row_id| @arena.table_row_header?(row_id) }
198
210
 
199
211
  unless header_rows.empty?
200
212
  @out << "<thead>\n"
@@ -210,7 +222,7 @@ module RedQuilt
210
222
 
211
223
  def render_list_item(node_id)
212
224
  parent_id = @arena.raw_parent_id(node_id)
213
- tight = parent_id != -1 && @arena.type(parent_id) == NodeType::LIST && @arena.int3(parent_id) == 1
225
+ tight = parent_id != -1 && @arena.type(parent_id) == NodeType::LIST && @arena.list_tight?(parent_id)
214
226
 
215
227
  first_child_id = @arena.raw_first_child_id(node_id)
216
228
  first_is_para = first_child_id != -1 &&
@@ -249,7 +261,7 @@ module RedQuilt
249
261
  def render_table_row(row_id)
250
262
  @out << "<tr>"
251
263
  @arena.each_child(row_id) do |cell_id|
252
- tag = @arena.int1(cell_id) == 1 ? "th" : "td"
264
+ tag = @arena.table_cell_header?(cell_id) ? "th" : "td"
253
265
  @out << "<#{tag}>"
254
266
  render_children(cell_id)
255
267
  @out << "</#{tag}>"
@@ -285,7 +297,7 @@ module RedQuilt
285
297
  end
286
298
 
287
299
  def append_title_attribute(node_id)
288
- title = @arena.str2(node_id).to_s
300
+ title = @arena.link_title(node_id).to_s
289
301
  return if title.empty?
290
302
 
291
303
  @out << %( title="#{escape_html(title)}")
@@ -57,19 +57,19 @@ module RedQuilt
57
57
 
58
58
  case type_int
59
59
  when NodeType::HEADING
60
- result["depth"] = @arena.int1(node_id)
60
+ result["depth"] = @arena.heading_level(node_id)
61
61
  result["children"] = children(node_id)
62
62
  when NodeType::LIST
63
- result["ordered"] = @arena.int1(node_id) == 1
64
- tight = @arena.int3(node_id) == 1
65
- result["start"] = @arena.int2(node_id) if result["ordered"]
63
+ result["ordered"] = @arena.list_ordered?(node_id)
64
+ tight = @arena.list_tight?(node_id)
65
+ result["start"] = @arena.list_start(node_id) if result["ordered"]
66
66
  result["spread"] = !tight
67
67
  result["children"] = children(node_id, parent_spread: !tight)
68
68
  when NodeType::LIST_ITEM
69
69
  result["spread"] = parent_spread
70
70
  result["children"] = children(node_id)
71
71
  when NodeType::CODE_BLOCK
72
- info = @arena.str2(node_id)
72
+ info = @arena.code_block_info(node_id)
73
73
  lang = info && !info.empty? ? info.split.first : nil
74
74
  result["lang"] = lang
75
75
  result["value"] = @arena.text(node_id).to_s
@@ -81,21 +81,21 @@ module RedQuilt
81
81
  when NodeType::CODE_SPAN
82
82
  result["value"] = @arena.text(node_id).to_s
83
83
  when NodeType::LINK
84
- result["url"] = @arena.str1(node_id).to_s
85
- title = @arena.str2(node_id)
84
+ result["url"] = @arena.link_destination(node_id).to_s
85
+ title = @arena.link_title(node_id)
86
86
  result["title"] = title && !title.empty? ? title : nil
87
87
  result["children"] = children(node_id)
88
88
  when NodeType::IMAGE
89
- result["url"] = @arena.str1(node_id).to_s
90
- title = @arena.str2(node_id)
89
+ result["url"] = @arena.link_destination(node_id).to_s
90
+ title = @arena.link_title(node_id)
91
91
  result["title"] = title && !title.empty? ? title : nil
92
92
  result["alt"] = NodeRef.new(@document, node_id).text.to_s
93
93
  when NodeType::FOOTNOTE_REFERENCE
94
- label = @arena.str1(node_id).to_s
94
+ label = @arena.footnote_label(node_id).to_s
95
95
  result["identifier"] = label
96
96
  result["label"] = label
97
97
  when NodeType::FOOTNOTE_DEFINITION
98
- label = @arena.str1(node_id).to_s
98
+ label = @arena.footnote_label(node_id).to_s
99
99
  result["identifier"] = label
100
100
  result["label"] = label
101
101
  result["children"] = children(node_id)
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
module RedQuilt
  # GFM table detection (spec 4.10). Pure functions over line text: whether
  # a line could be a table row and whether a header+delimiter pair starts a
  # table. Cell splitting lives here too so the recognition rules and the
  # splitting rules they depend on stay together. Node construction is done
  # by the nested Parser on behalf of BlockParser.
  module Table
    module_function

    # True when lines[index] / lines[index+1] form a header + delimiter pair
    # that starts a GFM table.
    def start?(lines, index)
      return false if index + 1 >= lines.length
      return false unless row?(lines[index].content)

      header_cells = split_row(lines[index].content)
      separators = split_row(lines[index + 1].content)
      return false if separators.empty?

      # GFM spec: separator row must have valid delimiters AND match header column count.
      # "The header row must match the delimiter row in the number of cells.
      # If not, a table will not be recognized."
      return false unless header_cells.length == separators.length

      separators.all? { |cell| cell.strip.match?(/\A:?-+:?\z/) }
    end

    # Cheap row pre-check: a candidate row must contain a pipe somewhere.
    def row?(text)
      text.include?("|")
    end

    # Splits one row into its raw (unstripped) cell strings. One leading and
    # one trailing pipe are treated as the row's outer border and dropped.
    # Per GFM, a backslash-escaped pipe (`\|`) does not delimit cells: it is
    # kept inside the cell as a literal `|`, and a trailing `\|` is content,
    # not a border. Every other backslash sequence is passed through
    # untouched. Returns [] when nothing remains after removing the borders.
    def split_row(text)
      body = text.strip
      body = body[1..] if body.start_with?("|")
      body = body[0...-1] if body.end_with?("|") && !trailing_pipe_escaped?(body)
      return [] if body.empty?

      split_cells(body)
    end

    # True when the final `|` of `body` is preceded by an odd number of
    # backslashes, i.e. the pipe itself is escaped. Expects `body` to end
    # with a pipe.
    def trailing_pipe_escaped?(body)
      body[/(\\*)\|\z/, 1].length.odd?
    end

    # Splits `body` on unescaped pipes, turning `\|` into `|` and keeping
    # empty cells (including a trailing one).
    def split_cells(body)
      cells = []
      cell = +""
      escaped = false
      body.each_char do |ch|
        if escaped
          # Only `\|` is unescaped here; other escapes keep their backslash.
          cell << "\\" unless ch == "|"
          cell << ch
          escaped = false
        elsif ch == "\\"
          escaped = true
        elsif ch == "|"
          cells << cell
          cell = +""
        else
          cell << ch
        end
      end
      cell << "\\" if escaped # lone trailing backslash is literal text
      cells << cell
    end

    # Cached collaborator for BlockParser. A single instance is created in
    # BlockParser#initialize and reused; per-call state lives in method
    # locals so reentrant calls are safe.
    class Parser
      def initialize(block_parser)
        @arena = block_parser.arena
      end

      # Parses the table starting at lines[index] (already confirmed by
      # Table.start?). Appends a TABLE node with its rows under `parent_id`
      # and returns the index past the table. The table extends until the
      # first blank line or the first line without a pipe.
      def parse(parent_id, lines, index)
        start_index = index
        header_cells = Table.split_row(lines[index].content)
        row_lines = [lines[index]]
        index += 2 # skip the header and the delimiter row
        while index < lines.length
          break if lines[index].blank
          break unless Table.row?(lines[index].content)

          row_lines << lines[index]
          index += 1
        end

        table_id = @arena.add_node(NodeType::TABLE,
                                   source_start: lines[start_index].start_byte,
                                   source_len: row_lines.last.end_byte - lines[start_index].start_byte)
        @arena.append_child(parent_id, table_id)

        append_row(table_id, lines[start_index], header_cells, true)
        row_lines.drop(1).each do |row_line|
          append_row(table_id, row_line, Table.split_row(row_line.content), false)
        end

        index
      end

      private

      # Adds one TABLE_ROW under `table_id` with a TABLE_CELL child per entry
      # of `cells`. Row and cells all carry the span of the whole source
      # line; int1 is 1 for header rows/cells and 0 otherwise, and each
      # cell's str1 is its stripped text.
      def append_row(table_id, line, cells, header)
        row_id = @arena.add_node(NodeType::TABLE_ROW,
                                 source_start: line.start_byte,
                                 source_len: line.span_len,
                                 int1: header ? 1 : 0)
        @arena.append_child(table_id, row_id)
        cells.each do |cell_text|
          stripped = cell_text.strip
          cell_id = @arena.add_node(NodeType::TABLE_CELL,
                                    source_start: line.start_byte,
                                    source_len: line.span_len,
                                    int1: header ? 1 : 0,
                                    str1: stripped)
          @arena.append_child(row_id, cell_id)
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RedQuilt
4
- VERSION = "0.7.2"
4
+ VERSION = "0.8.0"
5
5
  end