red_quilt 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +109 -0
  4. data/.rubocop_todo.yml +7 -0
  5. data/CHANGELOG.md +57 -0
  6. data/README.md +284 -0
  7. data/Rakefile +8 -0
  8. data/ast-spec.md +1227 -0
  9. data/docs/architecture.md +81 -0
  10. data/docs/arena-usage.md +363 -0
  11. data/docs/commonmark-conformance.md +241 -0
  12. data/exe/redquilt +7 -0
  13. data/lib/red_quilt/arena.rb +366 -0
  14. data/lib/red_quilt/block_parser.rb +724 -0
  15. data/lib/red_quilt/blockquote.rb +151 -0
  16. data/lib/red_quilt/cli.rb +182 -0
  17. data/lib/red_quilt/diagnostic.rb +47 -0
  18. data/lib/red_quilt/document.rb +126 -0
  19. data/lib/red_quilt/extended_autolink_pass.rb +185 -0
  20. data/lib/red_quilt/footnote_definition.rb +147 -0
  21. data/lib/red_quilt/footnote_pass.rb +39 -0
  22. data/lib/red_quilt/footnote_registry.rb +68 -0
  23. data/lib/red_quilt/indentation.rb +73 -0
  24. data/lib/red_quilt/inline/builder.rb +674 -0
  25. data/lib/red_quilt/inline/flanking.rb +120 -0
  26. data/lib/red_quilt/inline/html_entities.rb +2180 -0
  27. data/lib/red_quilt/inline/lexer.rb +280 -0
  28. data/lib/red_quilt/inline/link_scanner.rb +315 -0
  29. data/lib/red_quilt/inline/token_kind.rb +39 -0
  30. data/lib/red_quilt/inline/tokens.rb +73 -0
  31. data/lib/red_quilt/inline.rb +34 -0
  32. data/lib/red_quilt/inline_pass.rb +53 -0
  33. data/lib/red_quilt/line.rb +14 -0
  34. data/lib/red_quilt/lint_pass.rb +71 -0
  35. data/lib/red_quilt/list.rb +317 -0
  36. data/lib/red_quilt/node_ref.rb +114 -0
  37. data/lib/red_quilt/node_type.rb +66 -0
  38. data/lib/red_quilt/plain_text.rb +46 -0
  39. data/lib/red_quilt/reference_definition.rb +309 -0
  40. data/lib/red_quilt/renderer/html.rb +279 -0
  41. data/lib/red_quilt/renderer/mdast.rb +152 -0
  42. data/lib/red_quilt/source_map.rb +29 -0
  43. data/lib/red_quilt/source_span.rb +26 -0
  44. data/lib/red_quilt/theme.rb +28 -0
  45. data/lib/red_quilt/themes/default.css +87 -0
  46. data/lib/red_quilt/version.rb +5 -0
  47. data/lib/red_quilt.rb +86 -0
  48. data/mise.toml +2 -0
  49. data/sig/red_quilt.rbs +45 -0
  50. metadata +91 -0
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedQuilt
4
+ # Extracts plain-text content from an arena subtree.
5
+ #
6
+ # TEXT / CODE_SPAN contribute their textual content as-is;
7
+ # SOFTBREAK / HARDBREAK become a single space; every other inline
8
+ # structure (EMPHASIS, STRONG, LINK, ...) is transparently recursed
9
+ # into. The starting `node_id` itself is treated as a container — its
10
+ # children are visited, but the node's own type does not appear in
11
+ # the output (callers pass an IMAGE / HEADING / PARAGRAPH and want
12
+ # the assembled inner text).
13
+ #
14
+ # Used by:
15
+ # - Renderer::HTML for an image's alt attribute
16
+ # - Document#first_heading_text for the CLI's --auto-title
17
+ # - LintPass#check_missing_alt
18
module PlainText
  module_function

  # Returns the concatenated plain text found beneath `node_id`.
  #
  # The node itself acts purely as a container: only its descendants
  # contribute to the returned (mutable) String.
  def from(arena, node_id)
    (+"").tap { |buffer| walk(arena, node_id, buffer) }
  end

  # Appends the plain text of every descendant of `node_id` to `out`,
  # visiting children in the order `arena.each_child` yields them.
  def walk(arena, node_id, out)
    arena.each_child(node_id) do |child_id|
      fragment =
        case arena.type(child_id)
        when NodeType::TEXT
          # Arena#text covers both span-only TEXT and TEXT that carries
          # a decoded literal.
          arena.text(child_id).to_s
        when NodeType::CODE_SPAN
          # CODE_SPAN keeps its normalized content in str1.
          arena.str1(child_id).to_s
        when NodeType::SOFTBREAK, NodeType::HARDBREAK
          " "
        end

      if fragment
        out << fragment
      else
        # Any other node type is transparent: descend into it.
        walk(arena, child_id, out)
      end
    end
  end
end
46
+ end
@@ -0,0 +1,309 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedQuilt
4
+ # CommonMark link reference definitions (`[label]: dest "title"`).
5
+ #
6
+ # Module-level functions are stateless helpers reused by BlockParser
7
+ # (fenced-code info string also calls `unescape_text`) and
8
+ # Inline::Builder (reference lookup uses `normalize_label`).
9
+ # `ReferenceDefinition::Parser` carries the per-call state (`@lines`,
10
+ # `@index`) and walks the lines for one definition attempt.
11
+ module ReferenceDefinition
12
+ # A reference label may contain `\[` / `\]` (backslash-escaped),
13
+ # but never an unescaped `[` or `]`. Newlines inside the label are
14
+ # allowed and collapsed by normalize_label.
15
+ REF_DEF_RE = /\A {0,3}\[((?:[^\\\[\]]|\\.)+)\]:(.*)\z/m
16
+
17
+ TITLE_CLOSERS = { '"' => '"', "'" => "'", "(" => ")" }.freeze
18
+
19
+ # CommonMark spec: "A link label can have at most 999 characters
20
+ # inside the square brackets." Applies to both reference definitions
21
+ # and reference link uses.
22
+ LABEL_MAX_LENGTH = 999
23
+
24
+ # CommonMark 6.3 link-tail whitespace: space/tab only (line endings
25
+ # are handled separately by the caller). Intentionally narrower than
26
+ # Ruby's `strip`/`lstrip`, which also match FF (U+000C) and VT
27
+ # (U+000B). Mirrors Inline::Builder#link_tail_whitespace_byte?.
28
+ LINK_TAIL_WS_RE = /[ \t]/
29
+
30
+ module_function
31
+
32
# Reports whether `text` (coerced with `to_s`) has more characters
# than LABEL_MAX_LENGTH, the spec's limit for a link label.
def label_too_long?(text)
  label_length = text.to_s.length
  LABEL_MAX_LENGTH < label_length
end
36
+
37
# Returns a copy of `text` without its leading run of spaces and
# tabs. Deliberately narrower than String#lstrip: this is the
# spec-defined whitespace around link destinations and titles in
# reference definitions.
def link_lstrip(text)
  leading = text[/\A[ \t]*/]
  text[leading.length..]
end
43
+
44
# True when `text` has no character other than space or tab (the
# empty string included).
def link_blank?(text)
  !text.match?(/[^ \t]/)
end
48
+
49
# Tries to read one reference definition that begins at
# `lines[index]` by delegating to a fresh Parser.
#
# On success the result is a hash of the form
# `{ reference: { label:, destination:, title: }, consumed: N,
# source_span: SourceSpan }`; otherwise nil. `reference` is what
# BlockParser should store in its @references table, and
# `source_span` covers the byte range of the consumed lines (useful
# for duplicate-definition diagnostics).
def consume(lines, index)
  parser = Parser.new(lines, index)
  parser.consume
end
58
+
59
# Unescape Markdown text: backslash-escapes for ASCII punctuation and
# HTML entity references. Also used by BlockParser#fenced_code_start
# for the info string, which shares the same unescape semantics.
#
# Both kinds of token are resolved in ONE left-to-right pass. Running
# the backslash pass first and the entity pass second would turn
# `\&amp;` into `&amp;` and then decode it to `&`, although the
# escaped ampersand must stay literal and must not start an entity.
#
# Returns a new String; `text` itself is not modified.
def unescape_text(text)
  # The `o` flag interpolates Inline::ENTITY_RE once, on first use,
  # so the combined pattern is not recompiled on every call and the
  # constant is only needed at call time, not at load time.
  text.gsub(/\\[!-\/:-@\[-`{-~]|#{Inline::ENTITY_RE}/o) do |token|
    # An entity always starts with `&`, so a leading backslash
    # identifies the escape branch: keep only the escaped character.
    token.start_with?("\\") ? token[1] : Inline.decode_entity(token)
  end
end
66
+
67
# Normalizes a link label for lookup, as the spec requires: trim,
# apply the full Unicode case fold, and collapse every internal run
# of spaces, tabs and line endings into one space. Inline::Builder
# uses the same rule when resolving a reference link.
def normalize_label(label)
  trimmed = label.to_s.strip
  # `downcase(:fold)` is the full case fold rather than a
  # per-codepoint lowercase, so `ẞ` (U+1E9E) folds to `ss` and
  # therefore matches a definition labelled `SS`.
  folded = trimmed.downcase(:fold)
  folded.gsub(/[ \t\r\n]+/, " ")
end
77
+
78
+ class Parser
79
# Stores the line list and the index of the line at which the
# definition attempt starts.
def initialize(lines, index)
  @lines, @index = lines, index
end
83
+
84
# Runs one definition attempt starting at `@lines[@index]`.
#
# Returns nil when the lines do not form a reference definition,
# otherwise a hash with `:reference` (label / destination / title),
# `:consumed` (number of lines used) and `:source_span` (from the
# first line's start byte to the last consumed line's end byte).
def consume
  opening = @lines[@index].content
  return unless opening.match?(/\A {0,3}\[/)

  label_match, used = match_label(opening)
  return unless label_match

  label = ReferenceDefinition.normalize_label(label_match[1])
  return if label.empty?

  chunks, used = collect_destination_chunks(label_match[2].to_s, used)
  return unless chunks

  # The destination is either on the `]:` line or, when that line's
  # remainder yields none, on the follow-up chunk.
  destination, rest = parse_destination(chunks.shift.to_s)
  unless destination
    destination, rest = parse_destination(chunks.first.to_s)
    return unless destination

    chunks.shift
  end

  title, used = consume_title(rest, used)
  return if title == :invalid

  first_line = @lines[@index]
  last_line = @lines[@index + used - 1]
  reference = {
    label: label,
    destination: ReferenceDefinition.unescape_text(strip_angle_brackets(destination)),
    title: title,
  }

  {
    reference: reference,
    consumed: used,
    source_span: SourceSpan.new(first_line.start_byte, last_line.end_byte),
  }
end
120
+
121
+ private
122
+
123
# Matches the `[label]:` prefix of a definition, on `text` alone or,
# failing that, on `text` joined with the following lines.
#
# Returns `[match, lines_used]`, where `match` is the REF_DEF_RE
# MatchData, or `[nil, nil]` when no acceptable label is found (no
# `]:`, a blank line or the end of input first, or a label longer
# than the spec allows).
def match_label(text)
  match = REF_DEF_RE.match(text)
  if match
    return [nil, nil] if ReferenceDefinition.label_too_long?(match[1])

    return [match, 1]
  end

  # While nothing has matched, everything accumulated after the
  # opening (at most three spaces plus `[`) would have to belong to
  # the label. Once that exceeds LABEL_MAX_LENGTH, any later match
  # would be rejected as too long, so stop instead of re-scanning an
  # ever-growing string up to the next blank line.
  pending_limit = LABEL_MAX_LENGTH + 4

  # Multi-line label: accumulate subsequent lines until `]:` is
  # found. Blank lines terminate the attempt.
  accumulated = text
  extra = 0
  loop do
    return [nil, nil] if accumulated.length > pending_limit

    probe = @index + 1 + extra
    return [nil, nil] if probe >= @lines.length

    next_line = @lines[probe]
    return [nil, nil] if next_line.blank

    accumulated = "#{accumulated}\n#{next_line.content}"
    extra += 1
    m = REF_DEF_RE.match(accumulated)
    next unless m
    return [nil, nil] if ReferenceDefinition.label_too_long?(m[1])

    return [m, 1 + extra]
  end
end
151
+
152
# Gathers the text in which the destination may be found.
#
# When `remainder` (the text after `]:`) is not blank it is the only
# chunk. Otherwise the next line is added as a second chunk and the
# consumed count grows by one. Returns `[chunks, consumed]`, or
# `[nil, nil]` when there is no next line or the next line is blank.
def collect_destination_chunks(remainder, consumed)
  return [[remainder], consumed] unless ReferenceDefinition.link_blank?(remainder)

  cursor = @index + consumed
  return [nil, nil] unless cursor < @lines.length

  following = @lines[cursor]
  return [nil, nil] if following.blank

  [[remainder, following.content], consumed + 1]
end
164
+
165
# Parses the optional title that follows a destination.
#
# `rest` is the text left on the destination line after the
# destination; `consumed` is the number of lines used so far.
#
# Returns `[title, consumed]`:
# - `title` is the title text, or nil when the definition has none;
# - `title` is `:invalid` when a title that began on the destination
#   line is followed by non-whitespace text, which invalidates the
#   whole definition;
# - when a title taken from a follow-up line turns out to be
#   unusable, the result is `[nil, consumed_before_title]` so the
#   definition is kept without a title and those lines are not
#   consumed.
def consume_title(rest, consumed)
  title_source = rest.to_s
  consumed_before_title = consumed
  title_on_separate_line = false
  # Nothing but spaces/tabs after the destination: the title may
  # start on the next line, provided that line opens with a title
  # delimiter.
  if ReferenceDefinition.link_blank?(title_source) && @index + consumed < @lines.length
    next_line = @lines[@index + consumed]
    if next_line && potential_title_start?(next_line.content)
      title_source = next_line.content
      consumed += 1
      title_on_separate_line = true
    end
  end

  # A title may span several lines: keep appending lines while the
  # title still looks open. A blank line ends the attempt.
  while @index + consumed < @lines.length && title_needs_more_lines?(title_source)
    next_line = @lines[@index + consumed]
    break if next_line.blank

    title_source = title_source.empty? ? next_line.content : "#{title_source}\n#{next_line.content}"
    consumed += 1
  end

  title, trailing = parse_title(title_source)
  # Non-whitespace text is left over (parse_title also returns the
  # whole text as `trailing` when it finds no closed title).
  if trailing && trailing.match?(/\S/)
    if title_on_separate_line
      # The title was pulled from a follow-up line; back off so
      # that line is reparsed as ordinary content and the def is
      # still accepted (sans title).
      return [nil, consumed_before_title]
    else
      # Title was on the destination line itself; the whole def is
      # invalid.
      return [:invalid, consumed]
    end
  end

  [title, consumed]
end
202
+
203
# Removes the surrounding `<` ... `>` from a destination written in
# angle-bracket form; any other destination is returned unchanged.
def strip_angle_brackets(destination)
  bracketed = destination.start_with?("<") && destination.end_with?(">")
  return destination unless bracketed

  destination[1...-1]
end
206
+
207
# Angle-bracket destination per CommonMark 6.3: `<`, then any
# characters except line endings and unescaped `<` / `>`, then `>`.
# A backslash only protects `<`, `>` and `\` here; any other
# backslash is an ordinary character. The atomic group stops the
# engine from backtracking into an escape pair and mistaking an
# escaped `\>` for the closing bracket.
ANGLE_DESTINATION_RE = /\A<(?>(?:\\[<>\\]|[^<>\r\n])*)>/

# Splits `text` into a link destination and the text that follows.
#
# Leading spaces/tabs are skipped first. Returns
# `[destination, rest]`, where an angle-bracket destination keeps
# its brackets, or `[nil, nil]` when `text` holds no valid
# destination.
def parse_destination(text)
  source = ReferenceDefinition.link_lstrip(text)
  return [nil, nil] if source.empty?
  return parse_raw_destination(source) unless source.start_with?("<")

  # Raw destinations cannot start with `<`, so once the angle form
  # fails there is no fallback.
  bracketed = source[ANGLE_DESTINATION_RE]
  return [nil, nil] unless bracketed

  # The closing `>` must end the text or be followed by whitespace.
  tail = source[bracketed.length..].to_s
  return [nil, nil] unless tail.empty? || tail.match?(/\A[ \t\r\n]/)

  [bracketed, tail]
end
226
+
227
# Raw destination per CommonMark 6.3: no ASCII control chars or
# space; parentheses must be balanced or backslash-escaped. Mirrors
# the inline-link logic in Inline::Builder#parse_raw_destination
# so a reference definition is not more permissive than an inline
# link destination.
RAW_DEST_FORBIDDEN_RE = /[\u0000-\u0020\u007F]/
ASCII_PUNCT_RE = /[!-\/:-@\[-`{-~]/

# Reads a raw (non-angle) destination from the start of `source`.
#
# Returns `[destination, rest]`, or `[nil, nil]` when the
# destination would be empty or ends with an unclosed `(`.
def parse_raw_destination(source)
  chars = source.chars
  open_parens = 0
  pos = 0

  while pos < chars.length
    ch = chars[pos]

    # A backslash followed by ASCII punctuation is an escape pair;
    # skip both characters so an escaped paren is not counted.
    if ch == "\\" && (following = chars[pos + 1]) && ASCII_PUNCT_RE.match?(following)
      pos += 2
      next
    end
    break if RAW_DEST_FORBIDDEN_RE.match?(ch)

    case ch
    when "("
      open_parens += 1
    when ")"
      # An unmatched `)` ends the destination.
      break if open_parens.zero?

      open_parens -= 1
    end
    pos += 1
  end

  return [nil, nil] if pos.zero? || !open_parens.zero?

  [source[0...pos], source[pos..].to_s]
end
262
+
263
# True when `text` opens a title (`"`, `'` or `(` after optional
# spaces/tabs) whose closing delimiter has not appeared yet, so the
# caller should append another line.
#
# The text is scanned for the first unescaped closer, the same rule
# parse_title applies. Only checking the last character is wrong in
# two ways: it treats a backslash-escaped closer at the end of a
# line as the end of the title, and it treats a closed title that is
# followed by spaces as still open.
def title_needs_more_lines?(text)
  stripped = ReferenceDefinition.link_lstrip(text)
  closer = TITLE_CLOSERS[stripped[0]]
  # Empty text, or text that does not start with a title delimiter.
  return false unless closer

  escaped = false
  stripped.each_char.drop(1).each do |char|
    if escaped
      # The character after a backslash never closes the title.
      escaped = false
    elsif char == "\\"
      escaped = true
    elsif char == closer
      return false
    end
  end

  true
end
274
+
275
# True when the first character of `text` after any leading spaces
# and tabs is one of the title openers `"`, `'` or `(`.
def potential_title_start?(text)
  text.match?(/\A[ \t]*["'(]/)
end
278
+
279
# Reads a delimited title from the start of `text` (after leading
# spaces/tabs).
#
# Returns `[title, trailing]`: the unescaped title body and the text
# after its closing delimiter. When `text` does not start with a
# title opener, or the title is never closed, returns
# `[nil, stripped_text]`.
def parse_title(text)
  stripped = ReferenceDefinition.link_lstrip(text)
  # `stripped[0]` is nil for empty text, which has no closer either.
  closer = TITLE_CLOSERS[stripped[0]]
  return [nil, stripped] unless closer

  raw_body = +""
  after_backslash = false
  stripped.each_char.with_index do |char, pos|
    next if pos.zero? # the opening delimiter

    if !after_backslash && char == closer
      return [ReferenceDefinition.unescape_text(raw_body), stripped[(pos + 1)..].to_s]
    end

    raw_body << char
    # A backslash escapes the next character unless it is itself
    # escaped.
    after_backslash = char == "\\" && !after_backslash
  end

  [nil, stripped]
end
307
+ end
308
+ end
309
+ end
@@ -0,0 +1,279 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedQuilt
4
+ module Renderer
5
+ class HTML
6
# Prepares a renderer for `document`: keeps the document and its
# arena, and starts with an empty, mutable output buffer.
def initialize(document)
  @out = +""
  @document = document
  @arena = @document.arena
end
11
+
12
# Renders every child of the document root and returns the output
# buffer.
def render
  root = @document.root_id
  render_children(root)
  @out
end
16
+
17
+ private
18
+
19
# CommonMark-compliant HTML escape: only `&`, `<`, `>`, `"` are
# rewritten. Apostrophes are left as-is (escape_html on Ruby
# 3.0+ rewrites `'` -> `&#39;` which fails CommonMark spec
# comparisons).
HTML_ESCAPE_TABLE = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", '"' => "&quot;" }.freeze
HTML_ESCAPE_RE = /[&<>"]/

# Returns `str` with the four characters above replaced by their
# entities. A string that contains none of them is returned as-is
# (the same object), without copying.
def escape_html(str)
  HTML_ESCAPE_RE.match?(str) ? str.gsub(HTML_ESCAPE_RE, HTML_ESCAPE_TABLE) : str
end
31
+
32
# Renders each child of `node_id` in sibling order. The arena
# reports "no node" with the id -1.
def render_children(node_id)
  current = @arena.raw_first_child_id(node_id)
  while current != -1
    render_node(current)
    current = @arena.raw_next_sibling_id(current)
  end
end
39
+
40
# Appends the HTML for `node_id` to the output buffer, dispatching
# on the node's type. A type with no branch below produces no
# output.
def render_node(node_id)
  case @arena.type(node_id)
  when NodeType::PARAGRAPH
    render_wrapped(node_id, "<p>", "</p>\n")
  when NodeType::HEADING
    # int1 holds the heading level.
    level = @arena.int1(node_id)
    render_wrapped(node_id, "<h#{level}>", "</h#{level}>\n")
  when NodeType::THEMATIC_BREAK
    @out << "<hr />\n"
  when NodeType::BLOCKQUOTE
    render_wrapped(node_id, "<blockquote>\n", "</blockquote>\n")
  when NodeType::LIST
    # int1 == 1 marks an ordered list; int2 is its start number,
    # which is only emitted when it differs from 1.
    is_ordered = @arena.int1(node_id) == 1
    list_tag = is_ordered ? "ol" : "ul"
    first_number = @arena.int2(node_id)
    start_attr = is_ordered && first_number != 1 ? %( start="#{first_number}") : ""
    render_wrapped(node_id, "<#{list_tag}#{start_attr}>\n", "</#{list_tag}>\n")
  when NodeType::LIST_ITEM
    @out << "<li>"
    render_list_item(node_id)
    @out << "</li>\n"
  when NodeType::CODE_BLOCK
    # Only the first word of the info string (str2) becomes the
    # language class.
    language = @arena.str2(node_id).to_s.split.first.to_s
    class_attr = language.empty? ? "" : %( class="language-#{escape_html(language)}")
    @out << "<pre><code" << class_attr << ">"
    @out << escape_html(@arena.text(node_id).to_s) << "</code></pre>\n"
  when NodeType::HTML_BLOCK
    render_raw_html(@arena.text(node_id).to_s, block: true)
  when NodeType::TABLE
    @out << "<table>\n"
    render_table(node_id)
    @out << "</table>\n"
  when NodeType::TEXT
    @out << escape_html(@arena.text(node_id).to_s)
  when NodeType::SOFTBREAK
    @out << "\n"
  when NodeType::HARDBREAK
    @out << "<br />\n"
  when NodeType::EMPHASIS
    render_wrapped(node_id, "<em>", "</em>")
  when NodeType::STRONG
    render_wrapped(node_id, "<strong>", "</strong>")
  when NodeType::STRIKETHROUGH
    render_wrapped(node_id, "<del>", "</del>")
  when NodeType::CODE_SPAN
    @out << "<code>" << escape_html(@arena.text(node_id).to_s) << "</code>"
  when NodeType::LINK
    href = escape_html(@arena.str1(node_id).to_s)
    @out << %(<a href="#{href}")
    append_title_attribute(node_id)
    @out << ">"
    render_children(node_id)
    @out << "</a>"
  when NodeType::IMAGE
    # The alt text is the plain text of the image's children.
    alt_text = PlainText.from(@arena, node_id)
    src = escape_html(@arena.str1(node_id).to_s)
    @out << %(<img src="#{src}" alt="#{escape_html(alt_text)}")
    append_title_attribute(node_id)
    @out << " />"
  when NodeType::HTML_INLINE
    render_raw_html(@arena.text(node_id).to_s, block: false)
  when NodeType::FOOTNOTE_REFERENCE
    render_footnote_reference(node_id)
  when NodeType::FOOTNOTES_SECTION
    render_footnotes_section(node_id)
  end
end

# Emits `opening`, the rendered children of `node_id`, then
# `closing`.
def render_wrapped(node_id, opening, closing)
  @out << opening
  render_children(node_id)
  @out << closing
end
123
+
124
# `[^label]` reference: a superscript link to the definition. The
# element id is built from the footnote number (int1); from the
# second reference to the same footnote onwards the occurrence
# number (int2) is appended, so every backref has a unique target.
def render_footnote_reference(node_id)
  number = @arena.int1(node_id)
  occurrence = @arena.int2(node_id)
  anchor = "fnref-#{number}"
  anchor = "#{anchor}-#{occurrence}" if occurrence > 1
  @out << %(<sup><a href="#fn-#{number}" id="#{anchor}">#{number}</a></sup>)
end
133
+
134
# Emits the footnotes `<section>`: an ordered list with one item
# per child of `node_id`, each child rendered as a footnote
# definition.
def render_footnotes_section(node_id)
  @out << "<section class=\"footnotes\">\n<ol>\n"
  @arena.each_child(node_id) do |definition_id|
    render_footnote_definition(definition_id)
  end
  @out << "</ol>\n</section>\n"
end
139
+
140
# Renders one footnote definition as an `<li>`.
#
# The backref link(s) are appended inside the definition's last
# paragraph (GFM); when the last block is not a paragraph, or the
# definition is empty, they go into a standalone `<p>` after the
# other blocks.
def render_footnote_definition(def_id)
  label = @arena.str1(def_id).to_s
  registry = @document.footnotes
  number = registry.number(label)
  occurrences = registry.occurrences(label)
  @out << %(<li id="fn-#{number}">\n)

  last = @arena.raw_last_child_id(def_id)
  closes_with_paragraph = last != -1 && @arena.type(last) == NodeType::PARAGRAPH

  child = @arena.raw_first_child_id(def_id)
  while child != -1
    if closes_with_paragraph && child == last
      # Re-emit the paragraph by hand so the backrefs land before
      # its closing tag.
      @out << "<p>"
      render_children(child)
      @out << footnote_backrefs(number, occurrences) << "</p>\n"
    else
      render_node(child)
    end
    child = @arena.raw_next_sibling_id(child)
  end

  @out << "<p>#{footnote_backrefs(number, occurrences)}</p>\n" unless closes_with_paragraph
  @out << "</li>\n"
end
167
+
168
# Builds the back-links for a footnote: one `&#8617;` link per
# reference occurrence, each preceded by a space. From the second
# occurrence on, the target id carries a `-N` suffix and the link
# shows a superscript N.
def footnote_backrefs(number, occurrences)
  (1..occurrences).each_with_object(+"") do |occ, links|
    repeated = occ > 1
    ref_id = repeated ? "fnref-#{number}-#{occ}" : "fnref-#{number}"
    suffix = repeated ? "<sup>#{occ}</sup>" : ""
    links << %( <a href="##{ref_id}">&#8617;#{suffix}</a>)
  end
end
177
+
178
# Renders the rows of a table, grouped into `<thead>` (rows whose
# int1 is 1) followed by `<tbody>` (all other rows). A group with no
# rows is omitted entirely.
def render_table(table_id)
  rows = @arena.child_ids(table_id).to_a
  header_rows, body_rows = rows.partition { |row_id| @arena.int1(row_id) == 1 }

  { "thead" => header_rows, "tbody" => body_rows }.each do |section_tag, section_rows|
    next if section_rows.empty?

    @out << "<#{section_tag}>\n"
    section_rows.each { |row_id| render_table_row(row_id) }
    @out << "</#{section_tag}>\n"
  end
end
194
+
195
# Renders the contents of a list item (the caller emits the
# surrounding `<li>` / `</li>`).
#
# In a tight list (parent is a LIST whose int3 is 1) paragraphs are
# written without their `<p>` wrapper. The newline handling below
# keeps each block on its own line whether or not it is wrapped.
def render_list_item(node_id)
  parent_id = @arena.raw_parent_id(node_id)
  tight = parent_id != -1 && @arena.type(parent_id) == NodeType::LIST && @arena.int3(parent_id) == 1

  first_child_id = @arena.raw_first_child_id(node_id)
  first_is_para = first_child_id != -1 &&
                  @arena.type(first_child_id) == NodeType::PARAGRAPH

  # Empty <li> renders inline; otherwise loose lists and tight
  # items opening with a non-paragraph block get a leading newline.
  if first_child_id != -1 && (!tight || !first_is_para)
    @out << "\n"
  end

  child_id = first_child_id
  # Type of the previously rendered child; nil before the first one.
  prev_type = nil
  until child_id == -1
    type = @arena.type(child_id)
    if tight && type == NodeType::PARAGRAPH
      # Paragraph in a tight list: drop the wrapping <p>. Only
      # insert a separator `\n` when the previous child was also
      # a tight paragraph — every other block already trails its
      # own `\n`, so adding another would double-space the gap.
      @out << "\n" if prev_type == NodeType::PARAGRAPH
      render_children(child_id)
    else
      # Non-paragraph block. Tight list paragraphs were emitted
      # without their tag, so follow them with `\n` to land the
      # next block on a fresh line. Other blocks already end with
      # their own `\n`, so no extra separator is needed.
      @out << "\n" if tight && prev_type == NodeType::PARAGRAPH
      render_node(child_id)
    end
    prev_type = type
    child_id = @arena.raw_next_sibling_id(child_id)
  end
end
232
+
233
# Renders one table row. A cell whose int1 is 1 becomes a `<th>`,
# any other cell a `<td>`.
def render_table_row(row_id)
  @out << "<tr>"
  @arena.each_child(row_id) do |cell_id|
    header_cell = @arena.int1(cell_id) == 1
    @out << (header_cell ? "<th>" : "<td>")
    render_children(cell_id)
    @out << (header_cell ? "</th>" : "</td>")
  end
  @out << "</tr>\n"
end
243
+
244
# GFM "Disallowed Raw HTML" extension: when allow_html is on but
# the caller has opted into filtering, the 9 dangerous tag names
# have their leading `<` rewritten to `&lt;` so the browser sees
# them as text. Word boundary (\b) prevents over-filtering
# (e.g. `<scripts>` is left alone).
DISALLOWED_RAW_TAGS = %w[title textarea style xmp iframe noembed noframes script plaintext].freeze
DISALLOWED_RAW_TAG_RE = /<(?=\/?(?:#{DISALLOWED_RAW_TAGS.join('|')})\b)/i

# Writes a raw HTML node to the output.
#
# With raw HTML disabled the text is HTML-escaped. With it enabled
# the text passes through, optionally filtered for disallowed tags.
# Block-level HTML is always followed by a newline.
def render_raw_html(text, block:)
  rendered =
    if !@document.allow_html?
      escape_html(text)
    elsif @document.disallow_raw_html?
      filter_disallowed_raw(text)
    else
      text
    end

  @out << rendered
  @out << "\n" if block
end

# Neutralizes the opening `<` of every disallowed tag (opening or
# closing form, case-insensitive).
def filter_disallowed_raw(text)
  text.gsub(DISALLOWED_RAW_TAG_RE, "&lt;")
end
270
+
271
# Appends ` title="..."` (HTML-escaped) for the node's title, held
# in str2. Nothing is written when the title is missing or empty.
def append_title_attribute(node_id)
  title = @arena.str2(node_id).to_s
  @out << %( title="#{escape_html(title)}") unless title.empty?
end
277
+ end
278
+ end
279
+ end