red_quilt 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +109 -0
  4. data/.rubocop_todo.yml +7 -0
  5. data/CHANGELOG.md +57 -0
  6. data/README.md +284 -0
  7. data/Rakefile +8 -0
  8. data/ast-spec.md +1227 -0
  9. data/docs/architecture.md +81 -0
  10. data/docs/arena-usage.md +363 -0
  11. data/docs/commonmark-conformance.md +241 -0
  12. data/exe/redquilt +7 -0
  13. data/lib/red_quilt/arena.rb +366 -0
  14. data/lib/red_quilt/block_parser.rb +724 -0
  15. data/lib/red_quilt/blockquote.rb +151 -0
  16. data/lib/red_quilt/cli.rb +182 -0
  17. data/lib/red_quilt/diagnostic.rb +47 -0
  18. data/lib/red_quilt/document.rb +126 -0
  19. data/lib/red_quilt/extended_autolink_pass.rb +185 -0
  20. data/lib/red_quilt/footnote_definition.rb +147 -0
  21. data/lib/red_quilt/footnote_pass.rb +39 -0
  22. data/lib/red_quilt/footnote_registry.rb +68 -0
  23. data/lib/red_quilt/indentation.rb +73 -0
  24. data/lib/red_quilt/inline/builder.rb +674 -0
  25. data/lib/red_quilt/inline/flanking.rb +120 -0
  26. data/lib/red_quilt/inline/html_entities.rb +2180 -0
  27. data/lib/red_quilt/inline/lexer.rb +280 -0
  28. data/lib/red_quilt/inline/link_scanner.rb +315 -0
  29. data/lib/red_quilt/inline/token_kind.rb +39 -0
  30. data/lib/red_quilt/inline/tokens.rb +73 -0
  31. data/lib/red_quilt/inline.rb +34 -0
  32. data/lib/red_quilt/inline_pass.rb +53 -0
  33. data/lib/red_quilt/line.rb +14 -0
  34. data/lib/red_quilt/lint_pass.rb +71 -0
  35. data/lib/red_quilt/list.rb +317 -0
  36. data/lib/red_quilt/node_ref.rb +114 -0
  37. data/lib/red_quilt/node_type.rb +66 -0
  38. data/lib/red_quilt/plain_text.rb +46 -0
  39. data/lib/red_quilt/reference_definition.rb +309 -0
  40. data/lib/red_quilt/renderer/html.rb +279 -0
  41. data/lib/red_quilt/renderer/mdast.rb +152 -0
  42. data/lib/red_quilt/source_map.rb +29 -0
  43. data/lib/red_quilt/source_span.rb +26 -0
  44. data/lib/red_quilt/theme.rb +28 -0
  45. data/lib/red_quilt/themes/default.css +87 -0
  46. data/lib/red_quilt/version.rb +5 -0
  47. data/lib/red_quilt.rb +86 -0
  48. data/mise.toml +2 -0
  49. data/sig/red_quilt.rbs +45 -0
  50. metadata +91 -0
@@ -0,0 +1,674 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RedQuilt
4
+ module Inline
5
+ # Consumes a token stream produced by Lexer and adds inline nodes
6
+ # to the arena under parent_id.
7
+ #
8
+ # Processing happens in two phases:
9
+ # 1. linear_pass — code spans, brackets (link/image), autolinks,
10
+ # HTML, simple inlines. Emphasis delimiter runs are added as
11
+ # provisional TEXT nodes and pushed onto a delimiter stack.
12
+ # 2. process_emphasis — CommonMark spec 6.2 algorithm pairs up
13
+ # delimiter stack entries into EMPHASIS / STRONG nodes.
14
+ class Builder
15
+ SAFE_SCHEMES = %w[http https mailto ftp tel ssh].freeze
16
+ # Autolinks (`<scheme:...>`) are not run through the SAFE_SCHEMES
17
+ # allowlist: CommonMark permits arbitrary schemes there (e.g.
18
+ # `<made-up-scheme://x>`), and an allowlist would break that
19
+ # conformance. Only the schemes that execute script when the link
20
+ # is navigated are denied.
21
+ UNSAFE_AUTOLINK_SCHEMES = %w[javascript vbscript data].freeze
22
+
23
+ # `count` is the CommonMark delimiter-run length; a Delimiter is
24
+ # never enumerated, so shadowing Struct#count (from Enumerable) is
25
+ # intentional rather than a footgun.
26
+ Delimiter = Struct.new(:node_id, :char, :count, :can_open, :can_close) # rubocop:disable Lint/StructNewOverride
27
+
28
+ Bracket = Struct.new(:token_id, :node_id, :image, :active, :delim_stack_size)
29
+
30
# Binds a builder to one arena and one source string.
#
# arena:        node store that receives the inline nodes.
# source:       string that token byte offsets index into.
# references:   link reference definitions, looked up by normalized label.
# track_source: when true, arena nodes carry the byte ranges supplied by
#               the lexer. When false (used for inputs whose source has
#               been materialized into a separate string, e.g. transformed
#               blockquote lines), source_start/source_len are not
#               recorded; in that mode every text node carries its content
#               in str1 so Arena#text still works.
# diagnostics:  optional Array the builder appends warnings to (unsafe URL
#               schemes, missing references, ...). The caller — usually
#               InlinePass — is expected to forward Document#diagnostics.
# footnotes:    optional registry consulted for `[^label]` references;
#               nil disables footnote handling.
def initialize(arena, source, references, track_source: true, diagnostics: nil, footnotes: nil)
  @arena = arena
  @references = references
  @footnotes = footnotes
  @diagnostics = diagnostics
  @track_source = track_source

  @source = source
  # Byte-oriented twin of the source, used by the String#byteindex hot
  # path: on a UTF-8 string byteindex raises when the offset lands inside
  # a multibyte sequence, whereas in the binary view every byte is its
  # own character.
  @source_b = source.b
  @link_scanner = LinkScanner.new(source)
end
53
+
54
# Builds the inline subtree for one token stream beneath parent_id.
#
# All per-run state (delimiter stack, bracket stack, provisional-node
# set) is reset first, so one builder can be reused across calls. Runs
# the linear pass and then resolves whatever emphasis delimiters remain.
def build(parent_id, tokens)
  @parent_id = parent_id
  @tokens = tokens

  @provisional_nodes = {}
  @bracket_stack = []
  @delimiter_stack = []

  linear_pass
  process_emphasis(@delimiter_stack)
end
63
+
64
+ private
65
+
66
+ # --------------------------- node helpers ---------------------------
67
+
68
# Allocates an arena node of the given type and returns whatever
# Arena#add_node returns for it.
#
# With source tracking on, the node records [start_byte, end_byte) as
# source_start/source_len. With tracking off, the span is stored as the
# sentinel pair (-1, 0) and the byte arguments are ignored.
def add_arena_node(type, start_byte, end_byte, str1: nil, str2: nil, int1: 0, int2: 0)
  span_start, span_len = @track_source ? [start_byte, end_byte - start_byte] : [-1, 0]

  @arena.add_node(
    type,
    source_start: span_start,
    source_len: span_len,
    str1: str1, str2: str2, int1: int1, int2: int2,
  )
end
79
+
80
# Forwards a span update to the arena, but only while source tracking is
# enabled; otherwise does nothing and returns nil.
def update_arena_span(node_id, start_byte, end_byte)
  @arena.update_span(node_id, start_byte, end_byte) if @track_source
end
85
+
86
+ # --------------------------- linear pass ----------------------------
87
+
88
# Phase 1: walks the token stream once, left to right, and dispatches on
# token kind.
#
# Most tokens are consumed one at a time. Code spans and closing brackets
# may swallow several tokens; their resolvers return the id of the token
# to resume at (or nil when nothing was resolved), which is carried in
# `resume_at`. Token kinds without a branch are skipped.
def linear_pass
  total = @tokens.length
  cursor = 0

  while cursor < total
    resume_at = nil

    case @tokens.kind(cursor)
    when TokenKind::TEXT
      append_text(@tokens.start_byte(cursor), @tokens.end_byte(cursor), nil)
    when TokenKind::ENTITY, TokenKind::ESCAPED_CHAR
      # The token carries its replacement text in str1.
      append_text(@tokens.start_byte(cursor), @tokens.end_byte(cursor), @tokens.str1(cursor))
    when TokenKind::LINE_ENDING
      append_line_ending(cursor)
    when TokenKind::HTML_INLINE
      append_html_inline(cursor)
    when TokenKind::AUTOLINK_URI
      append_autolink(cursor, @tokens.str1(cursor), @tokens.str1(cursor))
    when TokenKind::AUTOLINK_EMAIL
      address = @tokens.str1(cursor)
      append_autolink(cursor, "mailto:#{address}", address)
    when TokenKind::CODE_DELIMITER
      resume_at = resolve_code_span(cursor)
      # No matching closer: the backtick run is literal text.
      append_text(@tokens.start_byte(cursor), @tokens.end_byte(cursor), nil) unless resume_at
    when TokenKind::LBRACKET
      push_bracket(cursor, image: false)
    when TokenKind::BANG_LBRACKET
      push_bracket(cursor, image: true)
    when TokenKind::RBRACKET
      resume_at = resolve_rbracket(cursor, cursor + 1)
    when TokenKind::DELIM_RUN
      push_delim_run(cursor)
    end

    cursor = resume_at || (cursor + 1)
  end
end
129
+
130
+ # ---------------------------- text ----------------------------------
131
+
132
# Adds the bytes [start_byte, end_byte) as text under the current parent.
#
# literal, when given, is the text to store instead of the raw source
# bytes. Without source tracking the content is always materialized into
# str1, sliced from the source when no literal is supplied.
#
# If the parent's last child is a non-provisional TEXT node that
# can_coalesce? accepts, the new text is merged into it (returning nil);
# otherwise a fresh TEXT node is appended.
def append_text(start_byte, end_byte, literal)
  content = literal
  content ||= @source.byteslice(start_byte, end_byte - start_byte).to_s unless @track_source

  tail = @arena.raw_last_child_id(@parent_id)
  mergeable = tail != -1 &&
              @arena.type(tail) == NodeType::TEXT &&
              !@provisional_nodes[tail] &&
              can_coalesce?(tail, start_byte)

  unless mergeable
    fresh = add_arena_node(NodeType::TEXT, start_byte, end_byte, str1: content)
    return @arena.append_child(@parent_id, fresh)
  end

  coalesce_text(tail, content, start_byte, end_byte)
  nil
end
149
+
150
# Whether new text starting at start_byte may be merged into node last_id.
#
# With source tracking the node must end exactly where the new text
# begins; without it the node only needs to hold a str1 literal.
def can_coalesce?(last_id, start_byte)
  return @arena.source_end(last_id) == start_byte if @track_source

  !@arena.str1(last_id).nil?
end
157
+
158
# Merges incoming text into the existing TEXT node last_id.
#
# Untracked mode: plain string concatenation onto str1.
# Tracked mode: when neither side carries a literal, only the span is
# extended to end_byte. Otherwise both halves are turned into strings
# (the node's current text and/or the incoming source slice), joined
# into str1, and the span is extended as well.
def coalesce_text(last_id, materialized, start_byte, end_byte)
  unless @track_source
    merged = @arena.str1(last_id) + materialized.to_s
    return @arena.update_str1(last_id, merged)
  end

  prior = @arena.str1(last_id)
  if prior.nil? && materialized.nil?
    update_arena_span(last_id, @arena.source_start(last_id), end_byte)
    return
  end

  head = prior || @arena.text(last_id).to_s
  tail = materialized || @source.byteslice(start_byte, end_byte - start_byte).to_s
  @arena.update_str1(last_id, head + tail)
  update_arena_span(last_id, @arena.source_start(last_id), end_byte)
end
173
+
174
+ # --------------------------- line endings ---------------------------
175
+
176
# Appends a HARDBREAK or SOFTBREAK node for a LINE_ENDING token.
#
# The token's int1 is the number of trailing spaces before the line
# ending and int2 == 1 marks the backslash form. Two or more trailing
# spaces, or the backslash form, make a hard break; anything else is a
# soft break. In both cases trailing spaces are stripped from the
# preceding text so they do not survive into the output.
def append_line_ending(id)
  trailing = @tokens.int1(id)
  hard = trailing >= 2 || @tokens.int2(id) == 1

  strip_trailing_spaces(trailing) if trailing.positive?

  node_type = hard ? NodeType::HARDBREAK : NodeType::SOFTBREAK
  break_id = add_arena_node(node_type, @tokens.start_byte(id), @tokens.end_byte(id), str1: "\n")
  @arena.append_child(@parent_id, break_id)
end
196
+
197
# Removes `count` trailing spaces from the parent's last child when that
# child is a TEXT node; does nothing otherwise.
#
# A str1 literal, if present, loses its trailing run of at least `count`
# spaces. With source tracking the node's span is also shortened by
# `count` bytes, never below zero length.
def strip_trailing_spaces(count)
  tail = @arena.raw_last_child_id(@parent_id)
  return if tail == -1
  return unless @arena.type(tail) == NodeType::TEXT

  literal = @arena.str1(tail)
  @arena.update_str1(tail, literal.sub(/ {#{count},}\z/, "")) if literal

  return unless @track_source

  origin = @arena.source_start(tail)
  kept = [@arena.source_len(tail) - count, 0].max
  @arena.update_span(tail, origin, origin + kept)
end
215
+
216
+ # --------------------------- HTML / autolink ------------------------
217
+
218
# Appends an HTML_INLINE node for the token, storing the token's str1 as
# the node's str1.
def append_html_inline(id)
  first_byte = @tokens.start_byte(id)
  last_byte = @tokens.end_byte(id)
  html_id = add_arena_node(NodeType::HTML_INLINE, first_byte, last_byte, str1: @tokens.str1(id))
  @arena.append_child(@parent_id, html_id)
end
226
+
227
# Appends a LINK node for an autolink token, with a single TEXT child
# holding `label`.
#
# The destination is normalized by the link scanner and then passed
# through block_unsafe_autolink, so a denied scheme yields an empty href.
# The label child is created directly on the arena, without a source span.
def append_autolink(id, destination, label)
  href = block_unsafe_autolink(@link_scanner.normalize_uri(destination))
  link_id = add_arena_node(NodeType::LINK, @tokens.start_byte(id), @tokens.end_byte(id), str1: href)
  @arena.append_child(@parent_id, link_id)

  label_id = @arena.add_node(NodeType::TEXT, str1: label)
  @arena.append_child(link_id, label_id)
end
236
+
237
# Denylist filter for autolink destinations.
#
# Returns "" (blocking the href) and reports an :unsafe_url warning when
# the destination's scheme, compared case-insensitively, is listed in
# UNSAFE_AUTOLINK_SCHEMES. Destinations with any other scheme, or with no
# scheme at all, are returned unchanged. This is deliberately a denylist
# rather than the allowlist used by sanitize_destination, so benign
# custom schemes keep working as CommonMark autolinks.
def block_unsafe_autolink(destination)
  scheme = destination[%r{\A([a-zA-Z][a-zA-Z0-9+\-.]*):}, 1]
  lowered = scheme&.downcase
  return destination unless lowered && UNSAFE_AUTOLINK_SCHEMES.include?(lowered)

  report_diagnostic(
    severity: :warning,
    rule: :unsafe_url,
    message: "Unsafe URL scheme #{lowered.inspect} blocked",
  )
  ""
end
253
+
254
+ # --------------------------- code spans -----------------------------
255
+
256
+ # Find the closing backtick run for a code span by scanning the
257
+ # source bytes directly. CommonMark: backslash escapes do not
258
+ # apply inside a code span, so once we're past the opener every
259
+ # backtick run is a real candidate (token-level ESCAPED_CHAR is
260
+ # ignored). byteindex jumps over non-backtick byte stretches at
261
+ # C speed.
262
+ def resolve_code_span(opener_id)
263
+ run_len = @tokens.int1(opener_id)
264
+ pos = @tokens.end_byte(opener_id)
265
+ bytesize = @source_b.bytesize
266
+ while pos < bytesize
267
+ run_start = @source_b.byteindex(BACKTICK_BYTE, pos)
268
+ break if run_start.nil?
269
+
270
+ pos = run_start + 1
271
+ pos += 1 while pos < bytesize && @source_b.getbyte(pos) == 0x60
272
+ if pos - run_start == run_len
273
+ emit_code_span_bytes(opener_id, run_start, pos)
274
+ return next_token_after(pos, opener_id + 1)
275
+ end
276
+ end
277
+ nil
278
+ end
279
+
280
+ BACKTICK_BYTE = "`".b.freeze
281
+
282
# Appends a CODE_SPAN node whose span runs from the opener token's start
# to closer_end_byte. The body — the bytes between the opener's end and
# closer_start_byte — is normalized and stored in str1.
def emit_code_span_bytes(opener_id, closer_start_byte, closer_end_byte)
  body_from = @tokens.end_byte(opener_id)
  body = @source.byteslice(body_from, closer_start_byte - body_from).to_s

  span_id = add_arena_node(
    NodeType::CODE_SPAN,
    @tokens.start_byte(opener_id), closer_end_byte,
    str1: normalize_code_span(body),
  )
  @arena.append_child(@parent_id, span_id)
end
292
+
293
# Normalizes code span content: newlines become spaces, and when the
# result both starts and ends with a space and is not made up of spaces
# only, exactly one space is removed from each end.
def normalize_code_span(text)
  flattened = text.tr("\n", " ")
  padded = flattened.length >= 2 && flattened.start_with?(" ") && flattened.end_with?(" ")
  return flattened unless padded && flattened.match?(/[^ ]/)

  flattened[1..-2]
end
300
+
301
+ # --------------------------- brackets -------------------------------
302
+
303
# Records an opening bracket (`[`, or `![` when image is true).
#
# The bracket is first emitted as a provisional TEXT node so it renders
# literally if it never becomes a link, then pushed onto the bracket
# stack as an active opener together with the current delimiter stack
# depth.
def push_bracket(token_id, image:)
  marker = image ? "![" : "["
  placeholder = add_arena_node(
    NodeType::TEXT,
    @tokens.start_byte(token_id), @tokens.end_byte(token_id),
    str1: marker,
  )
  @arena.append_child(@parent_id, placeholder)
  @provisional_nodes[placeholder] = true

  delimiter_depth = @delimiter_stack.length
  @bracket_stack << Bracket.new(token_id, placeholder, image, true, delimiter_depth)
end
314
+
315
# Handles a `]` token. Returns the id of the token to resume at when the
# bracket pair became a footnote reference, link or image, and nil when
# the `]` was emitted as literal text.
#
# CommonMark spec algorithm: only the TOP of the bracket stack is
# consulted. An inactive `[` must not be jumped over to reach an outer
# `[` or `![`, otherwise nested-image precedence (spec example 520)
# resolves the wrong way. So an empty stack or an inactive top opener
# both turn `]` into text, and the inactive opener is popped.
def resolve_rbracket(rbracket_token_id, search_from_id)
  opener = @bracket_stack.last
  unless opener&.active
    # pop is a no-op on an empty stack.
    @bracket_stack.pop
    append_text(@tokens.start_byte(rbracket_token_id),
                @tokens.end_byte(rbracket_token_id), "]")
    return nil
  end

  opener_index = @bracket_stack.length - 1
  rbracket_end = @tokens.end_byte(rbracket_token_id)

  # Footnote references (`[^label]`) take precedence over link forms.
  if @footnotes && !opener.image
    footnote = try_footnote_reference(opener, rbracket_token_id)
    if footnote
      finalize_footnote(opener, opener_index, footnote, rbracket_end)
      return next_token_after(rbracket_end, search_from_id)
    end
  end

  match = @link_scanner.inline_link(rbracket_end) ||
          try_reference_link(opener, rbracket_token_id, rbracket_end)
  if match
    finalize_link(opener, opener_index, match)
    return next_token_after(match[:end_byte], search_from_id)
  end

  # Neither an inline nor a reference link: drop the opener, keep `]`.
  @bracket_stack.delete_at(opener_index)
  append_text(@tokens.start_byte(rbracket_token_id),
              @tokens.end_byte(rbracket_token_id), "]")
  nil
end
357
+
358
# Tries to resolve the bracket pair as a reference link or image.
#
# opener            - Bracket of the opening `[` / `![`.
# rbracket_token_id - token id of the closing `]`.
# start_byte        - byte offset immediately after the `]`.
#
# Three forms are recognized:
#   full      `[text][label]` — looked up by `label`
#   collapsed `[text][]`      — looked up by `text`
#   shortcut  `[text]`        — looked up by `text`
#
# The label length limit (ReferenceDefinition.label_too_long?) is only
# applied to the bracket text when that text is actually used as the
# label (collapsed and shortcut forms); the text of a full reference may
# be arbitrarily long. A `[` after the `]` that does not form a link
# label (the scanner returns no end offset) does not rule out the
# shortcut form, so that case falls through to the shortcut lookup.
#
# Returns { end_byte:, destination:, title: } on success, else nil. A
# full or collapsed reference whose definition is missing reports a
# :missing_reference warning.
def try_reference_link(opener, rbracket_token_id, start_byte)
  label_start = @tokens.end_byte(opener.token_id)
  label_end = @tokens.start_byte(rbracket_token_id)
  text_label = @source.byteslice(label_start, label_end - label_start).to_s

  if start_byte < @source.bytesize && @source.getbyte(start_byte) == 0x5B
    ref_label, after_byte = @link_scanner.reference_label(start_byte)
    if after_byte
      collapsed = ref_label.empty?
      return nil if collapsed && ReferenceDefinition.label_too_long?(text_label)

      normalized = ReferenceDefinition.normalize_label(collapsed ? text_label : ref_label)
      ref = @references[normalized]
      unless ref
        # Full reference `[text][ref]` with a missing definition is
        # usually a typo worth surfacing.
        report_diagnostic(
          severity: :warning,
          rule: :missing_reference,
          message: "Reference #{normalized.inspect} is not defined",
        )
        return nil
      end
      return {
        end_byte: after_byte,
        destination: @link_scanner.normalize_uri(ref[:destination].to_s),
        title: ref[:title],
      }
    end
  end

  # Shortcut form: the bracket text itself is the label.
  return nil if ReferenceDefinition.label_too_long?(text_label)

  ref = @references[ReferenceDefinition.normalize_label(text_label)]
  return nil unless ref

  {
    end_byte: start_byte,
    destination: @link_scanner.normalize_uri(ref[:destination].to_s),
    title: ref[:title],
  }
end
397
+
398
# Turns a matched bracket pair into a LINK or IMAGE node.
#
# The new node is inserted before the opener's placeholder, everything
# from the opener's next sibling through the parent's last child is moved
# inside it, and the placeholder is detached. Delimiters pushed since the
# opener are resolved as emphasis within the link. Finally the opener
# leaves the bracket stack and, for a link (not an image), every
# remaining non-image opener is deactivated so links cannot nest.
def finalize_link(opener, opener_index, match)
  node_type = opener.image ? NodeType::IMAGE : NodeType::LINK
  link_id = add_arena_node(
    node_type,
    @tokens.start_byte(opener.token_id), match[:end_byte],
    str1: sanitize_destination(match[:destination]),
    str2: match[:title],
  )
  @arena.insert_before(@parent_id, opener.node_id, link_id)

  head = @arena.raw_next_sibling_id(opener.node_id)
  tail = @arena.raw_last_child_id(@parent_id)
  @arena.reparent(link_id, head, tail) unless head == -1 || tail == -1 || head == link_id

  @provisional_nodes.delete(opener.node_id)
  @arena.detach(opener.node_id)

  process_emphasis(@delimiter_stack.slice!(opener.delim_stack_size..) || [])

  @bracket_stack.delete_at(opener_index)
  return if opener.image

  @bracket_stack.each { |bracket| bracket.active = false unless bracket.image }
end
427
+
428
# Inner text of a footnote reference: `^` followed by a non-empty label
# containing no whitespace and no `]`.
FOOTNOTE_REF_RE = /\A\^([^\]\s]+)\z/

# Checks whether the text between the opener and the `]` is a footnote
# reference (`^label`).
#
# Returns { label:, number:, occurrence: } when the footnote registry
# yields a number for the normalized label, else nil so the bracket falls
# back to link logic.
def try_footnote_reference(opener, rbracket_token_id)
  from = @tokens.end_byte(opener.token_id)
  upto = @tokens.start_byte(rbracket_token_id)
  inner = @source.byteslice(from, upto - from).to_s

  raw_label = inner[FOOTNOTE_REF_RE, 1]
  return nil unless raw_label

  label = ReferenceDefinition.normalize_label(raw_label)
  number, occurrence = @footnotes.reference(label)
  number ? { label: label, number: number, occurrence: occurrence } : nil
end
446
+
447
# Replaces a `[^label]` bracket pair with a FOOTNOTE_REFERENCE node
# carrying the label (str1), footnote number (int1) and occurrence (int2).
#
# The provisional `[` node and every sibling after it (the `^label` text)
# are detached; delimiters opened inside the literal label are discarded
# and the opener is removed from the bracket stack.
def finalize_footnote(opener, opener_index, footnote, rbracket_end)
  ref_id = add_arena_node(
    NodeType::FOOTNOTE_REFERENCE,
    @tokens.start_byte(opener.token_id), rbracket_end,
    str1: footnote[:label], int1: footnote[:number], int2: footnote[:occurrence],
  )
  @arena.insert_before(@parent_id, opener.node_id, ref_id)

  doomed = opener.node_id
  until doomed == -1
    # Read the successor before detaching, while the link still exists.
    following = @arena.raw_next_sibling_id(doomed)
    @provisional_nodes.delete(doomed)
    @arena.detach(doomed)
    doomed = following
  end

  @delimiter_stack.slice!(opener.delim_stack_size..)
  @bracket_stack.delete_at(opener_index)
end
469
+
470
# Returns the id of the first token, searching from from_id, that starts
# at or after byte_offset; returns the token count when there is none.
#
# A token that straddles byte_offset (e.g. an HTML inline or autolink
# overlapping the end of an already-resolved code span or link) has its
# leading part consumed already, so its tail bytes are emitted as plain
# text — they must not be silently lost — and the following token id is
# returned.
def next_token_after(byte_offset, from_id)
  total = @tokens.length

  (from_id...total).each do |candidate|
    return candidate if @tokens.start_byte(candidate) >= byte_offset

    token_end = @tokens.end_byte(candidate)
    next unless token_end > byte_offset

    append_text(byte_offset, token_end, nil)
    return candidate + 1
  end

  total
end
491
+
492
# Allowlist filter for link and image destinations.
#
# nil becomes "". Destinations starting with "/" or "#", destinations
# without a scheme, and destinations whose scheme (case-insensitive) is
# in SAFE_SCHEMES are returned unchanged. Anything else is replaced by ""
# and reported as an :unsafe_url warning.
def sanitize_destination(destination)
  return "" if destination.nil?

  scheme = destination.start_with?("/", "#") ? nil : destination[%r{\A([a-zA-Z][a-zA-Z0-9+\-.]*):}, 1]
  lowered = scheme&.downcase
  return destination if lowered.nil? || SAFE_SCHEMES.include?(lowered)

  report_diagnostic(
    severity: :warning,
    rule: :unsafe_url,
    message: "Unsafe URL scheme #{lowered.inspect} blocked",
  )
  ""
end
507
+
508
# Appends a Diagnostic built from the given fields to the diagnostics
# sink. Does nothing (and builds nothing) when no sink was supplied.
def report_diagnostic(severity:, rule:, message:, source_span: nil)
  sink = @diagnostics
  return unless sink

  entry = Diagnostic.new(
    severity: severity, rule: rule, message: message, source_span: source_span,
  )
  sink << entry
end
515
+
516
+ # --------------------------- delim runs / emphasis ------------------
517
+
518
# Records a delimiter run token.
#
# Token fields: int1 is the delimiter byte, int2 the run length, int3 a
# flag word where bit 0b10 means "can open" and bit 0b01 "can close".
# The run is emitted as a provisional TEXT node holding the repeated
# delimiter character and a matching Delimiter is pushed onto the stack
# for process_emphasis.
def push_delim_run(token_id)
  flags = @tokens.int3(token_id)
  run_length = @tokens.int2(token_id)
  glyph = Inline::BYTE_CHR[@tokens.int1(token_id)]

  placeholder = add_arena_node(
    NodeType::TEXT,
    @tokens.start_byte(token_id), @tokens.end_byte(token_id),
    str1: glyph * run_length,
  )
  @arena.append_child(@parent_id, placeholder)
  @provisional_nodes[placeholder] = true

  opens = flags.anybits?(0b10)
  closes = flags.anybits?(0b01)
  @delimiter_stack << Delimiter.new(placeholder, glyph, run_length, opens, closes)
end
539
+
540
# Phase 2: pairs delimiter stack entries into EMPHASIS / STRONG /
# STRIKETHROUGH nodes (CommonMark "process emphasis" procedure).
#
# stack - Array of Delimiter entries in document order. It is consumed:
#         on return it is empty and none of its nodes is provisional.
#
# NB: the CommonMark spec describes an `openers_bottom` optimization
# keyed by closer character / length / flanking flags. Implementing that
# correctly is subtle (a single per-character bottom blocks valid matches
# like `*foo**bar**baz*`), so this implementation walks back to the start
# of the stack for every closer. This is O(stack^2) in the worst case but
# stacks are tiny in practice.
def process_emphasis(stack)
  closer_idx = 0

  while closer_idx < stack.length
    closer = stack[closer_idx]
    unless closer.can_close
      closer_idx += 1
      next
    end

    opener_idx = find_emphasis_opener(stack, closer_idx)
    if opener_idx.nil?
      if closer.can_open
        # May still serve as an opener for a later closer.
        closer_idx += 1
      else
        # Removing the entry shifts the next one into closer_idx, so the
        # cursor must NOT advance here or that entry would be skipped.
        @provisional_nodes.delete(closer.node_id)
        stack.delete_at(closer_idx)
      end
      next
    end

    opener = stack[opener_idx]
    strength = [opener.count, closer.count].min >= 2 ? 2 : 1
    if closer.char == "~"
      # GFM strikethrough only forms on `~~` runs. A single `~`
      # leaves the delimiter as text; advance the cursor so future
      # `~~` pairs can still match.
      if strength < 2
        closer_idx += 1
        next
      end
      kind = NodeType::STRIKETHROUGH
    else
      kind = strength == 2 ? NodeType::STRONG : NodeType::EMPHASIS
    end

    # CommonMark spec: any delimiters strictly between this opener and
    # closer can't open or close anything in this scope, so drop them
    # from the stack before we rebuild the tree. Their arena nodes stay
    # where they are (they'll be reparented into the new emphasis
    # alongside the surrounding content), but they must no longer be
    # candidates for future iterations. Without this, the next iteration
    # would try to pair stranded delimiters that have already been moved
    # into a different parent, which corrupts the sibling chain
    # (Arena#reparent walks into @parent[-1]).
    if closer_idx > opener_idx + 1
      removed = stack.slice!((opener_idx + 1)...closer_idx)
      removed.each { |e| @provisional_nodes.delete(e.node_id) }
      closer_idx = opener_idx + 1
      closer = stack[closer_idx]
    end

    opener_node = opener.node_id
    closer_node = closer.node_id

    # The emphasis span covers the innermost `strength` characters of the
    # opener run through the first `strength` characters of the closer run.
    if @track_source
      opener_match_start = @arena.source_end(opener_node) - strength
      closer_match_end = @arena.source_start(closer_node) + strength
    else
      opener_match_start = -1
      closer_match_end = 0
    end
    emphasis_id = add_arena_node(kind, opener_match_start, closer_match_end)

    # Move everything between the two delimiter nodes under the new node.
    first_inside = @arena.raw_next_sibling_id(opener_node)
    last_inside = @arena.raw_prev_sibling_id(closer_node)
    if first_inside != -1 && last_inside != -1 &&
       first_inside != closer_node && last_inside != opener_node
      @arena.reparent(emphasis_id, first_inside, last_inside)
    end

    parent_id = @arena.raw_parent_id(opener_node)
    @arena.insert_before(parent_id, closer_node, emphasis_id)

    if opener.count == strength
      # Opener fully used: remove its node and stack entry.
      @provisional_nodes.delete(opener_node)
      @arena.detach(opener_node)
      stack.delete_at(opener_idx)
      closer_idx -= 1
    else
      # Opener partly used: trim the consumed characters off its end.
      opener.count -= strength
      str = @arena.str1(opener_node)
      @arena.update_str1(opener_node, str[0...-strength])
      if @track_source
        new_end = @arena.source_end(opener_node) - strength
        @arena.update_span(opener_node, @arena.source_start(opener_node), new_end)
      end
    end

    if closer.count == strength
      # Closer fully used; the next entry slides into closer_idx.
      @provisional_nodes.delete(closer_node)
      @arena.detach(closer_node)
      stack.delete_at(closer_idx)
    else
      # Closer partly used: trim the consumed characters off its start
      # and re-examine it on the next iteration.
      closer.count -= strength
      str = @arena.str1(closer_node)
      @arena.update_str1(closer_node, str[strength..])
      if @track_source
        new_start = @arena.source_start(closer_node) + strength
        new_end = @arena.source_end(closer_node)
        @arena.update_span(closer_node, new_start, new_end)
      end
    end
  end

  # Whatever is left never matched: it stays as ordinary text.
  stack.each { |e| @provisional_nodes.delete(e.node_id) }
  stack.clear
end

# Returns the index of the nearest entry before closer_idx that can open
# for the closer at closer_idx (same character, not excluded by the
# CommonMark "multiple of 3" rule), or nil when there is none.
def find_emphasis_opener(stack, closer_idx)
  closer = stack[closer_idx]

  (closer_idx - 1).downto(0) do |idx|
    candidate = stack[idx]
    next unless candidate.can_open && candidate.char == closer.char

    # If either delimiter can both open and close, the run lengths must
    # not sum to a multiple of 3 unless both are multiples of 3.
    blocked = (candidate.can_close || closer.can_open) &&
              ((candidate.count + closer.count) % 3).zero? &&
              !((candidate.count % 3).zero? && (closer.count % 3).zero?)
    return idx unless blocked
  end

  nil
end
672
+ end
673
+ end
674
+ end