markbridge 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/all.rb +4 -7
  3. data/lib/markbridge/ast/document.rb +1 -1
  4. data/lib/markbridge/ast/element.rb +2 -2
  5. data/lib/markbridge/ast/list.rb +2 -2
  6. data/lib/markbridge/ast/table.rb +61 -0
  7. data/lib/markbridge/ast/text.rb +5 -1
  8. data/lib/markbridge/ast.rb +1 -0
  9. data/lib/markbridge/bbcode.rb +4 -0
  10. data/lib/markbridge/gem_loader.rb +2 -3
  11. data/lib/markbridge/html.rb +4 -0
  12. data/lib/markbridge/mediawiki.rb +4 -0
  13. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
  14. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
  15. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
  16. data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
  17. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
  18. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
  19. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
  20. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
  21. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
  22. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
  23. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
  24. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
  25. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
  26. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
  27. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
  28. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
  29. data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
  30. data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
  31. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
  32. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
  33. data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
  34. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
  35. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
  36. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
  37. data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
  38. data/lib/markbridge/parsers/bbcode.rb +4 -0
  39. data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
  40. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
  41. data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
  42. data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
  43. data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
  44. data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
  45. data/lib/markbridge/parsers/html/parser.rb +16 -15
  46. data/lib/markbridge/parsers/html.rb +3 -0
  47. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
  48. data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
  49. data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
  50. data/lib/markbridge/parsers/media_wiki.rb +1 -0
  51. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
  52. data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
  53. data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
  54. data/lib/markbridge/parsers/text_formatter.rb +1 -0
  55. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
  56. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
  57. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
  58. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
  59. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
  60. data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
  61. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
  62. data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
  63. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
  64. data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
  65. data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
  66. data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
  67. data/lib/markbridge/renderers/discourse/tag.rb +14 -1
  68. data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
  69. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
  70. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
  71. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
  72. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
  73. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
  74. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
  75. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
  76. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
  77. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
  78. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
  79. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
  80. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
  81. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
  82. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
  83. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
  84. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
  85. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
  86. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
  87. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
  88. data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
  89. data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
  90. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
  91. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
  92. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
  93. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
  94. data/lib/markbridge/renderers/discourse.rb +4 -0
  95. data/lib/markbridge/textformatter.rb +4 -0
  96. data/lib/markbridge/version.rb +1 -1
  97. data/lib/markbridge.rb +27 -62
  98. metadata +19 -2
@@ -36,15 +36,15 @@ module Markbridge
36
36
  # breaks disabled by default.
37
37
  def initialize(escape_hard_line_breaks: false)
38
38
  @escape_hard_line_breaks = escape_hard_line_breaks
39
+ # @inline_content / @inline_result / @inline_len are set by
40
+ # escape_inline on every call before any helper reads them;
41
+ # no defensive init needed.
39
42
  end
40
43
 
41
- # Fast-path check: any character that might need escaping
42
- # Only includes characters we actually escape (removed ], {, }, ^)
43
- # > is needed for blockquote detection at line start
44
+ # Fast-path: skip escape_text entirely for content with no special
45
+ # chars. `>` is needed for blockquote detection at line start.
44
46
  MAYBE_SPECIAL = /[\\`*_\[#+\-.!<>&|~=>)]/
45
47
 
46
- # Check for indented code on any line
47
- # Matches: 4+ spaces, tab, or space+tab combinations that reach column 4+
48
48
  MAYBE_INDENTED_CODE = /(?:^|\n)(?: {4}|\t| {1,3}\t)/
49
49
 
50
50
  # Block-level patterns
@@ -119,8 +119,7 @@ module Markbridge
119
119
  # @return [String] the escaped text, or empty string if input is nil
120
120
  # @note Multi-line HTML tags and blocks are handled by escaping the opening <
121
121
  def escape(text)
122
- return "".freeze if text.nil?
123
- return text if text.empty?
122
+ return "" if text.nil?
124
123
 
125
124
  # Neutralize hard line breaks (trailing 2+ spaces before newline)
126
125
  text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n")
@@ -137,7 +136,8 @@ module Markbridge
137
136
  return escape_line(lines[0], false) if lines.size == 1
138
137
 
139
138
  # Pre-allocate result buffer
140
- result = String.new(capacity: text.bytesize + text.bytesize / 3, encoding: text.encoding)
139
+ bytesize = text.bytesize
140
+ result = String.new(capacity: bytesize + bytesize / 3, encoding: text.encoding)
141
141
  prev_was_paragraph = false
142
142
  first = true
143
143
 
@@ -154,35 +154,32 @@ module Markbridge
154
154
  end
155
155
 
156
156
  def escape_line(line, prev_was_paragraph)
157
- return line if line.empty?
158
-
159
- # Handle indented code blocks first
157
+ # No `line.empty?` early-return: it's redundant with the
158
+ # `line.getbyte(indent_len).nil?` guard below, which catches both
159
+ # empty and whitespace-only lines while also preserving object
160
+ # identity (returns `line`).
160
161
  return escape_indented_code(line) if INDENTED_CODE.match?(line)
161
162
 
162
- # Extract 0-3 space indent
163
+ # After INDENTED_CODE, line has at most 3 leading spaces, so the
164
+ # `< 3` bound keeps this a tight YJIT-friendly hot loop.
163
165
  indent_len = 0
164
- while indent_len < 3 && indent_len < line.length && line.getbyte(indent_len) == SPACE
165
- indent_len += 1
166
- end
166
+ indent_len += 1 while indent_len < 3 && line.getbyte(indent_len) == SPACE
167
167
 
168
- return line if indent_len >= line.length
168
+ # Whitespace-only line (1-3 spaces) getbyte past end is nil.
169
+ return line if line.getbyte(indent_len).nil?
169
170
 
170
- content = indent_len > 0 ? line[indent_len..] : line
171
+ has_indent = indent_len > 0
172
+ content = has_indent ? line[indent_len..] : line
171
173
 
172
- # Apply block-level escaping (which may also do inline escaping)
173
174
  escaped, skip_inline = escape_block_level(content, prev_was_paragraph)
174
-
175
- # Apply inline escaping if block-level didn't handle it
176
175
  escaped = escape_inline(escaped) unless skip_inline
177
176
 
178
- # Prepend indent if present, preserve encoding
179
- if indent_len > 0
177
+ if has_indent
180
178
  result = String.new(encoding: line.encoding)
181
179
  result << line[0, indent_len] << escaped
182
180
  result
183
181
  else
184
- # Preserve original encoding
185
- escaped.is_a?(String) ? escaped.force_encoding(line.encoding) : escaped
182
+ escaped.force_encoding(line.encoding)
186
183
  end
187
184
  end
188
185
 
@@ -197,21 +194,21 @@ module Markbridge
197
194
  # - Content doesn't start at valid block position (no lists, headings, etc.)
198
195
  # - Visual indentation is preserved (NBSP renders as space)
199
196
  # We still escape inline content since it's no longer protected.
200
- i = 0
201
- while i < line.length
202
- b = line.getbyte(i)
203
- break if b != SPACE && b != TAB
204
- i += 1
197
+ # Caller (escape_line) guarantees INDENTED_CODE matched, so line
198
+ # starts with at least one SPACE or TAB; ws_end is always ≥ 1.
199
+ line_length = line.length
200
+ ws_end = 0
201
+ while ws_end < line_length && ((byte = line.getbyte(ws_end)) == SPACE || byte == TAB)
202
+ ws_end += 1
205
203
  end
206
204
 
207
- return line if i == 0 # No leading whitespace (shouldn't happen, but safe)
208
- return line if i >= line.length # Whitespace-only line
205
+ return line if ws_end >= line_length # Whitespace-only line
209
206
 
210
207
  # Convert leading whitespace to NBSP (tab = 4 NBSP for visual consistency)
211
208
  nbsp_indent = String.new(encoding: line.encoding)
212
- line[0, i].each_char { |c| nbsp_indent << (c == "\t" ? (NBSP * 4) : NBSP) }
209
+ line[0, ws_end].each_char { |char| nbsp_indent << (char == "\t" ? (NBSP * 4) : NBSP) }
213
210
 
214
- content = line[i..]
211
+ content = line[ws_end..]
215
212
  "#{nbsp_indent}#{escape_inline(content)}"
216
213
  end
217
214
 
@@ -220,22 +217,15 @@ module Markbridge
220
217
 
221
218
  case first_byte
222
219
  when HASH
223
- return "\\##{escape_inline(content[1..])}", true if ATX_HEADING.match?(content)
220
+ return escape_first_char_inline(content, "\\#") if ATX_HEADING.match?(content)
224
221
  when GT
225
- return "\\>#{escape_inline(content[1..])}", true
222
+ return escape_first_char_inline(content, "\\>")
226
223
  when DASH
227
- if THEMATIC_BREAK_DASH.match?(content) ||
228
- (prev_was_paragraph && SETEXT_UNDERLINE_DASH.match?(content))
229
- return escape_all_chars(content, DASH, "\\-"), true
230
- end
231
- return "\\-#{escape_inline(content[1..])}", true if BULLET_LIST.match?(content)
224
+ return escape_block_dash(content, prev_was_paragraph)
232
225
  when PLUS
233
- return "\\+#{escape_inline(content[1..])}", true if BULLET_LIST.match?(content)
226
+ return escape_first_char_inline(content, "\\+") if BULLET_LIST.match?(content)
234
227
  when STAR
235
- if THEMATIC_BREAK_STAR.match?(content)
236
- return escape_all_chars(content, STAR, "\\*"), true
237
- end
238
- return "\\*#{escape_inline(content[1..])}", true if BULLET_LIST.match?(content)
228
+ return escape_block_star(content)
239
229
  when UNDERSCORE
240
230
  if THEMATIC_BREAK_UNDERSCORE.match?(content)
241
231
  return escape_all_chars(content, UNDERSCORE, "\\_"), true
@@ -246,162 +236,221 @@ module Markbridge
246
236
  end
247
237
  when BACKTICK
248
238
  if FENCED_CODE_BACKTICK.match?(content)
249
- # Escape ALL backticks to prevent code span interpretation
250
- # e.g., ```` becomes \`\`\`\` not \```` (which would be \` + ```)
251
239
  return escape_all_chars(content, BACKTICK, "\\`"), true
252
240
  end
253
241
  when TILDE
254
242
  return "\\#{content}", true if FENCED_CODE_TILDE.match?(content)
255
243
  when BRACKET_OPEN
256
- return "\\[#{escape_inline(content[1..])}", true
244
+ return escape_first_char_inline(content, "\\[")
257
245
  when PIPE
258
- return "\\|#{escape_inline(content[1..])}", true
246
+ return escape_first_char_inline(content, "\\|")
259
247
  when DIGIT_0..DIGIT_9
260
- if (m = ORDERED_LIST.match(content))
261
- prefix = m[1]
262
- delim = m[2]
263
- rest = content[m[0].length..]
264
- return "#{prefix}\\#{delim}#{escape_inline(rest)}", true
265
- end
248
+ return escape_block_ordered_list(content)
266
249
  end
267
250
 
268
251
  [content, false]
269
252
  end
270
253
 
254
+ # Escape the first character and inline-escape the rest.
255
+ def escape_first_char_inline(content, escaped_char)
256
+ ["#{escaped_char}#{escape_inline(content[1..])}", true]
257
+ end
258
+
259
+ def escape_block_dash(content, prev_was_paragraph)
260
+ if THEMATIC_BREAK_DASH.match?(content) ||
261
+ (prev_was_paragraph && SETEXT_UNDERLINE_DASH.match?(content))
262
+ return escape_all_chars(content, DASH, "\\-"), true
263
+ end
264
+ return escape_first_char_inline(content, "\\-") if BULLET_LIST.match?(content)
265
+ [content, false]
266
+ end
267
+
268
+ def escape_block_star(content)
269
+ return escape_all_chars(content, STAR, "\\*"), true if THEMATIC_BREAK_STAR.match?(content)
270
+ return escape_first_char_inline(content, "\\*") if BULLET_LIST.match?(content)
271
+ [content, false]
272
+ end
273
+
274
+ def escape_block_ordered_list(content)
275
+ if (match = ORDERED_LIST.match(content))
276
+ rest = content[match[0].length..]
277
+ return "#{match[1]}\\#{match[2]}#{escape_inline(rest)}", true
278
+ end
279
+ [content, false]
280
+ end
281
+
271
282
  def escape_all_chars(str, byte_val, escaped)
272
283
  result = String.new(capacity: str.bytesize * 2, encoding: str.encoding)
273
- str.each_byte do |b|
274
- if b == byte_val
284
+ str.each_byte do |byte|
285
+ if byte == byte_val
275
286
  result << escaped
276
287
  else
277
- result << b
288
+ result << byte
278
289
  end
279
290
  end
280
291
  result
281
292
  end
282
293
 
283
294
  def escape_inline(content)
284
- # Quick check - if no special chars, return as-is
285
295
  return content unless INLINE_SPECIAL.match?(content)
286
296
 
287
- result =
288
- String.new(
289
- capacity: content.bytesize + content.bytesize / 4,
290
- encoding: content.encoding,
291
- )
292
- len = content.bytesize
293
- i = 0
294
-
295
- while i < len
296
- b = content.getbyte(i)
297
-
298
- case b
299
- when BACKSLASH # \
300
- if i + 1 < len && ascii_punctuation?(content.getbyte(i + 1))
301
- # Escape the backslash, but let the next char be processed on its own
302
- result << "\\\\"
303
- i += 1
304
- elsif i + 1 == len # backslash at end (hard break)
305
- result << "\\\\"
306
- i += 1
307
- else
308
- result << b
309
- i += 1
310
- end
311
- when DASH # -
312
- if i + 1 < len && content.getbyte(i + 1) == DASH
313
- # Consecutive dashes - escape each for Discourse ndash prevention
314
- while i < len && content.getbyte(i) == DASH
315
- result << "\\-"
316
- i += 1
317
- end
318
- else
319
- result << b
320
- i += 1
321
- end
322
- when TILDE # ~
323
- if i + 1 < len && content.getbyte(i + 1) == TILDE
324
- result << "\\~\\~"
325
- i += 2
326
- else
327
- result << b
328
- i += 1
329
- end
330
- when STAR # *
331
- while i < len && content.getbyte(i) == STAR
332
- result << "\\*"
333
- i += 1
334
- end
335
- when UNDERSCORE # _
336
- while i < len && content.getbyte(i) == UNDERSCORE
337
- result << "\\_"
338
- i += 1
339
- end
340
- when BACKTICK # `
341
- while i < len && content.getbyte(i) == BACKTICK
342
- result << "\\`"
343
- i += 1
344
- end
345
- when BANG # !
346
- if i + 1 < len && content.getbyte(i + 1) == BRACKET_OPEN
347
- result << "\\!\\["
348
- i += 2
349
- else
350
- result << b
351
- i += 1
352
- end
353
- when BRACKET_OPEN # [
354
- result << "\\["
355
- i += 1
356
- when PIPE # |
357
- result << "\\|"
358
- i += 1
359
- when LT # <
360
- remaining = content.byteslice(i, len - i)
361
- # Check for autolinks first - pass through entirely unchanged
362
- if (m = AUTOLINK.match(remaining))
363
- result << m[0]
364
- i += m[0].bytesize
365
- # Escape complete HTML tags (include tag in output for readability)
366
- # Also escape backticks inside the tag to prevent code span interpretation
367
- elsif (m = HTML_TAG.match(remaining))
368
- escaped_tag = m[0].gsub("`") { "\\`" }
369
- result << "\\" << escaped_tag
370
- i += m[0].bytesize
371
- # Escape HTML-like constructs: processing instructions, SGML declarations,
372
- # and potential tag starts (including multi-line and custom elements)
373
- elsif HTML_TAG_START.match?(remaining)
374
- result << "\\<"
375
- i += 1
376
- else
377
- # Not HTML-like (comparison operator, etc.)
378
- result << b
379
- i += 1
380
- end
381
- when AMP # &
382
- remaining = content.byteslice(i, len - i)
383
- if (m = ENTITY_REF.match(remaining))
384
- result << "\\" << m[0]
385
- i += m[0].bytesize
386
- else
387
- result << b
388
- i += 1
389
- end
390
- else
391
- # Regular character - handle multi-byte UTF-8
392
- if b < 128
393
- result << b
394
- i += 1
395
- else
396
- char_len = utf8_char_length(b)
397
- end_i = [i + char_len, len].min
398
- result << content.byteslice(i, end_i - i)
399
- i = end_i
400
- end
401
- end
297
+ bytesize = content.bytesize
298
+ @inline_content = content
299
+ @inline_result = String.new(capacity: bytesize + bytesize / 4, encoding: content.encoding)
300
+ @inline_len = bytesize
301
+ pos = 0
302
+
303
+ # No loop-progress guard: every `dispatch_inline_byte` branch
304
+ # returns `pos + N` for N >= 1 by construction, so the loop
305
+ # is provably terminating. Mutations that break this
306
+ # (`while true`, body drops, selector swaps that short-circuit
307
+ # the dispatch) surface as timeouts rather than alive
308
+ # mutations, and the inline guard would otherwise cost ~15%
309
+ # on this hot path per benchmark.
310
+ while pos < @inline_len
311
+ byte = @inline_content.getbyte(pos)
312
+ pos = dispatch_inline_byte(byte, pos)
402
313
  end
403
314
 
404
- result
315
+ @inline_result
316
+ end
317
+
318
+ def dispatch_inline_byte(byte, pos)
319
+ case byte
320
+ when BACKSLASH
321
+ escape_backslash(pos)
322
+ when DASH
323
+ escape_consecutive_pair(pos, DASH, "\\-")
324
+ when TILDE
325
+ escape_tilde_pair(pos)
326
+ when STAR
327
+ escape_char_run(pos, STAR, "\\*")
328
+ when UNDERSCORE
329
+ escape_char_run(pos, UNDERSCORE, "\\_")
330
+ when BACKTICK
331
+ escape_char_run(pos, BACKTICK, "\\`")
332
+ when BANG
333
+ escape_image_open(pos)
334
+ when BRACKET_OPEN
335
+ @inline_result << "\\["
336
+ pos + 1
337
+ when PIPE
338
+ @inline_result << "\\|"
339
+ pos + 1
340
+ when LT
341
+ escape_lt(pos)
342
+ when AMP
343
+ escape_amp(pos)
344
+ else
345
+ escape_regular_char(byte, pos)
346
+ end
347
+ end
348
+
349
+ # Escape backslash before ASCII punctuation or at end of content.
350
+ def escape_backslash(pos)
351
+ next_pos = pos + 1
352
+ if next_pos >= @inline_len || ascii_punctuation?(@inline_content.getbyte(next_pos))
353
+ @inline_result << "\\\\"
354
+ else
355
+ @inline_result << BACKSLASH
356
+ end
357
+ next_pos
358
+ end
359
+
360
+ # Escape consecutive pairs (e.g., -- for ndash prevention) or pass single through.
361
+ def escape_consecutive_pair(pos, byte_val, escaped)
362
+ next_pos = pos + 1
363
+ if next_pos < @inline_len && @inline_content.getbyte(next_pos) == byte_val
364
+ escape_char_run(pos, byte_val, escaped)
365
+ else
366
+ @inline_result << byte_val
367
+ next_pos
368
+ end
369
+ end
370
+
371
+ # Escape ~~ pairs, pass single ~ through.
372
+ def escape_tilde_pair(pos)
373
+ next_pos = pos + 1
374
+ if next_pos < @inline_len && @inline_content.getbyte(next_pos) == TILDE
375
+ @inline_result << "\\~\\~"
376
+ pos + 2
377
+ else
378
+ @inline_result << TILDE
379
+ next_pos
380
+ end
381
+ end
382
+
383
+ # Escape all consecutive occurrences of a repeatable character (*, _, `).
384
+ def escape_char_run(pos, byte_val, escaped)
385
+ while pos < @inline_len && @inline_content.getbyte(pos) == byte_val
386
+ @inline_result << escaped
387
+ pos += 1
388
+ end
389
+ pos
390
+ end
391
+
392
+ # Escape ![ image syntax, pass standalone ! through.
393
+ def escape_image_open(pos)
394
+ next_pos = pos + 1
395
+ if next_pos < @inline_len && @inline_content.getbyte(next_pos) == BRACKET_OPEN
396
+ @inline_result << "\\!\\["
397
+ pos + 2
398
+ else
399
+ @inline_result << BANG
400
+ next_pos
401
+ end
402
+ end
403
+
404
+ # Handle < for autolinks (preserved), HTML tags (escaped), and other constructs.
405
+ def escape_lt(pos)
406
+ remaining = remaining_content(pos)
407
+
408
+ if (match = AUTOLINK.match(remaining))
409
+ matched = match[0]
410
+ @inline_result << matched
411
+ pos + matched.bytesize
412
+ elsif (match = HTML_TAG.match(remaining))
413
+ matched = match[0]
414
+ @inline_result << "\\" << matched.gsub("`") { "\\`" }
415
+ pos + matched.bytesize
416
+ elsif HTML_TAG_START.match?(remaining)
417
+ @inline_result << "\\<"
418
+ pos + 1
419
+ else
420
+ @inline_result << LT
421
+ pos + 1
422
+ end
423
+ end
424
+
425
+ # Handle & for entity references.
426
+ def escape_amp(pos)
427
+ remaining = remaining_content(pos)
428
+
429
+ if (match = ENTITY_REF.match(remaining))
430
+ matched = match[0]
431
+ @inline_result << "\\" << matched
432
+ pos + matched.bytesize
433
+ else
434
+ @inline_result << AMP
435
+ pos + 1
436
+ end
437
+ end
438
+
439
+ def remaining_content(pos)
440
+ @inline_content.byteslice(pos, @inline_len - pos)
441
+ end
442
+
443
+ # Handle regular characters including multi-byte UTF-8.
444
+ def escape_regular_char(byte, pos)
445
+ if byte < 128
446
+ @inline_result << byte
447
+ pos + 1
448
+ else
449
+ char_len = utf8_char_length(byte)
450
+ end_pos = [pos + char_len, @inline_len].min
451
+ @inline_result << @inline_content.byteslice(pos, end_pos - pos)
452
+ end_pos
453
+ end
405
454
  end
406
455
 
407
456
  def ascii_punctuation?(byte)
@@ -422,45 +471,53 @@ module Markbridge
422
471
  end
423
472
 
424
473
  def paragraph_line?(line)
425
- return false if line.empty?
474
+ pos = 0
475
+ line_len = line.bytesize
476
+ pos += 1 while pos < line_len && line.getbyte(pos) == SPACE
477
+ first_non_space = pos
426
478
 
427
- # Quick whitespace-only check
428
- first_non_space = 0
429
- while first_non_space < line.length && line.getbyte(first_non_space) == SPACE
430
- first_non_space += 1
431
- end
432
- return false if first_non_space >= line.length || line.getbyte(first_non_space) == TAB
479
+ # Empty or whitespace-only lines: getbyte past the end returns nil.
480
+ return false if line.getbyte(first_non_space).nil?
433
481
 
434
- # Check if this is a block construct
435
- content = first_non_space <= 3 ? line[first_non_space..] : line
436
- return false if content.nil? || content.empty?
482
+ # Indented code (4+ spaces or any leading \t) is not a paragraph.
483
+ # INDENTED_CODE also catches lines where first_non_space > 3, so no
484
+ # separate numeric boundary check is needed.
485
+ return false if INDENTED_CODE.match?(line)
437
486
 
438
- first_byte = content.getbyte(0)
487
+ content = first_non_space == 0 ? line : line[first_non_space..]
439
488
 
440
- case first_byte
489
+ # Lines starting with [ are paragraph content (the escaper rewrites [
490
+ # to \[). block_construct? has no BRACKET_OPEN case arm, so such
491
+ # lines naturally fall through and !block_construct?(content) == true.
492
+ !block_construct?(content)
493
+ end
494
+
495
+ # Checks whether content starts with a block-level markdown construct.
496
+ # Used by both escape_block_level (to decide what to escape) and
497
+ # paragraph_line? (to decide if setext underlines can follow).
498
+ def block_construct?(content)
499
+ case content.getbyte(0)
441
500
  when HASH
442
- return false if ATX_HEADING.match?(content)
501
+ ATX_HEADING.match?(content)
443
502
  when GT
444
- return false
445
- when DASH, PLUS, STAR
446
- return false if BULLET_LIST.match?(content)
447
- return false if first_byte == DASH && THEMATIC_BREAK_DASH.match?(content)
448
- return false if first_byte == STAR && THEMATIC_BREAK_STAR.match?(content)
503
+ true
504
+ when DASH
505
+ BULLET_LIST.match?(content) || THEMATIC_BREAK_DASH.match?(content)
506
+ when STAR
507
+ BULLET_LIST.match?(content) || THEMATIC_BREAK_STAR.match?(content)
508
+ when PLUS
509
+ BULLET_LIST.match?(content)
449
510
  when UNDERSCORE
450
- return false if THEMATIC_BREAK_UNDERSCORE.match?(content)
451
- when BACKTICK, TILDE
452
- if FENCED_CODE_BACKTICK.match?(content) || FENCED_CODE_TILDE.match?(content)
453
- return false
454
- end
455
- when BRACKET_OPEN
456
- # Lines starting with [ get escaped to \[, which IS paragraph content
457
- # So setext headings CAN follow them
458
- return true
511
+ THEMATIC_BREAK_UNDERSCORE.match?(content)
512
+ when BACKTICK
513
+ FENCED_CODE_BACKTICK.match?(content)
514
+ when TILDE
515
+ FENCED_CODE_TILDE.match?(content)
459
516
  when DIGIT_0..DIGIT_9
460
- return false if ORDERED_LIST.match?(content)
517
+ ORDERED_LIST.match?(content)
518
+ else
519
+ false
461
520
  end
462
-
463
- !INDENTED_CODE.match?(line)
464
521
  end
465
522
  end
466
523
  end
@@ -11,26 +11,40 @@ module Markbridge
11
11
  class RenderContext
12
12
  attr_reader :parents, :depth
13
13
 
14
- def initialize(parents = [], parent_cache: nil)
14
+ def initialize(parents = [], parent_cache: nil, html_mode: false)
15
15
  @parents = parents.freeze
16
16
  @depth = parents.size
17
17
  @parent_cache = parent_cache || build_cache(parents)
18
+ @html_mode = html_mode
18
19
  end
19
20
 
20
- # Create new context with element added to parent chain
21
- # Incrementally updates cache instead of rebuilding from scratch
21
+ # Create new context with element added to parent chain.
22
+ # Incrementally updates the cache (O(1)) instead of rebuilding from
23
+ # parents (O(depth)) — important for deeply-nested documents.
22
24
  # @param element [AST::Element]
23
25
  # @return [RenderContext]
24
26
  def with_parent(element)
25
27
  new_parents = @parents + [element]
26
28
 
27
- # Incrementally update cache instead of rebuilding
28
29
  new_cache = @parent_cache.dup
29
30
  element_class = element.class
30
31
  new_cache[element_class] ||= []
31
32
  new_cache[element_class] = new_cache[element_class] + [element]
32
33
 
33
- self.class.new(new_parents, parent_cache: new_cache)
34
+ self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
35
+ end
36
+
37
+ # Create new context with html_mode toggled
38
+ # Preserves parent chain and cache
39
+ # @param value [Boolean]
40
+ # @return [RenderContext]
41
+ def with_html_mode(value)
42
+ self.class.new(@parents, parent_cache: @parent_cache, html_mode: value)
43
+ end
44
+
45
+ # @return [Boolean]
46
+ def html_mode?
47
+ @html_mode
34
48
  end
35
49
 
36
50
  # Find closest parent of given type
@@ -54,7 +68,7 @@ module Markbridge
54
68
  # @param klass [Class]
55
69
  # @return [Boolean]
56
70
  def has_parent?(klass)
57
- @parent_cache.key?(klass) && !@parent_cache[klass].empty?
71
+ !@parent_cache[klass].nil?
58
72
  end
59
73
 
60
74
  # Check if we're at the root (no parents)
@@ -65,14 +79,12 @@ module Markbridge
65
79
 
66
80
  private
67
81
 
68
- # Build cache from parents array
69
- # Groups parents by class for fast lookup
82
+ # Build cache from parents array.
83
+ # Groups parents by class for fast O(1) lookup.
70
84
  # @param parents [Array<AST::Element>]
71
85
  # @return [Hash{Class => Array<AST::Element>}]
72
86
  def build_cache(parents)
73
- parents.each_with_object(Hash.new { |h, k| h[k] = [] }) do |parent, cache|
74
- cache[parent.class] = cache[parent.class] + [parent]
75
- end
87
+ parents.group_by(&:class)
76
88
  end
77
89
  end
78
90
  end