markbridge 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/all.rb +4 -7
  3. data/lib/markbridge/ast/document.rb +1 -1
  4. data/lib/markbridge/ast/element.rb +2 -2
  5. data/lib/markbridge/ast/list.rb +2 -2
  6. data/lib/markbridge/ast/table.rb +6 -12
  7. data/lib/markbridge/ast/text.rb +5 -1
  8. data/lib/markbridge/bbcode.rb +4 -0
  9. data/lib/markbridge/gem_loader.rb +2 -3
  10. data/lib/markbridge/html.rb +4 -0
  11. data/lib/markbridge/mediawiki.rb +4 -0
  12. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
  13. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
  14. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
  15. data/lib/markbridge/parsers/bbcode/handler_registry.rb +21 -11
  16. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
  17. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
  18. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
  19. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +9 -17
  20. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
  21. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
  22. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +6 -18
  23. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
  24. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
  25. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +1 -1
  26. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +2 -2
  27. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +3 -3
  28. data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
  29. data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
  30. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
  31. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
  32. data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
  33. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
  34. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
  35. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
  36. data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
  37. data/lib/markbridge/parsers/bbcode.rb +1 -0
  38. data/lib/markbridge/parsers/html/handler_registry.rb +32 -49
  39. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -2
  40. data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
  41. data/lib/markbridge/parsers/html/parser.rb +3 -13
  42. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +56 -67
  43. data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
  44. data/lib/markbridge/parsers/media_wiki/parser.rb +51 -76
  45. data/lib/markbridge/parsers/media_wiki.rb +1 -0
  46. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +5 -37
  47. data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
  48. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +24 -17
  49. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +9 -15
  50. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -10
  51. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +11 -39
  52. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
  53. data/lib/markbridge/processors/discourse_markdown/scanner.rb +25 -33
  54. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
  55. data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
  56. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +49 -49
  57. data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
  58. data/lib/markbridge/renderers/discourse/renderer.rb +54 -12
  59. data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
  60. data/lib/markbridge/renderers/discourse/tag.rb +14 -1
  61. data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
  62. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
  63. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
  64. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -9
  65. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
  66. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -1
  67. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
  68. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
  69. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +13 -2
  70. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
  71. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
  72. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
  73. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
  74. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +5 -1
  75. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
  76. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -2
  77. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +2 -0
  78. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
  79. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
  80. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +12 -8
  81. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
  82. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +29 -2
  83. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
  84. data/lib/markbridge/renderers/discourse.rb +1 -0
  85. data/lib/markbridge/textformatter.rb +4 -0
  86. data/lib/markbridge/version.rb +1 -1
  87. data/lib/markbridge.rb +8 -8
  88. metadata +8 -2
@@ -22,11 +22,18 @@ module Markbridge
22
22
  # match = detector.detect(input, 0)
23
23
  # match.node.type # => :attachment
24
24
  class Upload < Base
25
- # Pattern for image: ![alt|dimensions](upload://sha1.ext)
26
- IMAGE_PATTERN = %r{!\[([^\]]*)\]\(upload://([^)]+)\)}
27
-
28
- # Pattern for attachment: [filename|attachment](upload://sha1.ext) followed by optional (size)
29
- ATTACHMENT_PATTERN = %r{\[([^\]]*\|attachment)\]\(upload://([^)]+)\)(\s*\([^)]+\))?}
25
+ # Image: ![alt|dimensions](upload://sha1.ext)
26
+ IMAGE_PATTERN =
27
+ %r{\A!\[(?<alt>[^|\]]*)(?:\|(?<dimensions>[^\]]*))?\]\(upload://(?<url>[^)]+)\)}
28
+
29
+ # Attachment: [filename|attachment](upload://sha1.ext) (size)
30
+ ATTACHMENT_PATTERN =
31
+ %r{
32
+ \A
33
+ \[(?<filename>[^|\]]*)\|attachment\]
34
+ \(upload://(?<url>[^)]+)\)
35
+ (?:\s*\((?<size>[^)]+)\))?
36
+ }xi
30
37
 
31
38
  # Attempt to detect an upload at the given position.
32
39
  #
@@ -34,14 +41,11 @@ module Markbridge
34
41
  # @param pos [Integer] current position to check
35
42
  # @return [Match, nil] match result or nil if no match
36
43
  def detect(input, pos)
37
- char = input[pos]
38
- return nil unless char == "!" || char == "["
39
-
40
44
  remaining = input[pos..]
41
-
42
- if char == "!"
45
+ case input[pos]
46
+ when "!"
43
47
  detect_image(remaining, pos)
44
- else
48
+ when "["
45
49
  detect_attachment(remaining, pos)
46
50
  end
47
51
  end
@@ -50,71 +54,42 @@ module Markbridge
50
54
 
51
55
  def detect_image(remaining, pos)
52
56
  match = IMAGE_PATTERN.match(remaining)
53
- return nil unless match&.begin(0)&.zero?
54
-
55
- raw = match[0]
56
- alt_part = match[1]
57
- url_part = match[2]
57
+ return nil unless match
58
58
 
59
- # Parse alt and dimensions from "alt|dimensions" format
60
- alt, dimensions = parse_alt_dimensions(alt_part)
59
+ sha1, filename = parse_upload_url(match[:url])
60
+ alt = match[:alt]
61
+ alt = nil if alt.empty?
61
62
 
62
- # Extract SHA1 and filename from URL
63
- sha1, filename = parse_upload_url(url_part)
63
+ # `type: :image` is omitted because it is AST::Upload's default -
64
+ # passing it explicitly was an equivalent-mutation surface.
65
+ node =
66
+ AST::Upload.new(sha1:, filename:, alt:, dimensions: match[:dimensions], raw: match[0])
64
67
 
65
- node = AST::Upload.new(sha1:, filename:, type: :image, alt:, dimensions:, raw:)
66
-
67
- Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
68
+ Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
68
69
  end
69
70
 
70
71
  def detect_attachment(remaining, pos)
71
72
  match = ATTACHMENT_PATTERN.match(remaining)
72
- return nil unless match&.begin(0)&.zero?
73
-
74
- raw = match[0]
75
- name_part = match[1]
76
- url_part = match[2]
77
- size_part = match[3]
78
-
79
- # Parse filename from "filename|attachment" format
80
- filename = name_part.sub(/\|attachment$/i, "")
73
+ return nil unless match
81
74
 
82
- # Extract SHA1 from URL
83
- sha1, _url_filename = parse_upload_url(url_part)
75
+ sha1, = parse_upload_url(match[:url])
84
76
 
85
- # Parse size if present
86
- size = size_part&.strip&.delete_prefix("(")&.delete_suffix(")")
77
+ node =
78
+ AST::Upload.new(
79
+ sha1:,
80
+ filename: match[:filename],
81
+ type: :attachment,
82
+ size: match[:size],
83
+ raw: match[0],
84
+ )
87
85
 
88
- node = AST::Upload.new(sha1:, filename:, type: :attachment, size:, raw:)
89
-
90
- Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
91
- end
92
-
93
- def parse_alt_dimensions(alt_part)
94
- return nil, nil if alt_part.nil? || alt_part.empty?
95
-
96
- if alt_part.include?("|")
97
- parts = alt_part.split("|", 2)
98
- alt = parts[0].empty? ? nil : parts[0]
99
- dimensions = parts[1]
100
- [alt, dimensions]
101
- else
102
- [alt_part, nil]
103
- end
86
+ Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
104
87
  end
105
88
 
89
+ # URL format: sha1.ext or just sha1. Returns [sha1, filename-or-nil].
106
90
  def parse_upload_url(url_part)
107
- # URL format: sha1.ext or just sha1
108
- if url_part.include?(".")
109
- parts = url_part.split(".", 2)
110
- sha1 = parts[0]
111
- filename = url_part
112
- else
113
- sha1 = url_part
114
- filename = nil
115
- end
116
-
117
- [sha1, filename]
91
+ sha1, _, ext = url_part.partition(".")
92
+ [sha1, ext.empty? ? nil : url_part]
118
93
  end
119
94
  end
120
95
  end
@@ -49,13 +49,8 @@ module Markbridge
49
49
  def initialize(detectors: DEFAULT_DETECTORS, tag_library: nil, mention_resolver: nil)
50
50
  @detector_instances = build_detectors(detectors, mention_resolver)
51
51
  @tag_library = tag_library
52
- @code_tracker = nil
53
- @result = nil
54
- @nodes = nil
55
- @node_index = 0
56
- @pos = 0
57
- @input = nil
58
- @line_start = true
52
+ # @code_tracker / @result / @nodes / @node_index / @pos / @input /
53
+ # @line_start are set by #scan before use; no defensive init needed.
59
54
  end
60
55
 
61
56
  # Scan input and extract constructs.
@@ -63,14 +58,12 @@ module Markbridge
63
58
  # @param input [String] Discourse Markdown input
64
59
  # @return [ScanResult] result containing processed markdown and extracted nodes
65
60
  def scan(input)
66
- return ScanResult.new(markdown: "", nodes: []) if input.nil? || input.empty?
67
-
68
61
  @code_tracker = CodeBlockTracker.new
69
62
  @result = +""
70
63
  @nodes = []
71
64
  @node_index = 0
72
65
  @pos = 0
73
- @input = input
66
+ @input = input.to_s
74
67
  @line_start = true
75
68
 
76
69
  scan_input
@@ -82,14 +75,10 @@ module Markbridge
82
75
 
83
76
  def build_detectors(detectors, mention_resolver)
84
77
  detectors.map do |klass|
85
- if klass.is_a?(Class)
86
- if klass == Detectors::Mention && mention_resolver
87
- klass.new(type_resolver: mention_resolver)
88
- else
89
- klass.new
90
- end
78
+ if klass == Detectors::Mention
79
+ klass.new(type_resolver: mention_resolver)
91
80
  else
92
- klass
81
+ klass.new
93
82
  end
94
83
  end
95
84
  end
@@ -102,9 +91,10 @@ module Markbridge
102
91
  next if advance_code_boundary(:check_indented_boundary)
103
92
  end
104
93
 
105
- # Check for inline code boundary
106
- if @input[@pos] == "`" && !@code_tracker.in_fenced_block &&
107
- !@code_tracker.in_indented_block
94
+ # Check for inline code boundary. check_inline_boundary's
95
+ # own fenced/indented guard means we don't need to pre-check
96
+ # here — it'll just return nil in those cases.
97
+ if @input[@pos] == "`"
108
98
  new_pos = @code_tracker.check_inline_boundary(@input, @pos)
109
99
  if new_pos
110
100
  @result << @input[@pos...new_pos]
@@ -142,9 +132,15 @@ module Markbridge
142
132
  new_pos = @code_tracker.public_send(method, @input, @pos, line_start: true)
143
133
  return false unless new_pos
144
134
 
135
+ # check_fenced_boundary / check_indented_boundary always stop
136
+ # at pos_after_line, which is either after a "\n" or at EOF.
137
+ # After-newline → @line_start should be true; at EOF the
138
+ # outer `while @pos < @input.length` exits and @line_start
139
+ # is unobservable. Setting true unconditionally drops the
140
+ # `@input[new_pos - 1] == "\n"` dance.
145
141
  @result << @input[@pos...new_pos]
146
142
  @pos = new_pos
147
- @line_start = new_pos > 0 && @input[new_pos - 1] == "\n"
143
+ @line_start = true
148
144
  true
149
145
  end
150
146
 
@@ -159,26 +155,24 @@ module Markbridge
159
155
  def handle_match(match)
160
156
  node = match.node
161
157
  @nodes << node
158
+ @result << render_placeholder(node)
162
159
 
163
- # Render placeholder using tag library if available
164
- placeholder = render_placeholder(node)
165
- @result << placeholder
166
-
160
+ # Every detector shipped today matches content that ends on a
161
+ # non-newline byte (`]`, `)`, `_`, alphanumeric), so @line_start
162
+ # is always false after a successful match. If a future custom
163
+ # detector produces a match whose end_pos sits right after
164
+ # "\n", re-introduce the `@input[@pos - 1] == "\n"` check.
167
165
  @pos = match.end_pos
168
- @line_start = @pos > 0 && @input[@pos - 1] == "\n"
166
+ @line_start = false
169
167
  @node_index += 1
170
168
  end
171
169
 
172
170
  def render_placeholder(node)
173
171
  if @tag_library
174
172
  tag = @tag_library[node.class]
175
- if tag
176
- # Create a minimal interface for rendering
177
- return tag.render(node, nil)
178
- end
173
+ return tag.render(node, nil) if tag
179
174
  end
180
175
 
181
- # Default placeholder format if no tag library or tag not found
182
176
  default_placeholder(node)
183
177
  end
184
178
 
@@ -192,8 +186,6 @@ module Markbridge
192
186
  "<<EVENT:#{@node_index}:#{node.name}>>"
193
187
  when AST::Upload
194
188
  "<<UPLOAD:#{@node_index}:#{node.sha1}>>"
195
- else
196
- "<<UNKNOWN:#{@node_index}>>"
197
189
  end
198
190
  end
199
191
  end
@@ -15,12 +15,10 @@ module Markbridge
15
15
  # @return [String]
16
16
  def build(content, marker:, indent:)
17
17
  lines = content.split("\n")
18
- lines = [""] if lines.empty? # Handle empty content
19
18
  first_line = "#{indent}#{marker}#{lines.first}"
20
19
 
21
- return "#{first_line}\n" if lines.size == 1
20
+ return "#{first_line}\n" if lines.size < 2
22
21
 
23
- # Handle multi-line content with sophisticated blank line handling
24
22
  format_multiline(lines, first_line, indent)
25
23
  end
26
24
 
@@ -63,15 +61,17 @@ module Markbridge
63
61
  end
64
62
  end
65
63
 
66
- # Handle empty lines in continuation
64
+ # Handle empty lines in continuation. Caller (format_continuation_line)
65
+ # only invokes this when `line.empty?`, and `content.split("\n")`
66
+ # trims trailing empty strings, so the LAST continuation line is
67
+ # never empty — `idx + 1` is always in bounds when we get here.
67
68
  # @param idx [Integer] index in continuation_lines
68
69
  # @param continuation_lines [Array<String>] all continuation lines
69
70
  # @param continuation_indent [String] indent for continuation
70
71
  # @return [String, nil] formatted line or nil to skip
71
72
  def handle_empty_line(idx, continuation_lines, continuation_indent)
72
73
  # Skip empty lines that come before nested list items (structural blanks)
73
- next_line = continuation_lines[idx + 1]
74
- return nil if next_line&.match?(/\A\s*(?:-|\d+\.)\s/)
74
+ return nil if continuation_lines[idx + 1].match?(/\A\s*(?:-|\d+\.)\s/)
75
75
 
76
76
  # Preserve empty lines within text content (paragraph breaks) with indentation
77
77
  continuation_indent
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi"
4
+
5
+ module Markbridge
6
+ module Renderers
7
+ module Discourse
8
+ # Escapes text for safe inclusion in HTML output. Used when rendering
9
+ # content inside a CommonMark HTML block (e.g. TableTag's fallback)
10
+ # where Markdown-level escaping would not be applied.
11
+ class HtmlEscaper
12
+ # @param text [String, nil]
13
+ # @return [String]
14
+ def self.escape(text)
15
+ CGI.escapeHTML(text || "")
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -36,18 +36,15 @@ module Markbridge
36
36
  # breaks disabled by default.
37
37
  def initialize(escape_hard_line_breaks: false)
38
38
  @escape_hard_line_breaks = escape_hard_line_breaks
39
- @inline_content = nil
40
- @inline_result = nil
41
- @inline_len = 0
39
+ # @inline_content / @inline_result / @inline_len are set by
40
+ # escape_inline on every call before any helper reads them;
41
+ # no defensive init needed.
42
42
  end
43
43
 
44
- # Fast-path check: any character that might need escaping
45
- # Only includes characters we actually escape (removed ], {, }, ^)
46
- # > is needed for blockquote detection at line start
44
+ # Fast-path: skip escape_text entirely for content with no special
45
+ # chars. `>` is needed for blockquote detection at line start.
47
46
  MAYBE_SPECIAL = /[\\`*_\[#+\-.!<>&|~=>)]/
48
47
 
49
- # Check for indented code on any line
50
- # Matches: 4+ spaces, tab, or space+tab combinations that reach column 4+
51
48
  MAYBE_INDENTED_CODE = /(?:^|\n)(?: {4}|\t| {1,3}\t)/
52
49
 
53
50
  # Block-level patterns
@@ -122,8 +119,7 @@ module Markbridge
122
119
  # @return [String] the escaped text, or empty string if input is nil
123
120
  # @note Multi-line HTML tags and blocks are handled by escaping the opening <
124
121
  def escape(text)
125
- return "".freeze if text.nil?
126
- return text if text.empty?
122
+ return "" if text.nil?
127
123
 
128
124
  # Neutralize hard line breaks (trailing 2+ spaces before newline)
129
125
  text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n")
@@ -158,37 +154,32 @@ module Markbridge
158
154
  end
159
155
 
160
156
  def escape_line(line, prev_was_paragraph)
161
- return line if line.empty?
162
-
163
- # Handle indented code blocks first
157
+ # No `line.empty?` early-return: it's redundant with the
158
+ # `line.getbyte(indent_len).nil?` guard below, which catches both
159
+ # empty and whitespace-only lines while also preserving object
160
+ # identity (returns `line`).
164
161
  return escape_indented_code(line) if INDENTED_CODE.match?(line)
165
162
 
166
- # Extract 0-3 space indent
167
- line_length = line.length
163
+ # After INDENTED_CODE, line has at most 3 leading spaces, so the
164
+ # `< 3` bound keeps this a tight YJIT-friendly hot loop.
168
165
  indent_len = 0
169
- while indent_len < 3 && indent_len < line_length && line.getbyte(indent_len) == SPACE
170
- indent_len += 1
171
- end
166
+ indent_len += 1 while indent_len < 3 && line.getbyte(indent_len) == SPACE
172
167
 
173
- return line if indent_len >= line_length
168
+ # Whitespace-only line (1-3 spaces) getbyte past end is nil.
169
+ return line if line.getbyte(indent_len).nil?
174
170
 
175
171
  has_indent = indent_len > 0
176
172
  content = has_indent ? line[indent_len..] : line
177
173
 
178
- # Apply block-level escaping (which may also do inline escaping)
179
174
  escaped, skip_inline = escape_block_level(content, prev_was_paragraph)
180
-
181
- # Apply inline escaping if block-level didn't handle it
182
175
  escaped = escape_inline(escaped) unless skip_inline
183
176
 
184
- # Prepend indent if present, preserve encoding
185
177
  if has_indent
186
- encoding = line.encoding
187
- result = String.new(encoding:)
178
+ result = String.new(encoding: line.encoding)
188
179
  result << line[0, indent_len] << escaped
189
180
  result
190
181
  else
191
- escaped.is_a?(String) ? escaped.force_encoding(line.encoding) : escaped
182
+ escaped.force_encoding(line.encoding)
192
183
  end
193
184
  end
194
185
 
@@ -203,15 +194,14 @@ module Markbridge
203
194
  # - Content doesn't start at valid block position (no lists, headings, etc.)
204
195
  # - Visual indentation is preserved (NBSP renders as space)
205
196
  # We still escape inline content since it's no longer protected.
197
+ # Caller (escape_line) guarantees INDENTED_CODE matched, so line
198
+ # starts with at least one SPACE or TAB; ws_end is always ≥ 1.
206
199
  line_length = line.length
207
200
  ws_end = 0
208
- while ws_end < line_length
209
- byte = line.getbyte(ws_end)
210
- break if byte != SPACE && byte != TAB
201
+ while ws_end < line_length && ((byte = line.getbyte(ws_end)) == SPACE || byte == TAB)
211
202
  ws_end += 1
212
203
  end
213
204
 
214
- return line if ws_end == 0 # No leading whitespace (shouldn't happen, but safe)
215
205
  return line if ws_end >= line_length # Whitespace-only line
216
206
 
217
207
  # Convert leading whitespace to NBSP (tab = 4 NBSP for visual consistency)
@@ -310,6 +300,13 @@ module Markbridge
310
300
  @inline_len = bytesize
311
301
  pos = 0
312
302
 
303
+ # No loop-progress guard: every `dispatch_inline_byte` branch
304
+ # returns `pos + N` for N >= 1 by construction, so the loop
305
+ # is provably terminating. Mutations that break this
306
+ # (`while true`, body drops, selector swaps that short-circuit
307
+ # the dispatch) surface as timeouts rather than alive
308
+ # mutations, and the inline guard would otherwise cost ~15%
309
+ # on this hot path per benchmark.
313
310
  while pos < @inline_len
314
311
  byte = @inline_content.getbyte(pos)
315
312
  pos = dispatch_inline_byte(byte, pos)
@@ -474,39 +471,42 @@ module Markbridge
474
471
  end
475
472
 
476
473
  def paragraph_line?(line)
477
- return false if line.empty?
474
+ pos = 0
475
+ line_len = line.bytesize
476
+ pos += 1 while pos < line_len && line.getbyte(pos) == SPACE
477
+ first_non_space = pos
478
478
 
479
- line_length = line.length
480
- first_non_space = 0
481
- while first_non_space < line_length && line.getbyte(first_non_space) == SPACE
482
- first_non_space += 1
483
- end
484
- return false if first_non_space >= line_length || line.getbyte(first_non_space) == TAB
479
+ # Empty or whitespace-only lines: getbyte past the end returns nil.
480
+ return false if line.getbyte(first_non_space).nil?
485
481
 
486
- content = first_non_space <= 3 ? line[first_non_space..] : line
482
+ # Indented code (4+ spaces or any leading \t) is not a paragraph.
483
+ # INDENTED_CODE also catches lines where first_non_space > 3, so no
484
+ # separate numeric boundary check is needed.
485
+ return false if INDENTED_CODE.match?(line)
487
486
 
488
- # Lines starting with [ get escaped to \[, which IS paragraph content
489
- # So setext headings CAN follow them
490
- return true if content.getbyte(0) == BRACKET_OPEN
487
+ content = first_non_space == 0 ? line : line[first_non_space..]
491
488
 
492
- !block_construct?(content) && !INDENTED_CODE.match?(line)
489
+ # Lines starting with [ are paragraph content (the escaper rewrites [
490
+ # to \[). block_construct? has no BRACKET_OPEN case arm, so such
491
+ # lines naturally fall through and !block_construct?(content) == true.
492
+ !block_construct?(content)
493
493
  end
494
494
 
495
495
  # Checks whether content starts with a block-level markdown construct.
496
496
  # Used by both escape_block_level (to decide what to escape) and
497
497
  # paragraph_line? (to decide if setext underlines can follow).
498
498
  def block_construct?(content)
499
- first_byte = content.getbyte(0)
500
-
501
- case first_byte
499
+ case content.getbyte(0)
502
500
  when HASH
503
501
  ATX_HEADING.match?(content)
504
502
  when GT
505
503
  true
506
- when DASH, PLUS, STAR
507
- BULLET_LIST.match?(content) ||
508
- (first_byte == DASH && THEMATIC_BREAK_DASH.match?(content)) ||
509
- (first_byte == STAR && THEMATIC_BREAK_STAR.match?(content))
504
+ when DASH
505
+ BULLET_LIST.match?(content) || THEMATIC_BREAK_DASH.match?(content)
506
+ when STAR
507
+ BULLET_LIST.match?(content) || THEMATIC_BREAK_STAR.match?(content)
508
+ when PLUS
509
+ BULLET_LIST.match?(content)
510
510
  when UNDERSCORE
511
511
  THEMATIC_BREAK_UNDERSCORE.match?(content)
512
512
  when BACKTICK
@@ -11,26 +11,40 @@ module Markbridge
11
11
  class RenderContext
12
12
  attr_reader :parents, :depth
13
13
 
14
- def initialize(parents = [], parent_cache: nil)
14
+ def initialize(parents = [], parent_cache: nil, html_mode: false)
15
15
  @parents = parents.freeze
16
16
  @depth = parents.size
17
17
  @parent_cache = parent_cache || build_cache(parents)
18
+ @html_mode = html_mode
18
19
  end
19
20
 
20
- # Create new context with element added to parent chain
21
- # Incrementally updates cache instead of rebuilding from scratch
21
+ # Create new context with element added to parent chain.
22
+ # Incrementally updates the cache (O(1)) instead of rebuilding from
23
+ # parents (O(depth)) — important for deeply-nested documents.
22
24
  # @param element [AST::Element]
23
25
  # @return [RenderContext]
24
26
  def with_parent(element)
25
27
  new_parents = @parents + [element]
26
28
 
27
- # Incrementally update cache instead of rebuilding
28
29
  new_cache = @parent_cache.dup
29
30
  element_class = element.class
30
31
  new_cache[element_class] ||= []
31
32
  new_cache[element_class] = new_cache[element_class] + [element]
32
33
 
33
- self.class.new(new_parents, parent_cache: new_cache)
34
+ self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
35
+ end
36
+
37
+ # Create new context with html_mode toggled
38
+ # Preserves parent chain and cache
39
+ # @param value [Boolean]
40
+ # @return [RenderContext]
41
+ def with_html_mode(value)
42
+ self.class.new(@parents, parent_cache: @parent_cache, html_mode: value)
43
+ end
44
+
45
+ # @return [Boolean]
46
+ def html_mode?
47
+ @html_mode
34
48
  end
35
49
 
36
50
  # Find closest parent of given type
@@ -54,7 +68,7 @@ module Markbridge
54
68
  # @param klass [Class]
55
69
  # @return [Boolean]
56
70
  def has_parent?(klass)
57
- @parent_cache.key?(klass) && !@parent_cache[klass].empty?
71
+ !@parent_cache[klass].nil?
58
72
  end
59
73
 
60
74
  # Check if we're at the root (no parents)
@@ -65,14 +79,12 @@ module Markbridge
65
79
 
66
80
  private
67
81
 
68
- # Build cache from parents array
69
- # Groups parents by class for fast lookup
82
+ # Build cache from parents array.
83
+ # Groups parents by class for fast O(1) lookup.
70
84
  # @param parents [Array<AST::Element>]
71
85
  # @return [Hash{Class => Array<AST::Element>}]
72
86
  def build_cache(parents)
73
- parents.each_with_object(Hash.new { |h, k| h[k] = [] }) do |parent, cache|
74
- cache[parent.class] = cache[parent.class] + [parent]
75
- end
87
+ parents.group_by(&:class)
76
88
  end
77
89
  end
78
90
  end
@@ -5,10 +5,13 @@ module Markbridge
5
5
  module Discourse
6
6
  # Renders AST to Discourse-flavored Markdown in-memory.
7
7
  class Renderer
8
- def initialize(tag_library: nil, escaper: nil)
8
+ def initialize(tag_library: nil, escaper: nil, html_escaper: nil)
9
9
  @tag_library = tag_library || TagLibrary.default
10
10
  @escaper = escaper || MarkdownEscaper.new
11
- @interface_cache = nil
11
+ @html_escaper = html_escaper || HtmlEscaper
12
+ # @interface_cache is lazily initialized in #render's top-level
13
+ # call and reset to nil after the call completes. No init
14
+ # needed here — unset ivar returns nil under `.nil?` check.
12
15
  end
13
16
 
14
17
  # Render a node to Markdown
@@ -26,18 +29,12 @@ module Markbridge
26
29
  end
27
30
 
28
31
  case node
29
- when AST::Document, AST::Element
32
+ when AST::Element # Document is an Element subclass
30
33
  render_children(node, context:)
31
34
  when AST::MarkdownText
32
- # Pass through markdown text as-is (already formatted)
33
- node.text
35
+ render_markdown_text(node, context)
34
36
  when AST::Text
35
- # Escape plain text unless we're inside a code block
36
- if context.has_parent?(AST::Code)
37
- node.text
38
- else
39
- @escaper.escape(node.text)
40
- end
37
+ render_text(node, context)
41
38
  else
42
39
  ""
43
40
  end
@@ -50,14 +47,59 @@ module Markbridge
50
47
  # @param context [RenderContext] rendering context
51
48
  # @return [String]
52
49
  def render_children(node, context:)
53
- node.children.map { |child| render(child, context:) }.join
50
+ result = +""
51
+ node.children.each do |child|
52
+ part = render(child, context:)
53
+ next if part.empty?
54
+
55
+ # Integer-byte check avoids allocating substrings for the
56
+ # per-child adjacency probe. EMPHASIS_DELIMITER_BYTES.include?
57
+ # over a 4-element Set is O(1).
58
+ if !result.empty? && (last_byte = result.getbyte(-1)) == part.getbyte(0) &&
59
+ EMPHASIS_DELIMITER_BYTES.include?(last_byte)
60
+ result << EMPHASIS_BOUNDARY
61
+ end
62
+ result << part
63
+ end
64
+ result
54
65
  end
55
66
 
56
67
  private
57
68
 
69
+ # Inserted between sibling outputs when their adjacent characters
70
+ # would merge into a longer Markdown emphasis delimiter run (e.g.
71
+ # `***` + `*...` becoming `****...`). The HTML comment is invisible
72
+ # in rendered output but breaks the delimiter run during Markdown
73
+ # parsing.
74
+ EMPHASIS_BOUNDARY = "<!---->"
75
+ # Bytes where adjacent runs merge into a single longer run during
76
+ # Markdown parsing: emphasis (* _), strikethrough (~), code spans (`).
77
+ EMPHASIS_DELIMITER_BYTES = Set[42, 95, 126, 96].freeze
78
+ private_constant :EMPHASIS_BOUNDARY, :EMPHASIS_DELIMITER_BYTES
79
+
58
80
  def interface_for(context)
59
81
  @interface_cache[context.object_id] ||= RenderingInterface.new(self, context)
60
82
  end
83
+
84
+ # In html_mode, surround pre-formatted Markdown with blank lines so that
85
+ # CommonMark terminates the enclosing HTML block (e.g. <table>) and
86
+ # parses the content as Markdown before the closing tags reopen another
87
+ # HTML block.
88
+ def render_markdown_text(node, context)
89
+ context.html_mode? ? "\n\n#{node.text}\n\n" : node.text
90
+ end
91
+
92
+ def render_text(node, context)
93
+ # In html_mode even inside a code block we must HTML-escape, otherwise a
94
+ # stray `<` in a code cell would break the surrounding <td>.
95
+ if context.has_parent?(AST::Code)
96
+ context.html_mode? ? @html_escaper.escape(node.text) : node.text
97
+ elsif context.html_mode?
98
+ @html_escaper.escape(node.text)
99
+ else
100
+ @escaper.escape(node.text)
101
+ end
102
+ end
61
103
  end
62
104
  end
63
105
  end