markbridge 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/all.rb +4 -7
  3. data/lib/markbridge/ast/document.rb +1 -1
  4. data/lib/markbridge/ast/element.rb +2 -2
  5. data/lib/markbridge/ast/list.rb +2 -2
  6. data/lib/markbridge/ast/table.rb +6 -12
  7. data/lib/markbridge/ast/text.rb +5 -1
  8. data/lib/markbridge/bbcode.rb +4 -0
  9. data/lib/markbridge/gem_loader.rb +2 -3
  10. data/lib/markbridge/html.rb +4 -0
  11. data/lib/markbridge/mediawiki.rb +4 -0
  12. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
  13. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
  14. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
  15. data/lib/markbridge/parsers/bbcode/handler_registry.rb +21 -11
  16. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
  17. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
  18. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
  19. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +9 -17
  20. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
  21. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
  22. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +6 -18
  23. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
  24. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
  25. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +1 -1
  26. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +2 -2
  27. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +3 -3
  28. data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
  29. data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
  30. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
  31. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
  32. data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
  33. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
  34. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
  35. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
  36. data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
  37. data/lib/markbridge/parsers/bbcode.rb +1 -0
  38. data/lib/markbridge/parsers/html/handler_registry.rb +32 -49
  39. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -2
  40. data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
  41. data/lib/markbridge/parsers/html/parser.rb +3 -13
  42. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +56 -67
  43. data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
  44. data/lib/markbridge/parsers/media_wiki/parser.rb +51 -76
  45. data/lib/markbridge/parsers/media_wiki.rb +1 -0
  46. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +5 -37
  47. data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
  48. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +24 -17
  49. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +9 -15
  50. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -10
  51. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +11 -39
  52. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
  53. data/lib/markbridge/processors/discourse_markdown/scanner.rb +25 -33
  54. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
  55. data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
  56. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +57 -50
  57. data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
  58. data/lib/markbridge/renderers/discourse/renderer.rb +54 -12
  59. data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
  60. data/lib/markbridge/renderers/discourse/tag.rb +14 -1
  61. data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
  62. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
  63. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
  64. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -9
  65. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
  66. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -1
  67. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
  68. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
  69. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +13 -2
  70. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
  71. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
  72. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
  73. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
  74. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +5 -1
  75. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
  76. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -2
  77. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +2 -0
  78. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
  79. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
  80. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +12 -8
  81. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
  82. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +29 -2
  83. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
  84. data/lib/markbridge/renderers/discourse.rb +1 -0
  85. data/lib/markbridge/textformatter.rb +4 -0
  86. data/lib/markbridge/version.rb +1 -1
  87. data/lib/markbridge.rb +8 -8
  88. metadata +8 -2
@@ -22,11 +22,18 @@ module Markbridge
22
22
  # match = detector.detect(input, 0)
23
23
  # match.node.type # => :attachment
24
24
  class Upload < Base
25
- # Pattern for image: ![alt|dimensions](upload://sha1.ext)
26
- IMAGE_PATTERN = %r{!\[([^\]]*)\]\(upload://([^)]+)\)}
27
-
28
- # Pattern for attachment: [filename|attachment](upload://sha1.ext) followed by optional (size)
29
- ATTACHMENT_PATTERN = %r{\[([^\]]*\|attachment)\]\(upload://([^)]+)\)(\s*\([^)]+\))?}
25
+ # Image: ![alt|dimensions](upload://sha1.ext)
26
+ IMAGE_PATTERN =
27
+ %r{\A!\[(?<alt>[^|\]]*)(?:\|(?<dimensions>[^\]]*))?\]\(upload://(?<url>[^)]+)\)}
28
+
29
+ # Attachment: [filename|attachment](upload://sha1.ext) (size)
30
+ ATTACHMENT_PATTERN =
31
+ %r{
32
+ \A
33
+ \[(?<filename>[^|\]]*)\|attachment\]
34
+ \(upload://(?<url>[^)]+)\)
35
+ (?:\s*\((?<size>[^)]+)\))?
36
+ }xi
30
37
 
31
38
  # Attempt to detect an upload at the given position.
32
39
  #
@@ -34,14 +41,11 @@ module Markbridge
34
41
  # @param pos [Integer] current position to check
35
42
  # @return [Match, nil] match result or nil if no match
36
43
  def detect(input, pos)
37
- char = input[pos]
38
- return nil unless char == "!" || char == "["
39
-
40
44
  remaining = input[pos..]
41
-
42
- if char == "!"
45
+ case input[pos]
46
+ when "!"
43
47
  detect_image(remaining, pos)
44
- else
48
+ when "["
45
49
  detect_attachment(remaining, pos)
46
50
  end
47
51
  end
@@ -50,71 +54,42 @@ module Markbridge
50
54
 
51
55
  def detect_image(remaining, pos)
52
56
  match = IMAGE_PATTERN.match(remaining)
53
- return nil unless match&.begin(0)&.zero?
54
-
55
- raw = match[0]
56
- alt_part = match[1]
57
- url_part = match[2]
57
+ return nil unless match
58
58
 
59
- # Parse alt and dimensions from "alt|dimensions" format
60
- alt, dimensions = parse_alt_dimensions(alt_part)
59
+ sha1, filename = parse_upload_url(match[:url])
60
+ alt = match[:alt]
61
+ alt = nil if alt.empty?
61
62
 
62
- # Extract SHA1 and filename from URL
63
- sha1, filename = parse_upload_url(url_part)
63
+ # `type: :image` is omitted because it is AST::Upload's default -
64
+ # passing it explicitly was an equivalent-mutation surface.
65
+ node =
66
+ AST::Upload.new(sha1:, filename:, alt:, dimensions: match[:dimensions], raw: match[0])
64
67
 
65
- node = AST::Upload.new(sha1:, filename:, type: :image, alt:, dimensions:, raw:)
66
-
67
- Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
68
+ Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
68
69
  end
69
70
 
70
71
  def detect_attachment(remaining, pos)
71
72
  match = ATTACHMENT_PATTERN.match(remaining)
72
- return nil unless match&.begin(0)&.zero?
73
-
74
- raw = match[0]
75
- name_part = match[1]
76
- url_part = match[2]
77
- size_part = match[3]
78
-
79
- # Parse filename from "filename|attachment" format
80
- filename = name_part.sub(/\|attachment$/i, "")
73
+ return nil unless match
81
74
 
82
- # Extract SHA1 from URL
83
- sha1, _url_filename = parse_upload_url(url_part)
75
+ sha1, = parse_upload_url(match[:url])
84
76
 
85
- # Parse size if present
86
- size = size_part&.strip&.delete_prefix("(")&.delete_suffix(")")
77
+ node =
78
+ AST::Upload.new(
79
+ sha1:,
80
+ filename: match[:filename],
81
+ type: :attachment,
82
+ size: match[:size],
83
+ raw: match[0],
84
+ )
87
85
 
88
- node = AST::Upload.new(sha1:, filename:, type: :attachment, size:, raw:)
89
-
90
- Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
91
- end
92
-
93
- def parse_alt_dimensions(alt_part)
94
- return nil, nil if alt_part.nil? || alt_part.empty?
95
-
96
- if alt_part.include?("|")
97
- parts = alt_part.split("|", 2)
98
- alt = parts[0].empty? ? nil : parts[0]
99
- dimensions = parts[1]
100
- [alt, dimensions]
101
- else
102
- [alt_part, nil]
103
- end
86
+ Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
104
87
  end
105
88
 
89
+ # URL format: sha1.ext or just sha1. Returns [sha1, filename-or-nil].
106
90
  def parse_upload_url(url_part)
107
- # URL format: sha1.ext or just sha1
108
- if url_part.include?(".")
109
- parts = url_part.split(".", 2)
110
- sha1 = parts[0]
111
- filename = url_part
112
- else
113
- sha1 = url_part
114
- filename = nil
115
- end
116
-
117
- [sha1, filename]
91
+ sha1, _, ext = url_part.partition(".")
92
+ [sha1, ext.empty? ? nil : url_part]
118
93
  end
119
94
  end
120
95
  end
@@ -49,13 +49,8 @@ module Markbridge
49
49
  def initialize(detectors: DEFAULT_DETECTORS, tag_library: nil, mention_resolver: nil)
50
50
  @detector_instances = build_detectors(detectors, mention_resolver)
51
51
  @tag_library = tag_library
52
- @code_tracker = nil
53
- @result = nil
54
- @nodes = nil
55
- @node_index = 0
56
- @pos = 0
57
- @input = nil
58
- @line_start = true
52
+ # @code_tracker / @result / @nodes / @node_index / @pos / @input /
53
+ # @line_start are set by #scan before use; no defensive init needed.
59
54
  end
60
55
 
61
56
  # Scan input and extract constructs.
@@ -63,14 +58,12 @@ module Markbridge
63
58
  # @param input [String] Discourse Markdown input
64
59
  # @return [ScanResult] result containing processed markdown and extracted nodes
65
60
  def scan(input)
66
- return ScanResult.new(markdown: "", nodes: []) if input.nil? || input.empty?
67
-
68
61
  @code_tracker = CodeBlockTracker.new
69
62
  @result = +""
70
63
  @nodes = []
71
64
  @node_index = 0
72
65
  @pos = 0
73
- @input = input
66
+ @input = input.to_s
74
67
  @line_start = true
75
68
 
76
69
  scan_input
@@ -82,14 +75,10 @@ module Markbridge
82
75
 
83
76
  def build_detectors(detectors, mention_resolver)
84
77
  detectors.map do |klass|
85
- if klass.is_a?(Class)
86
- if klass == Detectors::Mention && mention_resolver
87
- klass.new(type_resolver: mention_resolver)
88
- else
89
- klass.new
90
- end
78
+ if klass == Detectors::Mention
79
+ klass.new(type_resolver: mention_resolver)
91
80
  else
92
- klass
81
+ klass.new
93
82
  end
94
83
  end
95
84
  end
@@ -102,9 +91,10 @@ module Markbridge
102
91
  next if advance_code_boundary(:check_indented_boundary)
103
92
  end
104
93
 
105
- # Check for inline code boundary
106
- if @input[@pos] == "`" && !@code_tracker.in_fenced_block &&
107
- !@code_tracker.in_indented_block
94
+ # Check for inline code boundary. check_inline_boundary's
95
+ # own fenced/indented guard means we don't need to pre-check
96
+ # here — it'll just return nil in those cases.
97
+ if @input[@pos] == "`"
108
98
  new_pos = @code_tracker.check_inline_boundary(@input, @pos)
109
99
  if new_pos
110
100
  @result << @input[@pos...new_pos]
@@ -142,9 +132,15 @@ module Markbridge
142
132
  new_pos = @code_tracker.public_send(method, @input, @pos, line_start: true)
143
133
  return false unless new_pos
144
134
 
135
+ # check_fenced_boundary / check_indented_boundary always stop
136
+ # at pos_after_line, which is either after a "\n" or at EOF.
137
+ # After-newline → @line_start should be true; at EOF the
138
+ # outer `while @pos < @input.length` exits and @line_start
139
+ # is unobservable. Setting true unconditionally drops the
140
+ # `@input[new_pos - 1] == "\n"` dance.
145
141
  @result << @input[@pos...new_pos]
146
142
  @pos = new_pos
147
- @line_start = new_pos > 0 && @input[new_pos - 1] == "\n"
143
+ @line_start = true
148
144
  true
149
145
  end
150
146
 
@@ -159,26 +155,24 @@ module Markbridge
159
155
  def handle_match(match)
160
156
  node = match.node
161
157
  @nodes << node
158
+ @result << render_placeholder(node)
162
159
 
163
- # Render placeholder using tag library if available
164
- placeholder = render_placeholder(node)
165
- @result << placeholder
166
-
160
+ # Every detector shipped today matches content that ends on a
161
+ # non-newline byte (`]`, `)`, `_`, alphanumeric), so @line_start
162
+ # is always false after a successful match. If a future custom
163
+ # detector produces a match whose end_pos sits right after
164
+ # "\n", re-introduce the `@input[@pos - 1] == "\n"` check.
167
165
  @pos = match.end_pos
168
- @line_start = @pos > 0 && @input[@pos - 1] == "\n"
166
+ @line_start = false
169
167
  @node_index += 1
170
168
  end
171
169
 
172
170
  def render_placeholder(node)
173
171
  if @tag_library
174
172
  tag = @tag_library[node.class]
175
- if tag
176
- # Create a minimal interface for rendering
177
- return tag.render(node, nil)
178
- end
173
+ return tag.render(node, nil) if tag
179
174
  end
180
175
 
181
- # Default placeholder format if no tag library or tag not found
182
176
  default_placeholder(node)
183
177
  end
184
178
 
@@ -192,8 +186,6 @@ module Markbridge
192
186
  "<<EVENT:#{@node_index}:#{node.name}>>"
193
187
  when AST::Upload
194
188
  "<<UPLOAD:#{@node_index}:#{node.sha1}>>"
195
- else
196
- "<<UNKNOWN:#{@node_index}>>"
197
189
  end
198
190
  end
199
191
  end
@@ -15,12 +15,10 @@ module Markbridge
15
15
  # @return [String]
16
16
  def build(content, marker:, indent:)
17
17
  lines = content.split("\n")
18
- lines = [""] if lines.empty? # Handle empty content
19
18
  first_line = "#{indent}#{marker}#{lines.first}"
20
19
 
21
- return "#{first_line}\n" if lines.size == 1
20
+ return "#{first_line}\n" if lines.size < 2
22
21
 
23
- # Handle multi-line content with sophisticated blank line handling
24
22
  format_multiline(lines, first_line, indent)
25
23
  end
26
24
 
@@ -63,15 +61,17 @@ module Markbridge
63
61
  end
64
62
  end
65
63
 
66
- # Handle empty lines in continuation
64
+ # Handle empty lines in continuation. Caller (format_continuation_line)
65
+ # only invokes this when `line.empty?`, and `content.split("\n")`
66
+ # trims trailing empty strings, so the LAST continuation line is
67
+ # never empty — `idx + 1` is always in bounds when we get here.
67
68
  # @param idx [Integer] index in continuation_lines
68
69
  # @param continuation_lines [Array<String>] all continuation lines
69
70
  # @param continuation_indent [String] indent for continuation
70
71
  # @return [String, nil] formatted line or nil to skip
71
72
  def handle_empty_line(idx, continuation_lines, continuation_indent)
72
73
  # Skip empty lines that come before nested list items (structural blanks)
73
- next_line = continuation_lines[idx + 1]
74
- return nil if next_line&.match?(/\A\s*(?:-|\d+\.)\s/)
74
+ return nil if continuation_lines[idx + 1].match?(/\A\s*(?:-|\d+\.)\s/)
75
75
 
76
76
  # Preserve empty lines within text content (paragraph breaks) with indentation
77
77
  continuation_indent
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi"
4
+
5
+ module Markbridge
6
+ module Renderers
7
+ module Discourse
8
+ # Escapes text for safe inclusion in HTML output. Used when rendering
9
+ # content inside a CommonMark HTML block (e.g. TableTag's fallback)
10
+ # where Markdown-level escaping would not be applied.
11
+ class HtmlEscaper
12
+ # @param text [String, nil]
13
+ # @return [String]
14
+ def self.escape(text)
15
+ CGI.escapeHTML(text || "")
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -36,18 +36,15 @@ module Markbridge
36
36
  # breaks disabled by default.
37
37
  def initialize(escape_hard_line_breaks: false)
38
38
  @escape_hard_line_breaks = escape_hard_line_breaks
39
- @inline_content = nil
40
- @inline_result = nil
41
- @inline_len = 0
39
+ # @inline_content / @inline_result / @inline_len are set by
40
+ # escape_inline on every call before any helper reads them;
41
+ # no defensive init needed.
42
42
  end
43
43
 
44
- # Fast-path check: any character that might need escaping
45
- # Only includes characters we actually escape (removed ], {, }, ^)
46
- # > is needed for blockquote detection at line start
44
+ # Fast-path: skip escape_text entirely for content with no special
45
+ # chars. `>` is needed for blockquote detection at line start.
47
46
  MAYBE_SPECIAL = /[\\`*_\[#+\-.!<>&|~=>)]/
48
47
 
49
- # Check for indented code on any line
50
- # Matches: 4+ spaces, tab, or space+tab combinations that reach column 4+
51
48
  MAYBE_INDENTED_CODE = /(?:^|\n)(?: {4}|\t| {1,3}\t)/
52
49
 
53
50
  # Block-level patterns
@@ -122,8 +119,7 @@ module Markbridge
122
119
  # @return [String] the escaped text, or empty string if input is nil
123
120
  # @note Multi-line HTML tags and blocks are handled by escaping the opening <
124
121
  def escape(text)
125
- return "".freeze if text.nil?
126
- return text if text.empty?
122
+ return "" if text.nil?
127
123
 
128
124
  # Neutralize hard line breaks (trailing 2+ spaces before newline)
129
125
  text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n")
@@ -136,7 +132,14 @@ module Markbridge
136
132
  private
137
133
 
138
134
  def escape_text(text)
139
- lines = text.split("\n", -1)
135
+ # On CRLF input, consume `\r` as part of the line terminator instead
136
+ # of leaving it on the line. A trailing `\r` breaks line-end anchored
137
+ # regexes (e.g. SETEXT_UNDERLINE_*) and the `ws_end >= line_length`
138
+ # early-out in escape_indented_code, leaking NBSPs onto
139
+ # whitespace-only CRLF lines. The `include?` guard keeps the
140
+ # LF-only fast path on a string split (regex split is ~20% slower
141
+ # on the indented-code hot path).
142
+ lines = text.include?("\r") ? text.split(/\r?\n/, -1) : text.split("\n", -1)
140
143
  return escape_line(lines[0], false) if lines.size == 1
141
144
 
142
145
  # Pre-allocate result buffer
@@ -158,37 +161,32 @@ module Markbridge
158
161
  end
159
162
 
160
163
  def escape_line(line, prev_was_paragraph)
161
- return line if line.empty?
162
-
163
- # Handle indented code blocks first
164
+ # No `line.empty?` early-return: it's redundant with the
165
+ # `line.getbyte(indent_len).nil?` guard below, which catches both
166
+ # empty and whitespace-only lines while also preserving object
167
+ # identity (returns `line`).
164
168
  return escape_indented_code(line) if INDENTED_CODE.match?(line)
165
169
 
166
- # Extract 0-3 space indent
167
- line_length = line.length
170
+ # After INDENTED_CODE, line has at most 3 leading spaces, so the
171
+ # `< 3` bound keeps this a tight YJIT-friendly hot loop.
168
172
  indent_len = 0
169
- while indent_len < 3 && indent_len < line_length && line.getbyte(indent_len) == SPACE
170
- indent_len += 1
171
- end
173
+ indent_len += 1 while indent_len < 3 && line.getbyte(indent_len) == SPACE
172
174
 
173
- return line if indent_len >= line_length
175
+ # Whitespace-only line (1-3 spaces) getbyte past end is nil.
176
+ return line if line.getbyte(indent_len).nil?
174
177
 
175
178
  has_indent = indent_len > 0
176
179
  content = has_indent ? line[indent_len..] : line
177
180
 
178
- # Apply block-level escaping (which may also do inline escaping)
179
181
  escaped, skip_inline = escape_block_level(content, prev_was_paragraph)
180
-
181
- # Apply inline escaping if block-level didn't handle it
182
182
  escaped = escape_inline(escaped) unless skip_inline
183
183
 
184
- # Prepend indent if present, preserve encoding
185
184
  if has_indent
186
- encoding = line.encoding
187
- result = String.new(encoding:)
185
+ result = String.new(encoding: line.encoding)
188
186
  result << line[0, indent_len] << escaped
189
187
  result
190
188
  else
191
- escaped.is_a?(String) ? escaped.force_encoding(line.encoding) : escaped
189
+ escaped.force_encoding(line.encoding)
192
190
  end
193
191
  end
194
192
 
@@ -203,15 +201,14 @@ module Markbridge
203
201
  # - Content doesn't start at valid block position (no lists, headings, etc.)
204
202
  # - Visual indentation is preserved (NBSP renders as space)
205
203
  # We still escape inline content since it's no longer protected.
204
+ # Caller (escape_line) guarantees INDENTED_CODE matched, so line
205
+ # starts with at least one SPACE or TAB; ws_end is always ≥ 1.
206
206
  line_length = line.length
207
207
  ws_end = 0
208
- while ws_end < line_length
209
- byte = line.getbyte(ws_end)
210
- break if byte != SPACE && byte != TAB
208
+ while ws_end < line_length && ((byte = line.getbyte(ws_end)) == SPACE || byte == TAB)
211
209
  ws_end += 1
212
210
  end
213
211
 
214
- return line if ws_end == 0 # No leading whitespace (shouldn't happen, but safe)
215
212
  return line if ws_end >= line_length # Whitespace-only line
216
213
 
217
214
  # Convert leading whitespace to NBSP (tab = 4 NBSP for visual consistency)
@@ -310,6 +307,13 @@ module Markbridge
310
307
  @inline_len = bytesize
311
308
  pos = 0
312
309
 
310
+ # No loop-progress guard: every `dispatch_inline_byte` branch
311
+ # returns `pos + N` for N >= 1 by construction, so the loop
312
+ # is provably terminating. Mutations that break this
313
+ # (`while true`, body drops, selector swaps that short-circuit
314
+ # the dispatch) surface as timeouts rather than alive
315
+ # mutations, and the inline guard would otherwise cost ~15%
316
+ # on this hot path per benchmark.
313
317
  while pos < @inline_len
314
318
  byte = @inline_content.getbyte(pos)
315
319
  pos = dispatch_inline_byte(byte, pos)
@@ -474,39 +478,42 @@ module Markbridge
474
478
  end
475
479
 
476
480
  def paragraph_line?(line)
477
- return false if line.empty?
481
+ pos = 0
482
+ line_len = line.bytesize
483
+ pos += 1 while pos < line_len && line.getbyte(pos) == SPACE
484
+ first_non_space = pos
478
485
 
479
- line_length = line.length
480
- first_non_space = 0
481
- while first_non_space < line_length && line.getbyte(first_non_space) == SPACE
482
- first_non_space += 1
483
- end
484
- return false if first_non_space >= line_length || line.getbyte(first_non_space) == TAB
486
+ # Empty or whitespace-only lines: getbyte past the end returns nil.
487
+ return false if line.getbyte(first_non_space).nil?
485
488
 
486
- content = first_non_space <= 3 ? line[first_non_space..] : line
489
+ # Indented code (4+ spaces or any leading \t) is not a paragraph.
490
+ # INDENTED_CODE also catches lines where first_non_space > 3, so no
491
+ # separate numeric boundary check is needed.
492
+ return false if INDENTED_CODE.match?(line)
487
493
 
488
- # Lines starting with [ get escaped to \[, which IS paragraph content
489
- # So setext headings CAN follow them
490
- return true if content.getbyte(0) == BRACKET_OPEN
494
+ content = first_non_space == 0 ? line : line[first_non_space..]
491
495
 
492
- !block_construct?(content) && !INDENTED_CODE.match?(line)
496
+ # Lines starting with [ are paragraph content (the escaper rewrites [
497
+ # to \[). block_construct? has no BRACKET_OPEN case arm, so such
498
+ # lines naturally fall through and !block_construct?(content) == true.
499
+ !block_construct?(content)
493
500
  end
494
501
 
495
502
  # Checks whether content starts with a block-level markdown construct.
496
503
  # Used by both escape_block_level (to decide what to escape) and
497
504
  # paragraph_line? (to decide if setext underlines can follow).
498
505
  def block_construct?(content)
499
- first_byte = content.getbyte(0)
500
-
501
- case first_byte
506
+ case content.getbyte(0)
502
507
  when HASH
503
508
  ATX_HEADING.match?(content)
504
509
  when GT
505
510
  true
506
- when DASH, PLUS, STAR
507
- BULLET_LIST.match?(content) ||
508
- (first_byte == DASH && THEMATIC_BREAK_DASH.match?(content)) ||
509
- (first_byte == STAR && THEMATIC_BREAK_STAR.match?(content))
511
+ when DASH
512
+ BULLET_LIST.match?(content) || THEMATIC_BREAK_DASH.match?(content)
513
+ when STAR
514
+ BULLET_LIST.match?(content) || THEMATIC_BREAK_STAR.match?(content)
515
+ when PLUS
516
+ BULLET_LIST.match?(content)
510
517
  when UNDERSCORE
511
518
  THEMATIC_BREAK_UNDERSCORE.match?(content)
512
519
  when BACKTICK
@@ -11,26 +11,40 @@ module Markbridge
11
11
  class RenderContext
12
12
  attr_reader :parents, :depth
13
13
 
14
- def initialize(parents = [], parent_cache: nil)
14
+ def initialize(parents = [], parent_cache: nil, html_mode: false)
15
15
  @parents = parents.freeze
16
16
  @depth = parents.size
17
17
  @parent_cache = parent_cache || build_cache(parents)
18
+ @html_mode = html_mode
18
19
  end
19
20
 
20
- # Create new context with element added to parent chain
21
- # Incrementally updates cache instead of rebuilding from scratch
21
+ # Create new context with element added to parent chain.
22
+ # Incrementally updates the cache (O(1)) instead of rebuilding from
23
+ # parents (O(depth)) — important for deeply-nested documents.
22
24
  # @param element [AST::Element]
23
25
  # @return [RenderContext]
24
26
  def with_parent(element)
25
27
  new_parents = @parents + [element]
26
28
 
27
- # Incrementally update cache instead of rebuilding
28
29
  new_cache = @parent_cache.dup
29
30
  element_class = element.class
30
31
  new_cache[element_class] ||= []
31
32
  new_cache[element_class] = new_cache[element_class] + [element]
32
33
 
33
- self.class.new(new_parents, parent_cache: new_cache)
34
+ self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
35
+ end
36
+
37
+ # Create new context with html_mode toggled
38
+ # Preserves parent chain and cache
39
+ # @param value [Boolean]
40
+ # @return [RenderContext]
41
+ def with_html_mode(value)
42
+ self.class.new(@parents, parent_cache: @parent_cache, html_mode: value)
43
+ end
44
+
45
+ # @return [Boolean]
46
+ def html_mode?
47
+ @html_mode
34
48
  end
35
49
 
36
50
  # Find closest parent of given type
@@ -54,7 +68,7 @@ module Markbridge
54
68
  # @param klass [Class]
55
69
  # @return [Boolean]
56
70
  def has_parent?(klass)
57
- @parent_cache.key?(klass) && !@parent_cache[klass].empty?
71
+ !@parent_cache[klass].nil?
58
72
  end
59
73
 
60
74
  # Check if we're at the root (no parents)
@@ -65,14 +79,12 @@ module Markbridge
65
79
 
66
80
  private
67
81
 
68
- # Build cache from parents array
69
- # Groups parents by class for fast lookup
82
+ # Build cache from parents array.
83
+ # Groups parents by class for fast O(1) lookup.
70
84
  # @param parents [Array<AST::Element>]
71
85
  # @return [Hash{Class => Array<AST::Element>}]
72
86
  def build_cache(parents)
73
- parents.each_with_object(Hash.new { |h, k| h[k] = [] }) do |parent, cache|
74
- cache[parent.class] = cache[parent.class] + [parent]
75
- end
87
+ parents.group_by(&:class)
76
88
  end
77
89
  end
78
90
  end