markbridge 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +1 -1
  3. data/lib/markbridge/ast/details.rb +24 -0
  4. data/lib/markbridge/ast/element.rb +63 -0
  5. data/lib/markbridge/ast.rb +1 -0
  6. data/lib/markbridge/conversion.rb +40 -0
  7. data/lib/markbridge/parse.rb +20 -0
  8. data/lib/markbridge/parsers/bbcode/handler_registry.rb +25 -2
  9. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +13 -2
  10. data/lib/markbridge/parsers/html/handler_registry.rb +97 -17
  11. data/lib/markbridge/parsers/html/handlers/self_closing_handler.rb +26 -0
  12. data/lib/markbridge/parsers/html/handlers/span_handler.rb +74 -0
  13. data/lib/markbridge/parsers/html/parser.rb +88 -18
  14. data/lib/markbridge/parsers/html.rb +2 -0
  15. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +21 -8
  16. data/lib/markbridge/parsers/media_wiki/parser.rb +13 -5
  17. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +27 -4
  18. data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +1 -1
  19. data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +1 -1
  20. data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +1 -1
  21. data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +1 -1
  22. data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +1 -1
  23. data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +1 -1
  24. data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +1 -1
  25. data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +1 -1
  26. data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +1 -1
  27. data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +1 -1
  28. data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +1 -1
  29. data/lib/markbridge/parsers/text_formatter/parser.rb +17 -3
  30. data/lib/markbridge/renderers/discourse/identity_escaper.rb +37 -0
  31. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +83 -8
  32. data/lib/markbridge/renderers/discourse/postprocessor.rb +53 -0
  33. data/lib/markbridge/renderers/discourse/render_context.rb +14 -40
  34. data/lib/markbridge/renderers/discourse/renderer.rb +15 -5
  35. data/lib/markbridge/renderers/discourse/rendering_interface.rb +4 -3
  36. data/lib/markbridge/renderers/discourse/tag_library.rb +42 -2
  37. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +2 -2
  38. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +5 -3
  39. data/lib/markbridge/renderers/discourse/tags/details_tag.rb +46 -0
  40. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +1 -1
  41. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +5 -2
  42. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +4 -3
  43. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +13 -0
  44. data/lib/markbridge/renderers/discourse.rb +3 -0
  45. data/lib/markbridge/version.rb +1 -1
  46. data/lib/markbridge.rb +274 -110
  47. metadata +9 -2
  48. data/lib/markbridge/configuration.rb +0 -11
@@ -11,13 +11,20 @@ module Markbridge
11
11
  # registry = InlineTagRegistry.build_from_default do |r|
12
12
  # r.register("mark", :formatting, AST::Bold)
13
13
  # end
14
- # parser = InlineParser.new(inline_tag_registry: registry)
14
+ # parser = InlineParser.new(handlers: registry)
15
15
  class InlineParser
16
16
  MAX_INLINE_DEPTH = 20
17
17
 
18
- def initialize(inline_tag_registry: nil, depth: 0)
19
- @registry = inline_tag_registry || InlineTagRegistry.default
18
+ # @return [Hash{String => Integer}] tag-name → occurrence count for
19
+ # HTML-like inline tags whose names are not registered. Shared
20
+ # with nested InlineParser instances so depth-recursive parses
21
+ # contribute to the same tally.
22
+ attr_reader :unknown_tags
23
+
24
+ def initialize(handlers: nil, depth: 0, unknown_tags: nil)
25
+ @registry = handlers || InlineTagRegistry.default
20
26
  @depth = depth
27
+ @unknown_tags = unknown_tags || Hash.new(0)
21
28
  end
22
29
 
23
30
  # Parse inline markup and append resulting AST nodes to the parent element.
@@ -110,10 +117,11 @@ module Markbridge
110
117
  return
111
118
  end
112
119
 
113
- InlineParser.new(inline_tag_registry: @registry, depth: @depth + 1).parse(
114
- content,
115
- parent:,
116
- )
120
+ InlineParser.new(
121
+ handlers: @registry,
122
+ depth: @depth + 1,
123
+ unknown_tags: @unknown_tags,
124
+ ).parse(content, parent:)
117
125
  end
118
126
 
119
127
  # Collect text until we find n consecutive apostrophes.
@@ -203,9 +211,14 @@ module Markbridge
203
211
  self_closing = !tag_match[3].empty?
204
212
  tag_name = tag_match[2].downcase
205
213
 
206
- # Closing/self-closing tags and unknown tags are treated as literal text
214
+ # Closing/self-closing tags and unknown tags are treated as literal text.
215
+ # Track *unknown* opening tags so callers can surface them via
216
+ # Parse/Conversion#unknown_tags. We deliberately don't track
217
+ # closing/self-closing forms — they often pair up with the
218
+ # opening tag that's already counted.
207
219
  entry = @registry[tag_name]
208
220
  if closing || self_closing || !entry
221
+ @unknown_tags[tag_name] += 1 if !entry && !closing && !self_closing
209
222
  advance_as_text(full_match)
210
223
  return
211
224
  end
@@ -21,13 +21,20 @@ module Markbridge
21
21
  # parser = Markbridge::Parsers::MediaWiki::Parser.new
22
22
  # ast = parser.parse("'''bold''' and ''italic''")
23
23
  class Parser
24
- # @param inline_tag_registry [InlineTagRegistry, nil] custom registry or use default
24
+ # @return [Hash{String => Integer}] tag-name occurrence count for
25
+ # inline HTML-like tags whose names are not registered. Reset at
26
+ # the start of every #parse call.
27
+ attr_reader :unknown_tags
28
+
29
+ # @param handlers [InlineTagRegistry, nil] custom registry or use default.
30
+ # Named +handlers:+ for consistency with sibling parsers; the
31
+ # value is still an +InlineTagRegistry+ instance.
25
32
  # @yield [InlineTagRegistry] optional block to customize the default registry
26
- def initialize(inline_tag_registry: nil, &block)
33
+ def initialize(handlers: nil, &block)
27
34
  # InlineParser falls back to InlineTagRegistry.default when this is
28
35
  # nil, so we don't need to materialise it here.
29
- @inline_tag_registry =
30
- block_given? ? InlineTagRegistry.build_from_default(&block) : inline_tag_registry
36
+ @handlers = block_given? ? InlineTagRegistry.build_from_default(&block) : handlers
37
+ @unknown_tags = Hash.new(0)
31
38
  end
32
39
 
33
40
  # Parse MediaWiki wikitext into an AST Document.
@@ -38,8 +45,9 @@ module Markbridge
38
45
  normalized = normalize_line_endings(input)
39
46
  lines = normalized.split("\n")
40
47
 
48
+ @unknown_tags.clear
41
49
  @document = AST::Document.new
42
- @inline_parser = InlineParser.new(inline_tag_registry: @inline_tag_registry)
50
+ @inline_parser = InlineParser.new(handlers: @handlers, unknown_tags: @unknown_tags)
43
51
  @list_stack = []
44
52
 
45
53
  process_lines(lines)
@@ -43,6 +43,28 @@ module Markbridge
43
43
  @mappings[element_name.upcase] = handler
44
44
  end
45
45
 
46
+ # Look up the handler for an element name (case-insensitive).
47
+ # @param element_name [String]
48
+ # @return [#process, nil]
49
+ def [](element_name)
50
+ @mappings[element_name.upcase]
51
+ end
52
+
53
+ # Replace the handler bound to one or more element names by
54
+ # yielding the previously-bound handler (which may be +nil+)
55
+ # and registering whatever the block returns.
56
+ #
57
+ # @param element_names [String, Array<String>]
58
+ # @yieldparam previous [#process, nil]
59
+ # @return [self]
60
+ def overlay(element_names)
61
+ Array(element_names).each do |name|
62
+ previous = self[name]
63
+ register(name, yield(previous))
64
+ end
65
+ self
66
+ end
67
+
46
68
  # Check if a handler is registered for an element
47
69
  # @param element_name [String] XML element name
48
70
  # @return [Boolean] true if handler is registered
@@ -53,11 +75,12 @@ module Markbridge
53
75
  # Process an XML element using the registered handler
54
76
  # @param element [Nokogiri::XML::Element]
55
77
  # @param parent [AST::Element] parent node to add children to
78
+ # @param processor [Parser] the parser, exposed to handlers so
79
+ # they can call back into +process_children+ for nested content
56
80
  # @return [AST::Element, nil] the created element if children should be processed, nil otherwise
57
- def process_element(element, parent)
58
- tag_name = element.name.upcase
59
- handler = @mappings[tag_name]
60
- handler&.process(element:, parent:)
81
+ def process_element(element, parent, processor)
82
+ handler = self[element.name]
83
+ handler&.process(element:, parent:, processor:)
61
84
  end
62
85
 
63
86
  # Register all default s9e/TextFormatter element mappings
@@ -10,7 +10,7 @@ module Markbridge
10
10
  @element_class = AST::Attachment
11
11
  end
12
12
 
13
- def process(element:, parent:)
13
+ def process(element:, parent:, processor: nil)
14
14
  attrs = extract_attributes(element)
15
15
  node =
16
16
  AST::Attachment.new(
@@ -23,7 +23,7 @@ module Markbridge
23
23
  @param = param || attribute
24
24
  end
25
25
 
26
- def process(element:, parent:)
26
+ def process(element:, parent:, processor: nil)
27
27
  attrs = extract_attributes(element)
28
28
  node = @element_class.new(@param => attrs[@attribute])
29
29
  parent << node
@@ -16,7 +16,7 @@ module Markbridge
16
16
  # @param element [Nokogiri::XML::Element] the XML element to process
17
17
  # @param parent [AST::Element] the parent AST node to add children to
18
18
  # @return [AST::Element, nil] the created element if children should be processed, nil otherwise
19
- def process(element:, parent:)
19
+ def process(element:, parent:, processor: nil)
20
20
  raise NotImplementedError, "#{self.class} must implement #process"
21
21
  end
22
22
 
@@ -10,7 +10,7 @@ module Markbridge
10
10
  @element_class = AST::Code
11
11
  end
12
12
 
13
- def process(element:, parent:)
13
+ def process(element:, parent:, processor: nil)
14
14
  attrs = extract_attributes(element)
15
15
  lang = attrs[:lang] || attrs[:language]
16
16
  node = AST::Code.new(language: lang)
@@ -10,7 +10,7 @@ module Markbridge
10
10
  @element_class = AST::Email
11
11
  end
12
12
 
13
- def process(element:, parent:)
13
+ def process(element:, parent:, processor: nil)
14
14
  attrs = extract_attributes(element)
15
15
  node = AST::Email.new(address: attrs[:email])
16
16
  parent << node
@@ -10,7 +10,7 @@ module Markbridge
10
10
  @element_class = AST::Image
11
11
  end
12
12
 
13
- def process(element:, parent:)
13
+ def process(element:, parent:, processor: nil)
14
14
  attrs = extract_attributes(element)
15
15
  node =
16
16
  AST::Image.new(
@@ -10,7 +10,7 @@ module Markbridge
10
10
  @element_class = AST::List
11
11
  end
12
12
 
13
- def process(element:, parent:)
13
+ def process(element:, parent:, processor: nil)
14
14
  attrs = extract_attributes(element)
15
15
  type_str = attrs[:type]
16
16
  # Ordered if type is not empty, disc, circle, or square
@@ -10,7 +10,7 @@ module Markbridge
10
10
  @element_class = AST::Quote
11
11
  end
12
12
 
13
- def process(element:, parent:)
13
+ def process(element:, parent:, processor: nil)
14
14
  attrs = extract_attributes(element)
15
15
  node =
16
16
  AST::Quote.new(
@@ -21,7 +21,7 @@ module Markbridge
21
21
  # Process the element by creating an AST node and processing children
22
22
  # @param element [Nokogiri::XML::Element]
23
23
  # @param parent [AST::Element]
24
- def process(element:, parent:)
24
+ def process(element:, parent:, processor: nil)
25
25
  node = @element_class.new
26
26
  parent << node
27
27
 
@@ -10,7 +10,7 @@ module Markbridge
10
10
  @element_class = AST::TableCell
11
11
  end
12
12
 
13
- def process(element:, parent:)
13
+ def process(element:, parent:, processor: nil)
14
14
  node = AST::TableCell.new(header: element.name.upcase == "TH")
15
15
  parent << node
16
16
  node
@@ -12,7 +12,7 @@ module Markbridge
12
12
  @element_class = AST::Url
13
13
  end
14
14
 
15
- def process(element:, parent:)
15
+ def process(element:, parent:, processor: nil)
16
16
  attrs = extract_attributes(element)
17
17
  node = AST::Url.new(href: attrs[:url])
18
18
  parent << node
@@ -38,12 +38,26 @@ module Markbridge
38
38
  @unknown_tags = Hash.new(0)
39
39
  end
40
40
 
41
- # Parse s9e/TextFormatter XML into an AST
42
- # @param input [String] XML string in s9e/TextFormatter format
41
+ # Parse s9e/TextFormatter XML into an AST.
42
+ #
43
+ # Accepts either a String of XML or a pre-parsed Nokogiri node.
44
+ # A +Nokogiri::XML::Document+ is unwrapped via +#root+; any
45
+ # other node is treated as the root itself.
46
+ #
47
+ # @param input [String, Nokogiri::XML::Node] XML source or
48
+ # pre-parsed Nokogiri tree
43
49
  # @return [AST::Document]
44
50
  def parse(input)
45
51
  @unknown_tags.clear
46
52
 
53
+ if input.is_a?(Nokogiri::XML::Node)
54
+ root = input.is_a?(Nokogiri::XML::Document) ? input.root : input
55
+ document = AST::Document.new
56
+ process_node(root, document) if root
57
+ return document
58
+ end
59
+
60
+ input = input.to_s
47
61
  xml_doc = Nokogiri.XML(input)
48
62
  root = xml_doc.root
49
63
 
@@ -101,7 +115,7 @@ module Markbridge
101
115
 
102
116
  # Process element with registered handler
103
117
  # Handler returns element if children should be processed, nil otherwise
104
- result_element = @handlers.process_element(element, ast_parent)
118
+ result_element = @handlers.process_element(element, ast_parent, self)
105
119
 
106
120
  if result_element
107
121
  # Handler succeeded and returned element - process children into it
@@ -0,0 +1,37 @@
# frozen_string_literal: true

module Markbridge
  module Renderers
    module Discourse
      # Pass-through escaper. Returns its input unchanged, except for
      # the one structural escape described on {#escape}.
      #
      # Useful for migration paths where the source content is already
      # valid Markdown (or otherwise trusted not to need escaping) and
      # should reach the postprocessor verbatim. For *partial*
      # passthrough (e.g. allow lists but still escape headings), see
      # {MarkdownEscaper#initialize}'s +allow:+ kwarg.
      #
      # @example Per-call use via the renderer factory
      #   renderer = Markbridge.discourse_renderer(escape: false)
      #   Markbridge.bbcode_to_markdown(post.body, renderer:)
      class IdentityEscaper
        # A +]+ preceded by an even number (including zero) of
        # backslashes, i.e. one that Markdown treats as a live bracket.
        # The backslash pairs are captured so they can be re-emitted
        # untouched. A +]+ preceded by an odd number of backslashes is
        # already escaped in the (trusted) Markdown input and must be
        # left alone: adding another backslash would turn +\]+ into
        # +\\]+, an escaped backslash followed by a live bracket.
        UNESCAPED_CLOSE_BRACKET = /(?<!\\)((?:\\\\)*)\]/
        private_constant :UNESCAPED_CLOSE_BRACKET

        # @param text [String, nil]
        # @param in_link_label [Boolean] when true, escape every
        #   not-yet-escaped +]+ so the text can be spliced into a
        #   Markdown link label +[text](url)+ without terminating it
        #   early. Mirrors {MarkdownEscaper#escape}'s +in_link_label:+.
        #   This isn't a stylistic escape — without it, trusted-Markdown
        #   content containing +]+ inside a +Url+/+Email+ ancestor
        #   produces a broken link.
        # @return [String] +text+ with live +]+ optionally escaped, or
        #   +""+ when +text+ is nil
        def escape(text, in_link_label: false)
          return "" if text.nil?
          return text unless in_link_label && text.include?("]")

          # Block form so the replacement is taken literally (no
          # backreference interpretation of the backslashes).
          text.gsub(UNESCAPED_CLOSE_BRACKET) { "#{Regexp.last_match(1)}\\]" }
        end
      end
    end
  end
end
@@ -30,12 +30,29 @@ module Markbridge
30
30
  # escaper.escape("<?php echo 1; ?>") # => "\\<?php echo 1; ?>"
31
31
  #
32
32
  class MarkdownEscaper
33
+ # Block-level constructs that callers can opt into letting
34
+ # through unescaped via the +allow:+ kwarg. The check fires
35
+ # only after a line's first byte has matched the relevant
36
+ # case arm, so this is a cold-path lookup with no measurable
37
+ # hot-path cost.
38
+ ALLOW_KEYS = %i[bullet_list ordered_list atx_heading block_quote].freeze
39
+ ALLOW_ALIASES = { lists: %i[bullet_list ordered_list] }.freeze
40
+ private_constant :ALLOW_KEYS, :ALLOW_ALIASES
41
+
33
42
  # @param escape_hard_line_breaks [Boolean] when true, strip trailing spaces
34
43
  # before newlines to prevent CommonMark hard line breaks (<br/>).
35
44
  # Defaults to false because Discourse has trailing-space hard line
36
45
  # breaks disabled by default.
37
- def initialize(escape_hard_line_breaks: false)
46
+ # @param allow [Symbol, Array<Symbol>, nil] block-level constructs
47
+ # to pass through unescaped. Recognised keys:
48
+ # +:bullet_list+, +:ordered_list+, +:atx_heading+,
49
+ # +:block_quote+. The alias +:lists+ expands to
50
+ # `[:bullet_list, :ordered_list]`. Thematic breaks, setext
51
+ # underlines, fenced code, and indented code remain escaped
52
+ # even when their first byte matches an allow-listed marker.
53
+ def initialize(escape_hard_line_breaks: false, allow: nil)
38
54
  @escape_hard_line_breaks = escape_hard_line_breaks
55
+ @allow = resolve_allow(allow)
39
56
  # @inline_content / @inline_result / @inline_len are set by
40
57
  # escape_inline on every call before any helper reads them;
41
58
  # no defensive init needed.
@@ -116,21 +133,50 @@ module Markbridge
116
133
  # Autolinks (<https://...>, <email@domain>) are intentionally preserved.
117
134
  #
118
135
  # @param text [String, nil] the text to escape
136
+ # @param in_link_label [Boolean] when true, also escape `]` so the text
137
+ # can be spliced into a Markdown link label `[text](url)` without
138
+ # terminating it early. The default leaves `]` alone because a bare
139
+ # `]` in prose is harmless (the matching `[` is already escaped).
119
140
  # @return [String] the escaped text, or empty string if input is nil
120
141
  # @note Multi-line HTML tags and blocks are handled by escaping the opening <
121
- def escape(text)
142
+ def escape(text, in_link_label: false)
122
143
  return "" if text.nil?
123
144
 
124
145
  # Neutralize hard line breaks (trailing 2+ spaces before newline)
125
146
  text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n")
126
147
 
127
- return text unless MAYBE_SPECIAL.match?(text) || MAYBE_INDENTED_CODE.match?(text)
148
+ result =
149
+ if MAYBE_SPECIAL.match?(text) || MAYBE_INDENTED_CODE.match?(text)
150
+ escape_text(text)
151
+ else
152
+ text
153
+ end
154
+
155
+ return result unless in_link_label && result.include?("]")
128
156
 
129
- escape_text(text)
157
+ result.gsub("]") { "\\]" }
130
158
  end
131
159
 
132
160
  private
133
161
 
162
+ def resolve_allow(allow)
163
+ # `flat_map` flattens Array results and appends scalar
164
+ # results as-is, so `|| key` keeps non-alias keys without
165
+ # extra wrapping.
166
+ keys = Array(allow).flat_map { |key| ALLOW_ALIASES[key] || key }
167
+ unknown = keys - ALLOW_KEYS
168
+ unless unknown.empty?
169
+ raise ArgumentError,
170
+ "unknown allow keys: #{unknown.inspect} " \
171
+ "(expected #{ALLOW_KEYS.inspect} or alias #{ALLOW_ALIASES.keys.inspect})"
172
+ end
173
+ # Array, not Set: with at most 4 keys the linear `include?`
174
+ # is observably identical to `Set#include?` and avoids the
175
+ # Set allocation. The array isn't reachable from outside
176
+ # the escaper, so we don't bother freezing it.
177
+ keys
178
+ end
179
+
134
180
  def escape_text(text)
135
181
  # On CRLF input, consume `\r` as part of the line terminator instead
136
182
  # of leaving it on the line. A trailing `\r` breaks line-end anchored
@@ -224,13 +270,20 @@ module Markbridge
224
270
 
225
271
  case first_byte
226
272
  when HASH
227
- return escape_first_char_inline(content, "\\#") if ATX_HEADING.match?(content)
273
+ if (match = ATX_HEADING.match(content))
274
+ return pass_marker_inline(content, match[0].length) if @allow.include?(:atx_heading)
275
+ return escape_first_char_inline(content, "\\#")
276
+ end
228
277
  when GT
278
+ return pass_first_char_inline(content) if @allow.include?(:block_quote)
229
279
  return escape_first_char_inline(content, "\\>")
230
280
  when DASH
231
281
  return escape_block_dash(content, prev_was_paragraph)
232
282
  when PLUS
233
- return escape_first_char_inline(content, "\\+") if BULLET_LIST.match?(content)
283
+ if BULLET_LIST.match?(content)
284
+ return pass_first_char_inline(content) if @allow.include?(:bullet_list)
285
+ return escape_first_char_inline(content, "\\+")
286
+ end
234
287
  when STAR
235
288
  return escape_block_star(content)
236
289
  when UNDERSCORE
@@ -268,24 +321,46 @@ module Markbridge
268
321
  (prev_was_paragraph && SETEXT_UNDERLINE_DASH.match?(content))
269
322
  return escape_all_chars(content, DASH, "\\-"), true
270
323
  end
271
- return escape_first_char_inline(content, "\\-") if BULLET_LIST.match?(content)
324
+ if BULLET_LIST.match?(content)
325
+ return pass_first_char_inline(content) if @allow.include?(:bullet_list)
326
+ return escape_first_char_inline(content, "\\-")
327
+ end
272
328
  [content, false]
273
329
  end
274
330
 
275
331
  def escape_block_star(content)
276
332
  return escape_all_chars(content, STAR, "\\*"), true if THEMATIC_BREAK_STAR.match?(content)
277
- return escape_first_char_inline(content, "\\*") if BULLET_LIST.match?(content)
333
+ if BULLET_LIST.match?(content)
334
+ return pass_first_char_inline(content) if @allow.include?(:bullet_list)
335
+ return escape_first_char_inline(content, "\\*")
336
+ end
278
337
  [content, false]
279
338
  end
280
339
 
281
340
  def escape_block_ordered_list(content)
282
341
  if (match = ORDERED_LIST.match(content))
283
342
  rest = content[match[0].length..]
343
+ return pass_marker_inline(content, match[0].length) if @allow.include?(:ordered_list)
344
+
284
345
  return "#{match[1]}\\#{match[2]}#{escape_inline(rest)}", true
285
346
  end
286
347
  [content, false]
287
348
  end
288
349
 
350
+ # Like {#escape_first_char_inline} but the leading character is
351
+ # preserved verbatim (used when allow: lets a single-byte
352
+ # marker like `-`, `+`, `*`, or `>` through).
353
+ def pass_first_char_inline(content)
354
+ ["#{content[0]}#{escape_inline(content[1..])}", true]
355
+ end
356
+
357
+ # Preserve a multi-byte marker (e.g. `1.`, `99)`, `##`) and
358
+ # inline-escape the rest. Used when allow: lets ordered lists
359
+ # or ATX headings through.
360
+ def pass_marker_inline(content, marker_length)
361
+ ["#{content[0, marker_length]}#{escape_inline(content[marker_length..])}", true]
362
+ end
363
+
289
364
  def escape_all_chars(str, byte_val, escaped)
290
365
  result = String.new(capacity: str.bytesize * 2, encoding: str.encoding)
291
366
  str.each_byte do |byte|
@@ -0,0 +1,53 @@
# frozen_string_literal: true

module Markbridge
  module Renderers
    module Discourse
      # Cleans up the raw Markdown produced by the Renderer:
      #
      # 1. (optional) strips trailing invisible characters per line —
      #    NBSP plus the zero-width format chars (ZWSP, ZWNJ, ZWJ, WJ,
      #    ZWNBSP/BOM). Deliberately excludes ASCII space and tab so
      #    Markdown's "two trailing spaces = hard line break" rule
      #    still works. Off by default.
      # 2. clears whitespace-only lines,
      # 3. collapses runs of 3+ newlines down to two,
      # 4. trims leading/trailing whitespace from the whole document.
      #
      # Step 2 runs before step 3 on purpose: blanking a
      # whitespace-only line can join two shorter newline runs into a
      # run of three or more, so collapsing must see the blanked text
      # for the "at most two consecutive newlines" guarantee to hold.
      #
      # Subclass to customize. The +call+ method is the entry point.
      class Postprocessor
        # NBSP plus zero-width format chars. Spelled with explicit
        # +\u{...}+ escapes rather than the literal characters — the
        # latter are invisible in editors and easy to corrupt on
        # encoding-conversion round-trips.
        #
        #   U+00A0  NBSP        no-break space
        #   U+200B  ZWSP        zero-width space
        #   U+200C  ZWNJ        zero-width non-joiner
        #   U+200D  ZWJ         zero-width joiner
        #   U+2060  WJ          word joiner
        #   U+FEFF  ZWNBSP/BOM  zero-width no-break space / byte-order mark
        TRAILING_INVISIBLE_RE = /[\u{00A0 200B 200C 200D 2060 FEFF}]+$/

        # @param strip_trailing_invisibles [Boolean] when true, strips
        #   trailing invisible characters (NBSP and zero-width format
        #   chars) from each line before the standard cleanup pass.
        def initialize(strip_trailing_invisibles: false)
          @strip_trailing_invisibles = strip_trailing_invisibles
        end

        # @param text [String] raw Markdown emitted by the renderer
        # @return [String] cleaned Markdown with no whitespace-only
        #   lines, no run of more than two newlines, and no
        #   leading/trailing whitespace
        def call(text)
          text = text.gsub(TRAILING_INVISIBLE_RE, "") if @strip_trailing_invisibles
          text
            .gsub(/^[ \t]+$/, "") # Remove whitespace-only lines
            .gsub(/\n{3,}/, "\n\n") # Max 2 consecutive newlines
            .strip # Trim leading/trailing whitespace
        end

        # Shared default instance (trailing-invisible stripping off).
        # Frozen because it is shared process-wide and holds no
        # mutable state.
        DEFAULT = new.freeze
      end
    end
  end
end
@@ -3,43 +3,30 @@
3
3
  module Markbridge
4
4
  module Renderers
5
5
  module Discourse
6
- # Immutable context for rendering that wraps the parent chain
7
- # Provides query methods to ask about parent elements without
8
- # the renderer knowing about specific element types
9
- #
10
- # Uses a hash-based cache for O(1) parent lookups instead of O(depth) scans
6
+ # Immutable context for rendering that wraps the parent chain.
7
+ # Provides query methods to ask about parent elements without the
8
+ # renderer knowing about specific element types.
11
9
  class RenderContext
12
10
  attr_reader :parents, :depth
13
11
 
14
- def initialize(parents = [], parent_cache: nil, html_mode: false)
12
+ def initialize(parents = [], html_mode: false)
15
13
  @parents = parents.freeze
16
14
  @depth = parents.size
17
- @parent_cache = parent_cache || build_cache(parents)
18
15
  @html_mode = html_mode
19
16
  end
20
17
 
21
18
  # Create new context with element added to parent chain.
22
- # Incrementally updates the cache (O(1)) instead of rebuilding from
23
- # parents (O(depth)) — important for deeply-nested documents.
24
19
  # @param element [AST::Element]
25
20
  # @return [RenderContext]
26
21
  def with_parent(element)
27
- new_parents = @parents + [element]
28
-
29
- new_cache = @parent_cache.dup
30
- element_class = element.class
31
- new_cache[element_class] ||= []
32
- new_cache[element_class] = new_cache[element_class] + [element]
33
-
34
- self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
22
+ self.class.new(@parents + [element], html_mode: @html_mode)
35
23
  end
36
24
 
37
- # Create new context with html_mode toggled
38
- # Preserves parent chain and cache
25
+ # Create new context with html_mode toggled.
39
26
  # @param value [Boolean]
40
27
  # @return [RenderContext]
41
28
  def with_html_mode(value)
42
- self.class.new(@parents, parent_cache: @parent_cache, html_mode: value)
29
+ self.class.new(@parents, html_mode: value)
43
30
  end
44
31
 
45
32
  # @return [Boolean]
@@ -47,45 +34,32 @@ module Markbridge
47
34
  @html_mode
48
35
  end
49
36
 
50
- # Find closest parent of given type
51
- # O(1) hash lookup instead of O(depth) scan
37
+ # Find closest parent that is_a? klass (handles subclasses).
52
38
  # @param klass [Class]
53
39
  # @return [AST::Element, nil]
54
40
  def find_parent(klass)
55
- @parent_cache[klass]&.last
41
+ @parents.reverse_each.find { |parent| parent.is_a?(klass) }
56
42
  end
57
43
 
58
- # Count parents of given type
59
- # O(1) instead of O(depth)
44
+ # Count parents that are is_a? klass (handles subclasses).
60
45
  # @param klass [Class]
61
46
  # @return [Integer]
62
47
  def count_parents(klass)
63
- @parent_cache[klass]&.size || 0
48
+ @parents.count { |parent| parent.is_a?(klass) }
64
49
  end
65
50
 
66
- # Check if parent of type exists
67
- # O(1) check
51
+ # Check if any parent is_a? klass (handles subclasses).
68
52
  # @param klass [Class]
69
53
  # @return [Boolean]
70
54
  def has_parent?(klass)
71
- !@parent_cache[klass].nil?
55
+ @parents.any? { |parent| parent.is_a?(klass) }
72
56
  end
73
57
 
74
- # Check if we're at the root (no parents)
58
+ # Check if we're at the root (no parents).
75
59
  # @return [Boolean]
76
60
  def root?
77
61
  @depth.zero?
78
62
  end
79
-
80
- private
81
-
82
- # Build cache from parents array.
83
- # Groups parents by class for fast O(1) lookup.
84
- # @param parents [Array<AST::Element>]
85
- # @return [Hash{Class => Array<AST::Element>}]
86
- def build_cache(parents)
87
- parents.group_by(&:class)
88
- end
89
63
  end
90
64
  end
91
65
  end