markbridge 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/lib/markbridge/ast/details.rb +24 -0
- data/lib/markbridge/ast/element.rb +63 -0
- data/lib/markbridge/ast.rb +1 -0
- data/lib/markbridge/conversion.rb +40 -0
- data/lib/markbridge/parse.rb +20 -0
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +25 -2
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +13 -2
- data/lib/markbridge/parsers/html/handler_registry.rb +97 -17
- data/lib/markbridge/parsers/html/handlers/self_closing_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/span_handler.rb +74 -0
- data/lib/markbridge/parsers/html/parser.rb +88 -18
- data/lib/markbridge/parsers/html.rb +2 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +21 -8
- data/lib/markbridge/parsers/media_wiki/parser.rb +13 -5
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +27 -4
- data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +1 -1
- data/lib/markbridge/parsers/text_formatter/parser.rb +17 -3
- data/lib/markbridge/renderers/discourse/identity_escaper.rb +37 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +91 -9
- data/lib/markbridge/renderers/discourse/postprocessor.rb +53 -0
- data/lib/markbridge/renderers/discourse/render_context.rb +14 -40
- data/lib/markbridge/renderers/discourse/renderer.rb +15 -5
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +4 -3
- data/lib/markbridge/renderers/discourse/tag_library.rb +42 -2
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse/tags/details_tag.rb +46 -0
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +5 -2
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +4 -3
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +13 -0
- data/lib/markbridge/renderers/discourse.rb +3 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +274 -110
- metadata +9 -2
- data/lib/markbridge/configuration.rb +0 -11
|
@@ -11,13 +11,20 @@ module Markbridge
|
|
|
11
11
|
# registry = InlineTagRegistry.build_from_default do |r|
|
|
12
12
|
# r.register("mark", :formatting, AST::Bold)
|
|
13
13
|
# end
|
|
14
|
-
# parser = InlineParser.new(
|
|
14
|
+
# parser = InlineParser.new(handlers: registry)
|
|
15
15
|
class InlineParser
|
|
16
16
|
MAX_INLINE_DEPTH = 20
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
# @return [Hash{String => Integer}] tag-name → occurrence count for
|
|
19
|
+
# HTML-like inline tags whose names are not registered. Shared
|
|
20
|
+
# with nested InlineParser instances so depth-recursive parses
|
|
21
|
+
# contribute to the same tally.
|
|
22
|
+
attr_reader :unknown_tags
|
|
23
|
+
|
|
24
|
+
# Build an inline parser.
#
# @param handlers [InlineTagRegistry, nil] tag registry to consult; when
#   omitted, InlineTagRegistry.default is used
# @param depth [Integer] nesting level of this parser instance
# @param unknown_tags [Hash{String => Integer}, nil] tally to record
#   unregistered tag names into; a fresh zero-defaulting Hash is created
#   when none is supplied
def initialize(handlers: nil, depth: 0, unknown_tags: nil)
  @unknown_tags = unknown_tags || Hash.new(0)
  @depth = depth
  @registry = handlers || InlineTagRegistry.default
end
|
|
22
29
|
|
|
23
30
|
# Parse inline markup and append resulting AST nodes to the parent element.
|
|
@@ -110,10 +117,11 @@ module Markbridge
|
|
|
110
117
|
return
|
|
111
118
|
end
|
|
112
119
|
|
|
113
|
-
InlineParser.new(
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
120
|
+
InlineParser.new(
|
|
121
|
+
handlers: @registry,
|
|
122
|
+
depth: @depth + 1,
|
|
123
|
+
unknown_tags: @unknown_tags,
|
|
124
|
+
).parse(content, parent:)
|
|
117
125
|
end
|
|
118
126
|
|
|
119
127
|
# Collect text until we find n consecutive apostrophes.
|
|
@@ -203,9 +211,14 @@ module Markbridge
|
|
|
203
211
|
self_closing = !tag_match[3].empty?
|
|
204
212
|
tag_name = tag_match[2].downcase
|
|
205
213
|
|
|
206
|
-
# Closing/self-closing tags and unknown tags are treated as literal text
|
|
214
|
+
# Closing/self-closing tags and unknown tags are treated as literal text.
|
|
215
|
+
# Track *unknown* opening tags so callers can surface them via
|
|
216
|
+
# Parse/Conversion#unknown_tags. We deliberately don't track
|
|
217
|
+
# closing/self-closing forms — they often pair up with the
|
|
218
|
+
# opening tag that's already counted.
|
|
207
219
|
entry = @registry[tag_name]
|
|
208
220
|
if closing || self_closing || !entry
|
|
221
|
+
@unknown_tags[tag_name] += 1 if !entry && !closing && !self_closing
|
|
209
222
|
advance_as_text(full_match)
|
|
210
223
|
return
|
|
211
224
|
end
|
|
@@ -21,13 +21,20 @@ module Markbridge
|
|
|
21
21
|
# parser = Markbridge::Parsers::MediaWiki::Parser.new
|
|
22
22
|
# ast = parser.parse("'''bold''' and ''italic''")
|
|
23
23
|
class Parser
|
|
24
|
-
# @
|
|
24
|
+
# @return [Hash{String => Integer}] tag-name → occurrence count for
|
|
25
|
+
# inline HTML-like tags whose names are not registered. Reset at
|
|
26
|
+
# the start of every #parse call.
|
|
27
|
+
attr_reader :unknown_tags
|
|
28
|
+
|
|
29
|
+
# @param handlers [InlineTagRegistry, nil] custom registry or use default.
|
|
30
|
+
# Named +handlers:+ for consistency with sibling parsers; the
|
|
31
|
+
# value is still an +InlineTagRegistry+ instance.
|
|
25
32
|
# @yield [InlineTagRegistry] optional block to customize the default registry
|
|
26
|
-
def initialize(
|
|
33
|
+
def initialize(handlers: nil, &block)
  # When a block is supplied it wins: the registry is built from the
  # defaults and customised by the block. Otherwise +handlers+ is stored
  # as given; nil is acceptable because InlineParser falls back to
  # InlineTagRegistry.default on its own.
  @handlers =
    if block
      InlineTagRegistry.build_from_default(&block)
    else
      handlers
    end
  # Per-parser tally of unregistered inline tag names.
  @unknown_tags = Hash.new(0)
end
|
|
32
39
|
|
|
33
40
|
# Parse MediaWiki wikitext into an AST Document.
|
|
@@ -38,8 +45,9 @@ module Markbridge
|
|
|
38
45
|
normalized = normalize_line_endings(input)
|
|
39
46
|
lines = normalized.split("\n")
|
|
40
47
|
|
|
48
|
+
@unknown_tags.clear
|
|
41
49
|
@document = AST::Document.new
|
|
42
|
-
@inline_parser = InlineParser.new(
|
|
50
|
+
@inline_parser = InlineParser.new(handlers: @handlers, unknown_tags: @unknown_tags)
|
|
43
51
|
@list_stack = []
|
|
44
52
|
|
|
45
53
|
process_lines(lines)
|
|
@@ -43,6 +43,28 @@ module Markbridge
|
|
|
43
43
|
@mappings[element_name.upcase] = handler
|
|
44
44
|
end
|
|
45
45
|
|
|
46
|
+
# Look up the handler for an element name (case-insensitive).
|
|
47
|
+
# @param element_name [String]
|
|
48
|
+
# @return [#process, nil]
|
|
49
|
+
def [](element_name)
  # Keys are stored upcased, so normalise the lookup key the same way.
  lookup_key = element_name.upcase
  @mappings[lookup_key]
end
|
|
52
|
+
|
|
53
|
+
# Replace the handler bound to one or more element names by
|
|
54
|
+
# yielding the previously-bound handler (which may be +nil+)
|
|
55
|
+
# and registering whatever the block returns.
|
|
56
|
+
#
|
|
57
|
+
# @param element_names [String, Array<String>]
|
|
58
|
+
# @yieldparam previous [#process, nil]
|
|
59
|
+
# @return [self]
|
|
60
|
+
def overlay(element_names)
  # For each name: look up the current handler, hand it to the block,
  # and register whatever the block returns in its place.
  names = Array(element_names)
  names.each { |element_name| register(element_name, yield(self[element_name])) }
  self
end
|
|
67
|
+
|
|
46
68
|
# Check if a handler is registered for an element
|
|
47
69
|
# @param element_name [String] XML element name
|
|
48
70
|
# @return [Boolean] true if handler is registered
|
|
@@ -53,11 +75,12 @@ module Markbridge
|
|
|
53
75
|
# Process an XML element using the registered handler
|
|
54
76
|
# @param element [Nokogiri::XML::Element]
|
|
55
77
|
# @param parent [AST::Element] parent node to add children to
|
|
78
|
+
# @param processor [Parser] the parser, exposed to handlers so
|
|
79
|
+
# they can call back into +process_children+ for nested content
|
|
56
80
|
# @return [AST::Element, nil] the created element if children should be processed, nil otherwise
|
|
57
|
-
def process_element(element, parent)
|
|
58
|
-
|
|
59
|
-
handler
|
|
60
|
-
handler&.process(element:, parent:)
|
|
81
|
+
def process_element(element, parent, processor)
  # Look the handler up by element name; no registered handler means
  # there is nothing to do and nil is returned.
  handler = self[element.name]
  return nil if handler.nil?

  handler.process(element: element, parent: parent, processor: processor)
end
|
|
62
85
|
|
|
63
86
|
# Register all default s9e/TextFormatter element mappings
|
|
@@ -16,7 +16,7 @@ module Markbridge
|
|
|
16
16
|
# @param element [Nokogiri::XML::Element] the XML element to process
|
|
17
17
|
# @param parent [AST::Element] the parent AST node to add children to
|
|
18
18
|
# @return [AST::Element, nil] the created element if children should be processed, nil otherwise
|
|
19
|
-
def process(element:, parent:)
|
|
19
|
+
def process(element:, parent:, processor: nil)
  # Abstract hook: concrete handlers are expected to override this.
  message = "#{self.class} must implement #process"
  raise NotImplementedError, message
end
|
|
22
22
|
|
|
@@ -10,7 +10,7 @@ module Markbridge
|
|
|
10
10
|
@element_class = AST::Code
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
def process(element:, parent:)
|
|
13
|
+
def process(element:, parent:, processor: nil)
|
|
14
14
|
attrs = extract_attributes(element)
|
|
15
15
|
lang = attrs[:lang] || attrs[:language]
|
|
16
16
|
node = AST::Code.new(language: lang)
|
|
@@ -10,7 +10,7 @@ module Markbridge
|
|
|
10
10
|
@element_class = AST::List
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
def process(element:, parent:)
|
|
13
|
+
def process(element:, parent:, processor: nil)
|
|
14
14
|
attrs = extract_attributes(element)
|
|
15
15
|
type_str = attrs[:type]
|
|
16
16
|
# Ordered if type is not empty, disc, circle, or square
|
|
@@ -21,7 +21,7 @@ module Markbridge
|
|
|
21
21
|
# Process the element by creating an AST node and processing children
|
|
22
22
|
# @param element [Nokogiri::XML::Element]
|
|
23
23
|
# @param parent [AST::Element]
|
|
24
|
-
def process(element:, parent:)
|
|
24
|
+
def process(element:, parent:, processor: nil)
|
|
25
25
|
node = @element_class.new
|
|
26
26
|
parent << node
|
|
27
27
|
|
|
@@ -38,12 +38,26 @@ module Markbridge
|
|
|
38
38
|
@unknown_tags = Hash.new(0)
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
-
# Parse s9e/TextFormatter XML into an AST
|
|
42
|
-
#
|
|
41
|
+
# Parse s9e/TextFormatter XML into an AST.
|
|
42
|
+
#
|
|
43
|
+
# Accepts either a String of XML or a pre-parsed Nokogiri node.
|
|
44
|
+
# A +Nokogiri::XML::Document+ is unwrapped via +#root+; any
|
|
45
|
+
# other node is treated as the root itself.
|
|
46
|
+
#
|
|
47
|
+
# @param input [String, Nokogiri::XML::Node] XML source or
|
|
48
|
+
# pre-parsed Nokogiri tree
|
|
43
49
|
# @return [AST::Document]
|
|
44
50
|
def parse(input)
|
|
45
51
|
@unknown_tags.clear
|
|
46
52
|
|
|
53
|
+
if input.is_a?(Nokogiri::XML::Node)
|
|
54
|
+
root = input.is_a?(Nokogiri::XML::Document) ? input.root : input
|
|
55
|
+
document = AST::Document.new
|
|
56
|
+
process_node(root, document) if root
|
|
57
|
+
return document
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
input = input.to_s
|
|
47
61
|
xml_doc = Nokogiri.XML(input)
|
|
48
62
|
root = xml_doc.root
|
|
49
63
|
|
|
@@ -101,7 +115,7 @@ module Markbridge
|
|
|
101
115
|
|
|
102
116
|
# Process element with registered handler
|
|
103
117
|
# Handler returns element if children should be processed, nil otherwise
|
|
104
|
-
result_element = @handlers.process_element(element, ast_parent)
|
|
118
|
+
result_element = @handlers.process_element(element, ast_parent, self)
|
|
105
119
|
|
|
106
120
|
if result_element
|
|
107
121
|
# Handler succeeded and returned element - process children into it
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Renderers
    module Discourse
      # Pass-through escaper. Returns its input unchanged, except for the
      # link-label case described on {#escape}.
      #
      # Useful for migration paths where the source content is already
      # valid Markdown (or otherwise trusted not to need escaping) and
      # should reach the postprocessor verbatim. For *partial*
      # passthrough (e.g. allow lists but still escape headings), see
      # {MarkdownEscaper#initialize}'s +allow:+ kwarg.
      #
      # @example Per-call use via the renderer factory
      #   renderer = Markbridge.discourse_renderer(escape: false)
      #   Markbridge.bbcode_to_markdown(post.body, renderer:)
      class IdentityEscaper
        # A +]+ preceded by an even number (including zero) of
        # backslashes, i.e. one that Markdown would treat as a real
        # closing bracket. The run of backslash pairs is captured so it
        # can be re-emitted untouched in front of the inserted escape.
        UNESCAPED_CLOSE_BRACKET = /(?<!\\)((?:\\\\)*)\]/
        private_constant :UNESCAPED_CLOSE_BRACKET

        # @param text [String, nil]
        # @param in_link_label [Boolean] when true, escape +]+ so the
        #   text can be spliced into a Markdown link label
        #   +[text](url)+ without terminating it early. Mirrors
        #   {MarkdownEscaper#escape}'s +in_link_label:+. This isn't a
        #   stylistic escape — without it, trusted-Markdown content
        #   containing +]+ inside a +Url+/+Email+ ancestor produces a
        #   broken link. A +]+ that is already backslash-escaped is left
        #   alone: adding a second backslash would turn +\]+ into +\\]+
        #   (literal backslash followed by a bare +]+) and re-open the
        #   very problem this option exists to prevent.
        # @return [String] +text+ with unescaped +]+ optionally escaped,
        #   or +""+ when +text+ is nil
        def escape(text, in_link_label: false)
          return "" if text.nil?
          # Fast path: nothing to do outside link labels or when no
          # bracket is present; the input object is returned as-is.
          return text unless in_link_label && text.include?("]")

          # Block form so the replacement is taken literally rather than
          # being scanned for backreference syntax.
          text.gsub(UNESCAPED_CLOSE_BRACKET) { "#{Regexp.last_match(1)}\\]" }
        end
      end
    end
  end
end
|
|
@@ -30,12 +30,29 @@ module Markbridge
|
|
|
30
30
|
# escaper.escape("<?php echo 1; ?>") # => "\\<?php echo 1; ?>"
|
|
31
31
|
#
|
|
32
32
|
class MarkdownEscaper
|
|
33
|
+
# Block-level constructs that callers can opt into letting
|
|
34
|
+
# through unescaped via the +allow:+ kwarg. The check fires
|
|
35
|
+
# only after a line's first byte has matched the relevant
|
|
36
|
+
# case arm, so this is a cold-path lookup with no measurable
|
|
37
|
+
# hot-path cost.
|
|
38
|
+
ALLOW_KEYS = %i[bullet_list ordered_list atx_heading block_quote].freeze
|
|
39
|
+
ALLOW_ALIASES = { lists: %i[bullet_list ordered_list] }.freeze
|
|
40
|
+
private_constant :ALLOW_KEYS, :ALLOW_ALIASES
|
|
41
|
+
|
|
33
42
|
# @param escape_hard_line_breaks [Boolean] when true, strip trailing spaces
|
|
34
43
|
# before newlines to prevent CommonMark hard line breaks (<br/>).
|
|
35
44
|
# Defaults to false because Discourse has trailing-space hard line
|
|
36
45
|
# breaks disabled by default.
|
|
37
|
-
|
|
46
|
+
# @param allow [Symbol, Array<Symbol>, nil] block-level constructs
|
|
47
|
+
# to pass through unescaped. Recognised keys:
|
|
48
|
+
# +:bullet_list+, +:ordered_list+, +:atx_heading+,
|
|
49
|
+
# +:block_quote+. The alias +:lists+ expands to
|
|
50
|
+
# `[:bullet_list, :ordered_list]`. Thematic breaks, setext
|
|
51
|
+
# underlines, fenced code, and indented code remain escaped
|
|
52
|
+
# even when their first byte matches an allow-listed marker.
|
|
53
|
+
def initialize(escape_hard_line_breaks: false, allow: nil)
|
|
38
54
|
@escape_hard_line_breaks = escape_hard_line_breaks
|
|
55
|
+
@allow = resolve_allow(allow)
|
|
39
56
|
# @inline_content / @inline_result / @inline_len are set by
|
|
40
57
|
# escape_inline on every call before any helper reads them;
|
|
41
58
|
# no defensive init needed.
|
|
@@ -116,23 +133,59 @@ module Markbridge
|
|
|
116
133
|
# Autolinks (<https://...>, <email@domain>) are intentionally preserved.
|
|
117
134
|
#
|
|
118
135
|
# @param text [String, nil] the text to escape
|
|
136
|
+
# @param in_link_label [Boolean] when true, also escape `]` so the text
|
|
137
|
+
# can be spliced into a Markdown link label `[text](url)` without
|
|
138
|
+
# terminating it early. The default leaves `]` alone because a bare
|
|
139
|
+
# `]` in prose is harmless (the matching `[` is already escaped).
|
|
119
140
|
# @return [String] the escaped text, or empty string if input is nil
|
|
120
141
|
# @note Multi-line HTML tags and blocks are handled by escaping the opening <
|
|
121
|
-
def escape(text)
|
|
142
|
+
def escape(text, in_link_label: false)
|
|
122
143
|
return "" if text.nil?
|
|
123
144
|
|
|
124
145
|
# Neutralize hard line breaks (trailing 2+ spaces before newline)
|
|
125
146
|
text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n")
|
|
126
147
|
|
|
127
|
-
|
|
148
|
+
result =
|
|
149
|
+
if MAYBE_SPECIAL.match?(text) || MAYBE_INDENTED_CODE.match?(text)
|
|
150
|
+
escape_text(text)
|
|
151
|
+
else
|
|
152
|
+
text
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
return result unless in_link_label && result.include?("]")
|
|
128
156
|
|
|
129
|
-
|
|
157
|
+
result.gsub("]") { "\\]" }
|
|
130
158
|
end
|
|
131
159
|
|
|
132
160
|
private
|
|
133
161
|
|
|
162
|
+
# Normalise the +allow:+ argument into a flat list of recognised keys.
#
# @param allow [Symbol, Array<Symbol>, nil]
# @return [Array<Symbol>] alias-expanded keys (empty when +allow+ is nil)
# @raise [ArgumentError] if any key is neither a recognised key nor an alias
def resolve_allow(allow)
  # An alias expands to its Array of keys, which `flat_map` splices in;
  # any other key is kept as the scalar it is.
  resolved = Array(allow).flat_map { |key| ALLOW_ALIASES.fetch(key, key) }

  unrecognised = resolved - ALLOW_KEYS
  # A plain Array is returned: with at most four distinct keys a linear
  # `include?` is all the callers need, and the list never leaves the
  # escaper, so it is left unfrozen.
  return resolved if unrecognised.empty?

  raise ArgumentError,
        "unknown allow keys: #{unrecognised.inspect} " \
        "(expected #{ALLOW_KEYS.inspect} or alias #{ALLOW_ALIASES.keys.inspect})"
end
|
|
179
|
+
|
|
134
180
|
def escape_text(text)
|
|
135
|
-
|
|
181
|
+
# On CRLF input, consume `\r` as part of the line terminator instead
|
|
182
|
+
# of leaving it on the line. A trailing `\r` breaks line-end anchored
|
|
183
|
+
# regexes (e.g. SETEXT_UNDERLINE_*) and the `ws_end >= line_length`
|
|
184
|
+
# early-out in escape_indented_code, leaking NBSPs onto
|
|
185
|
+
# whitespace-only CRLF lines. The `include?` guard keeps the
|
|
186
|
+
# LF-only fast path on a string split (regex split is ~20% slower
|
|
187
|
+
# on the indented-code hot path).
|
|
188
|
+
lines = text.include?("\r") ? text.split(/\r?\n/, -1) : text.split("\n", -1)
|
|
136
189
|
return escape_line(lines[0], false) if lines.size == 1
|
|
137
190
|
|
|
138
191
|
# Pre-allocate result buffer
|
|
@@ -217,13 +270,20 @@ module Markbridge
|
|
|
217
270
|
|
|
218
271
|
case first_byte
|
|
219
272
|
when HASH
|
|
220
|
-
|
|
273
|
+
if (match = ATX_HEADING.match(content))
|
|
274
|
+
return pass_marker_inline(content, match[0].length) if @allow.include?(:atx_heading)
|
|
275
|
+
return escape_first_char_inline(content, "\\#")
|
|
276
|
+
end
|
|
221
277
|
when GT
|
|
278
|
+
return pass_first_char_inline(content) if @allow.include?(:block_quote)
|
|
222
279
|
return escape_first_char_inline(content, "\\>")
|
|
223
280
|
when DASH
|
|
224
281
|
return escape_block_dash(content, prev_was_paragraph)
|
|
225
282
|
when PLUS
|
|
226
|
-
|
|
283
|
+
if BULLET_LIST.match?(content)
|
|
284
|
+
return pass_first_char_inline(content) if @allow.include?(:bullet_list)
|
|
285
|
+
return escape_first_char_inline(content, "\\+")
|
|
286
|
+
end
|
|
227
287
|
when STAR
|
|
228
288
|
return escape_block_star(content)
|
|
229
289
|
when UNDERSCORE
|
|
@@ -261,24 +321,46 @@ module Markbridge
|
|
|
261
321
|
(prev_was_paragraph && SETEXT_UNDERLINE_DASH.match?(content))
|
|
262
322
|
return escape_all_chars(content, DASH, "\\-"), true
|
|
263
323
|
end
|
|
264
|
-
|
|
324
|
+
if BULLET_LIST.match?(content)
|
|
325
|
+
return pass_first_char_inline(content) if @allow.include?(:bullet_list)
|
|
326
|
+
return escape_first_char_inline(content, "\\-")
|
|
327
|
+
end
|
|
265
328
|
[content, false]
|
|
266
329
|
end
|
|
267
330
|
|
|
268
331
|
def escape_block_star(content)
|
|
269
332
|
return escape_all_chars(content, STAR, "\\*"), true if THEMATIC_BREAK_STAR.match?(content)
|
|
270
|
-
|
|
333
|
+
if BULLET_LIST.match?(content)
|
|
334
|
+
return pass_first_char_inline(content) if @allow.include?(:bullet_list)
|
|
335
|
+
return escape_first_char_inline(content, "\\*")
|
|
336
|
+
end
|
|
271
337
|
[content, false]
|
|
272
338
|
end
|
|
273
339
|
|
|
274
340
|
# Handle a line that may start an ordered-list item.
#
# @param content [String] the line to inspect
# @return [Array(String, Boolean)] the (possibly rewritten) line and a
#   flag that is true when the line matched ORDERED_LIST
def escape_block_ordered_list(content)
  match = ORDERED_LIST.match(content)
  return [content, false] unless match

  marker_length = match[0].length
  # Allow-listed: keep the marker verbatim, inline-escape only the rest.
  return pass_marker_inline(content, marker_length) if @allow.include?(:ordered_list)

  # Otherwise put a backslash between the two captured marker parts.
  rest = content[marker_length..]
  ["#{match[1]}\\#{match[2]}#{escape_inline(rest)}", true]
end
|
|
281
349
|
|
|
350
|
+
# Like {#escape_first_char_inline} but the leading character is
|
|
351
|
+
# preserved verbatim (used when allow: lets a single-byte
|
|
352
|
+
# marker like `-`, `+`, `*`, or `>` through).
|
|
353
|
+
def pass_first_char_inline(content)
  # Keep the leading marker character untouched; only the remainder of
  # the line goes through inline escaping.
  marker = content[0]
  remainder = escape_inline(content[1..])
  ["#{marker}#{remainder}", true]
end
|
|
356
|
+
|
|
357
|
+
# Preserve a multi-byte marker (e.g. `1.`, `99)`, `##`) and
|
|
358
|
+
# inline-escape the rest. Used when allow: lets ordered lists
|
|
359
|
+
# or ATX headings through.
|
|
360
|
+
def pass_marker_inline(content, marker_length)
  # Split at the end of the marker: the marker is emitted verbatim and
  # everything after it is inline-escaped.
  marker = content.slice(0, marker_length)
  remainder = escape_inline(content[marker_length..])
  ["#{marker}#{remainder}", true]
end
|
|
363
|
+
|
|
282
364
|
def escape_all_chars(str, byte_val, escaped)
|
|
283
365
|
result = String.new(capacity: str.bytesize * 2, encoding: str.encoding)
|
|
284
366
|
str.each_byte do |byte|
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Renderers
    module Discourse
      # Cleans up the raw Markdown produced by the Renderer:
      #
      # 1. (optional) strips trailing invisible characters per line —
      #    NBSP plus the zero-width format chars (ZWSP, ZWNJ, ZWJ, WJ,
      #    ZWNBSP/BOM). Deliberately excludes ASCII space and tab so
      #    Markdown's "two trailing spaces = hard line break" rule
      #    still works. Off by default.
      # 2. clears whitespace-only lines,
      # 3. collapses runs of 3+ newlines down to two,
      # 4. trims leading/trailing whitespace from the whole document.
      #
      # Whitespace-only lines are cleared *before* newline runs are
      # collapsed: clearing them can itself create runs of 3+ newlines,
      # which would otherwise survive into the output.
      #
      # Subclass to customize. The +call+ method is the entry point.
      class Postprocessor
        # NBSP plus zero-width format chars. Spelled with explicit
        # +\u{...}+ escapes rather than the literal characters — the
        # latter are invisible in editors and easy to corrupt on
        # encoding-conversion round-trips.
        #
        #   U+00A0  NBSP        no-break space
        #   U+200B  ZWSP        zero-width space
        #   U+200C  ZWNJ        zero-width non-joiner
        #   U+200D  ZWJ         zero-width joiner
        #   U+2060  WJ          word joiner
        #   U+FEFF  ZWNBSP/BOM  zero-width no-break space / byte-order mark
        TRAILING_INVISIBLE_RE = /[\u{00A0 200B 200C 200D 2060 FEFF}]+$/

        # @param strip_trailing_invisibles [Boolean] when true, strips
        #   trailing invisible characters (NBSP and zero-width format
        #   chars) from each line before the standard cleanup pass.
        def initialize(strip_trailing_invisibles: false)
          @strip_trailing_invisibles = strip_trailing_invisibles
        end

        # @param text [String] raw Markdown from the renderer
        # @return [String] cleaned Markdown; the input is not mutated
        def call(text)
          text = text.gsub(TRAILING_INVISIBLE_RE, "") if @strip_trailing_invisibles
          text
            .gsub(/^[ \t]+$/, "") # Blank out whitespace-only lines first...
            .gsub(/\n{3,}/, "\n\n") # ...so this really caps runs at 2 newlines
            .strip # Trim leading/trailing whitespace
        end

        DEFAULT = new
      end
    end
  end
end
|
|
@@ -3,43 +3,30 @@
|
|
|
3
3
|
module Markbridge
|
|
4
4
|
module Renderers
|
|
5
5
|
module Discourse
|
|
6
|
-
# Immutable context for rendering that wraps the parent chain
|
|
7
|
-
# Provides query methods to ask about parent elements without
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
# Uses a hash-based cache for O(1) parent lookups instead of O(depth) scans
|
|
6
|
+
# Immutable context for rendering that wraps the parent chain.
|
|
7
|
+
# Provides query methods to ask about parent elements without the
|
|
8
|
+
# renderer knowing about specific element types.
|
|
11
9
|
class RenderContext
|
|
12
10
|
attr_reader :parents, :depth
|
|
13
11
|
|
|
14
|
-
def initialize(parents = [],
|
|
12
|
+
def initialize(parents = [], html_mode: false)
|
|
15
13
|
@parents = parents.freeze
|
|
16
14
|
@depth = parents.size
|
|
17
|
-
@parent_cache = parent_cache || build_cache(parents)
|
|
18
15
|
@html_mode = html_mode
|
|
19
16
|
end
|
|
20
17
|
|
|
21
18
|
# Create new context with element added to parent chain.
|
|
22
|
-
# Incrementally updates the cache (O(1)) instead of rebuilding from
|
|
23
|
-
# parents (O(depth)) — important for deeply-nested documents.
|
|
24
19
|
# @param element [AST::Element]
|
|
25
20
|
# @return [RenderContext]
|
|
26
21
|
def with_parent(element)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
new_cache = @parent_cache.dup
|
|
30
|
-
element_class = element.class
|
|
31
|
-
new_cache[element_class] ||= []
|
|
32
|
-
new_cache[element_class] = new_cache[element_class] + [element]
|
|
33
|
-
|
|
34
|
-
self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
|
|
22
|
+
self.class.new(@parents + [element], html_mode: @html_mode)
|
|
35
23
|
end
|
|
36
24
|
|
|
37
|
-
# Create new context with html_mode toggled
|
|
38
|
-
# Preserves parent chain and cache
|
|
25
|
+
# Create new context with html_mode toggled.
|
|
39
26
|
# @param value [Boolean]
|
|
40
27
|
# @return [RenderContext]
|
|
41
28
|
def with_html_mode(value)
|
|
42
|
-
self.class.new(@parents,
|
|
29
|
+
self.class.new(@parents, html_mode: value)
|
|
43
30
|
end
|
|
44
31
|
|
|
45
32
|
# @return [Boolean]
|
|
@@ -47,45 +34,32 @@ module Markbridge
|
|
|
47
34
|
@html_mode
|
|
48
35
|
end
|
|
49
36
|
|
|
50
|
-
# Find closest parent
|
|
51
|
-
# O(1) hash lookup instead of O(depth) scan
|
|
37
|
+
# Find closest parent that is_a? klass (handles subclasses).
|
|
52
38
|
# @param klass [Class]
|
|
53
39
|
# @return [AST::Element, nil]
|
|
54
40
|
def find_parent(klass)
|
|
55
|
-
@
|
|
41
|
+
@parents.reverse_each.find { |parent| parent.is_a?(klass) }
|
|
56
42
|
end
|
|
57
43
|
|
|
58
|
-
# Count parents
|
|
59
|
-
# O(1) instead of O(depth)
|
|
44
|
+
# Count parents for which is_a?(klass) holds (handles subclasses).
|
|
60
45
|
# @param klass [Class]
|
|
61
46
|
# @return [Integer]
|
|
62
47
|
def count_parents(klass)
|
|
63
|
-
@
|
|
48
|
+
@parents.count { |parent| parent.is_a?(klass) }
|
|
64
49
|
end
|
|
65
50
|
|
|
66
|
-
# Check if parent
|
|
67
|
-
# O(1) check
|
|
51
|
+
# Check if any parent is_a? klass (handles subclasses).
|
|
68
52
|
# @param klass [Class]
|
|
69
53
|
# @return [Boolean]
|
|
70
54
|
def has_parent?(klass)
|
|
71
|
-
|
|
55
|
+
@parents.any? { |parent| parent.is_a?(klass) }
|
|
72
56
|
end
|
|
73
57
|
|
|
74
|
-
# Check if we're at the root (no parents)
|
|
58
|
+
# Check if we're at the root (no parents).
|
|
75
59
|
# @return [Boolean]
|
|
76
60
|
def root?
  # A context with an empty parent chain has depth 0.
  @depth == 0
end
|
|
79
|
-
|
|
80
|
-
private
|
|
81
|
-
|
|
82
|
-
# Build cache from parents array.
|
|
83
|
-
# Groups parents by class for fast O(1) lookup.
|
|
84
|
-
# @param parents [Array<AST::Element>]
|
|
85
|
-
# @return [Hash{Class => Array<AST::Element>}]
|
|
86
|
-
def build_cache(parents)
|
|
87
|
-
parents.group_by(&:class)
|
|
88
|
-
end
|
|
89
63
|
end
|
|
90
64
|
end
|
|
91
65
|
end
|