markbridge 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markbridge/all.rb +4 -7
- data/lib/markbridge/ast/document.rb +1 -1
- data/lib/markbridge/ast/element.rb +2 -2
- data/lib/markbridge/ast/list.rb +2 -2
- data/lib/markbridge/ast/table.rb +6 -12
- data/lib/markbridge/ast/text.rb +5 -1
- data/lib/markbridge/bbcode.rb +4 -0
- data/lib/markbridge/gem_loader.rb +2 -3
- data/lib/markbridge/html.rb +4 -0
- data/lib/markbridge/mediawiki.rb +4 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +21 -11
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +9 -17
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +6 -18
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
- data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +1 -1
- data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +2 -2
- data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +3 -3
- data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
- data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
- data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
- data/lib/markbridge/parsers/bbcode.rb +1 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +32 -49
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -2
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
- data/lib/markbridge/parsers/html/parser.rb +3 -13
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +56 -67
- data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +51 -76
- data/lib/markbridge/parsers/media_wiki.rb +1 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +5 -37
- data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +24 -17
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +9 -15
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -10
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +11 -39
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +25 -33
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
- data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +49 -49
- data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
- data/lib/markbridge/renderers/discourse/renderer.rb +54 -12
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
- data/lib/markbridge/renderers/discourse/tag.rb +14 -1
- data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -9
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -1
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +13 -2
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +5 -1
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -2
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/table_tag.rb +12 -8
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +29 -2
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse.rb +1 -0
- data/lib/markbridge/textformatter.rb +4 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +8 -8
- metadata +8 -2
|
@@ -22,11 +22,18 @@ module Markbridge
|
|
|
22
22
|
# match = detector.detect(input, 0)
|
|
23
23
|
# match.node.type # => :attachment
|
|
24
24
|
class Upload < Base
|
|
25
|
-
#
|
|
26
|
-
IMAGE_PATTERN =
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
# Image: ![alt|dimensions](upload://sha1.ext)
|
|
26
|
+
IMAGE_PATTERN =
|
|
27
|
+
%r{\A!\[(?<alt>[^|\]]*)(?:\|(?<dimensions>[^\]]*))?\]\(upload://(?<url>[^)]+)\)}
|
|
28
|
+
|
|
29
|
+
# Attachment: [filename|attachment](upload://sha1.ext) (size)
|
|
30
|
+
ATTACHMENT_PATTERN =
|
|
31
|
+
%r{
|
|
32
|
+
\A
|
|
33
|
+
\[(?<filename>[^|\]]*)\|attachment\]
|
|
34
|
+
\(upload://(?<url>[^)]+)\)
|
|
35
|
+
(?:\s*\((?<size>[^)]+)\))?
|
|
36
|
+
}xi
|
|
30
37
|
|
|
31
38
|
# Attempt to detect an upload at the given position.
|
|
32
39
|
#
|
|
@@ -34,14 +41,11 @@ module Markbridge
|
|
|
34
41
|
# @param pos [Integer] current position to check
|
|
35
42
|
# @return [Match, nil] match result or nil if no match
|
|
36
43
|
def detect(input, pos)
|
|
37
|
-
char = input[pos]
|
|
38
|
-
return nil unless char == "!" || char == "["
|
|
39
|
-
|
|
40
44
|
remaining = input[pos..]
|
|
41
|
-
|
|
42
|
-
|
|
45
|
+
case input[pos]
|
|
46
|
+
when "!"
|
|
43
47
|
detect_image(remaining, pos)
|
|
44
|
-
|
|
48
|
+
when "["
|
|
45
49
|
detect_attachment(remaining, pos)
|
|
46
50
|
end
|
|
47
51
|
end
|
|
@@ -50,71 +54,42 @@ module Markbridge
|
|
|
50
54
|
|
|
51
55
|
def detect_image(remaining, pos)
|
|
52
56
|
match = IMAGE_PATTERN.match(remaining)
|
|
53
|
-
return nil unless match
|
|
54
|
-
|
|
55
|
-
raw = match[0]
|
|
56
|
-
alt_part = match[1]
|
|
57
|
-
url_part = match[2]
|
|
57
|
+
return nil unless match
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
alt
|
|
59
|
+
sha1, filename = parse_upload_url(match[:url])
|
|
60
|
+
alt = match[:alt]
|
|
61
|
+
alt = nil if alt.empty?
|
|
61
62
|
|
|
62
|
-
#
|
|
63
|
-
|
|
63
|
+
# `type: :image` is omitted because it is AST::Upload's default -
|
|
64
|
+
# passing it explicitly was an equivalent-mutation surface.
|
|
65
|
+
node =
|
|
66
|
+
AST::Upload.new(sha1:, filename:, alt:, dimensions: match[:dimensions], raw: match[0])
|
|
64
67
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
|
|
68
|
+
Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
|
|
68
69
|
end
|
|
69
70
|
|
|
70
71
|
def detect_attachment(remaining, pos)
|
|
71
72
|
match = ATTACHMENT_PATTERN.match(remaining)
|
|
72
|
-
return nil unless match
|
|
73
|
-
|
|
74
|
-
raw = match[0]
|
|
75
|
-
name_part = match[1]
|
|
76
|
-
url_part = match[2]
|
|
77
|
-
size_part = match[3]
|
|
78
|
-
|
|
79
|
-
# Parse filename from "filename|attachment" format
|
|
80
|
-
filename = name_part.sub(/\|attachment$/i, "")
|
|
73
|
+
return nil unless match
|
|
81
74
|
|
|
82
|
-
|
|
83
|
-
sha1, _url_filename = parse_upload_url(url_part)
|
|
75
|
+
sha1, = parse_upload_url(match[:url])
|
|
84
76
|
|
|
85
|
-
|
|
86
|
-
|
|
77
|
+
node =
|
|
78
|
+
AST::Upload.new(
|
|
79
|
+
sha1:,
|
|
80
|
+
filename: match[:filename],
|
|
81
|
+
type: :attachment,
|
|
82
|
+
size: match[:size],
|
|
83
|
+
raw: match[0],
|
|
84
|
+
)
|
|
87
85
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
def parse_alt_dimensions(alt_part)
|
|
94
|
-
return nil, nil if alt_part.nil? || alt_part.empty?
|
|
95
|
-
|
|
96
|
-
if alt_part.include?("|")
|
|
97
|
-
parts = alt_part.split("|", 2)
|
|
98
|
-
alt = parts[0].empty? ? nil : parts[0]
|
|
99
|
-
dimensions = parts[1]
|
|
100
|
-
[alt, dimensions]
|
|
101
|
-
else
|
|
102
|
-
[alt_part, nil]
|
|
103
|
-
end
|
|
86
|
+
Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
|
|
104
87
|
end
|
|
105
88
|
|
|
89
|
+
# URL format: sha1.ext or just sha1. Returns [sha1, filename-or-nil].
|
|
106
90
|
def parse_upload_url(url_part)
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
parts = url_part.split(".", 2)
|
|
110
|
-
sha1 = parts[0]
|
|
111
|
-
filename = url_part
|
|
112
|
-
else
|
|
113
|
-
sha1 = url_part
|
|
114
|
-
filename = nil
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
[sha1, filename]
|
|
91
|
+
sha1, _, ext = url_part.partition(".")
|
|
92
|
+
[sha1, ext.empty? ? nil : url_part]
|
|
118
93
|
end
|
|
119
94
|
end
|
|
120
95
|
end
|
|
@@ -49,13 +49,8 @@ module Markbridge
|
|
|
49
49
|
def initialize(detectors: DEFAULT_DETECTORS, tag_library: nil, mention_resolver: nil)
|
|
50
50
|
@detector_instances = build_detectors(detectors, mention_resolver)
|
|
51
51
|
@tag_library = tag_library
|
|
52
|
-
@code_tracker
|
|
53
|
-
@
|
|
54
|
-
@nodes = nil
|
|
55
|
-
@node_index = 0
|
|
56
|
-
@pos = 0
|
|
57
|
-
@input = nil
|
|
58
|
-
@line_start = true
|
|
52
|
+
# @code_tracker / @result / @nodes / @node_index / @pos / @input /
|
|
53
|
+
# @line_start are set by #scan before use; no defensive init needed.
|
|
59
54
|
end
|
|
60
55
|
|
|
61
56
|
# Scan input and extract constructs.
|
|
@@ -63,14 +58,12 @@ module Markbridge
|
|
|
63
58
|
# @param input [String] Discourse Markdown input
|
|
64
59
|
# @return [ScanResult] result containing processed markdown and extracted nodes
|
|
65
60
|
def scan(input)
|
|
66
|
-
return ScanResult.new(markdown: "", nodes: []) if input.nil? || input.empty?
|
|
67
|
-
|
|
68
61
|
@code_tracker = CodeBlockTracker.new
|
|
69
62
|
@result = +""
|
|
70
63
|
@nodes = []
|
|
71
64
|
@node_index = 0
|
|
72
65
|
@pos = 0
|
|
73
|
-
@input = input
|
|
66
|
+
@input = input.to_s
|
|
74
67
|
@line_start = true
|
|
75
68
|
|
|
76
69
|
scan_input
|
|
@@ -82,14 +75,10 @@ module Markbridge
|
|
|
82
75
|
|
|
83
76
|
def build_detectors(detectors, mention_resolver)
|
|
84
77
|
detectors.map do |klass|
|
|
85
|
-
if klass
|
|
86
|
-
|
|
87
|
-
klass.new(type_resolver: mention_resolver)
|
|
88
|
-
else
|
|
89
|
-
klass.new
|
|
90
|
-
end
|
|
78
|
+
if klass == Detectors::Mention
|
|
79
|
+
klass.new(type_resolver: mention_resolver)
|
|
91
80
|
else
|
|
92
|
-
klass
|
|
81
|
+
klass.new
|
|
93
82
|
end
|
|
94
83
|
end
|
|
95
84
|
end
|
|
@@ -102,9 +91,10 @@ module Markbridge
|
|
|
102
91
|
next if advance_code_boundary(:check_indented_boundary)
|
|
103
92
|
end
|
|
104
93
|
|
|
105
|
-
# Check for inline code boundary
|
|
106
|
-
|
|
107
|
-
|
|
94
|
+
# Check for inline code boundary. check_inline_boundary's
|
|
95
|
+
# own fenced/indented guard means we don't need to pre-check
|
|
96
|
+
# here — it'll just return nil in those cases.
|
|
97
|
+
if @input[@pos] == "`"
|
|
108
98
|
new_pos = @code_tracker.check_inline_boundary(@input, @pos)
|
|
109
99
|
if new_pos
|
|
110
100
|
@result << @input[@pos...new_pos]
|
|
@@ -142,9 +132,15 @@ module Markbridge
|
|
|
142
132
|
new_pos = @code_tracker.public_send(method, @input, @pos, line_start: true)
|
|
143
133
|
return false unless new_pos
|
|
144
134
|
|
|
135
|
+
# check_fenced_boundary / check_indented_boundary always stop
|
|
136
|
+
# at pos_after_line, which is either after a "\n" or at EOF.
|
|
137
|
+
# After-newline → @line_start should be true; at EOF the
|
|
138
|
+
# outer `while @pos < @input.length` exits and @line_start
|
|
139
|
+
# is unobservable. Setting true unconditionally drops the
|
|
140
|
+
# `@input[new_pos - 1] == "\n"` dance.
|
|
145
141
|
@result << @input[@pos...new_pos]
|
|
146
142
|
@pos = new_pos
|
|
147
|
-
@line_start =
|
|
143
|
+
@line_start = true
|
|
148
144
|
true
|
|
149
145
|
end
|
|
150
146
|
|
|
@@ -159,26 +155,24 @@ module Markbridge
|
|
|
159
155
|
def handle_match(match)
|
|
160
156
|
node = match.node
|
|
161
157
|
@nodes << node
|
|
158
|
+
@result << render_placeholder(node)
|
|
162
159
|
|
|
163
|
-
#
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
160
|
+
# Every detector shipped today matches content that ends on a
|
|
161
|
+
# non-newline byte (`]`, `)`, `_`, alphanumeric), so @line_start
|
|
162
|
+
# is always false after a successful match. If a future custom
|
|
163
|
+
# detector produces a match whose end_pos sits right after
|
|
164
|
+
# "\n", re-introduce the `@input[@pos - 1] == "\n"` check.
|
|
167
165
|
@pos = match.end_pos
|
|
168
|
-
@line_start =
|
|
166
|
+
@line_start = false
|
|
169
167
|
@node_index += 1
|
|
170
168
|
end
|
|
171
169
|
|
|
172
170
|
def render_placeholder(node)
|
|
173
171
|
if @tag_library
|
|
174
172
|
tag = @tag_library[node.class]
|
|
175
|
-
if tag
|
|
176
|
-
# Create a minimal interface for rendering
|
|
177
|
-
return tag.render(node, nil)
|
|
178
|
-
end
|
|
173
|
+
return tag.render(node, nil) if tag
|
|
179
174
|
end
|
|
180
175
|
|
|
181
|
-
# Default placeholder format if no tag library or tag not found
|
|
182
176
|
default_placeholder(node)
|
|
183
177
|
end
|
|
184
178
|
|
|
@@ -192,8 +186,6 @@ module Markbridge
|
|
|
192
186
|
"<<EVENT:#{@node_index}:#{node.name}>>"
|
|
193
187
|
when AST::Upload
|
|
194
188
|
"<<UPLOAD:#{@node_index}:#{node.sha1}>>"
|
|
195
|
-
else
|
|
196
|
-
"<<UNKNOWN:#{@node_index}>>"
|
|
197
189
|
end
|
|
198
190
|
end
|
|
199
191
|
end
|
|
@@ -15,12 +15,10 @@ module Markbridge
|
|
|
15
15
|
# @return [String]
|
|
16
16
|
def build(content, marker:, indent:)
|
|
17
17
|
lines = content.split("\n")
|
|
18
|
-
lines = [""] if lines.empty? # Handle empty content
|
|
19
18
|
first_line = "#{indent}#{marker}#{lines.first}"
|
|
20
19
|
|
|
21
|
-
return "#{first_line}\n" if lines.size
|
|
20
|
+
return "#{first_line}\n" if lines.size < 2
|
|
22
21
|
|
|
23
|
-
# Handle multi-line content with sophisticated blank line handling
|
|
24
22
|
format_multiline(lines, first_line, indent)
|
|
25
23
|
end
|
|
26
24
|
|
|
@@ -63,15 +61,17 @@ module Markbridge
|
|
|
63
61
|
end
|
|
64
62
|
end
|
|
65
63
|
|
|
66
|
-
# Handle empty lines in continuation
|
|
64
|
+
# Handle empty lines in continuation. Caller (format_continuation_line)
|
|
65
|
+
# only invokes this when `line.empty?`, and `content.split("\n")`
|
|
66
|
+
# trims trailing empty strings, so the LAST continuation line is
|
|
67
|
+
# never empty — `idx + 1` is always in bounds when we get here.
|
|
67
68
|
# @param idx [Integer] index in continuation_lines
|
|
68
69
|
# @param continuation_lines [Array<String>] all continuation lines
|
|
69
70
|
# @param continuation_indent [String] indent for continuation
|
|
70
71
|
# @return [String, nil] formatted line or nil to skip
|
|
71
72
|
def handle_empty_line(idx, continuation_lines, continuation_indent)
|
|
72
73
|
# Skip empty lines that come before nested list items (structural blanks)
|
|
73
|
-
|
|
74
|
-
return nil if next_line&.match?(/\A\s*(?:-|\d+\.)\s/)
|
|
74
|
+
return nil if continuation_lines[idx + 1].match?(/\A\s*(?:-|\d+\.)\s/)
|
|
75
75
|
|
|
76
76
|
# Preserve empty lines within text content (paragraph breaks) with indentation
|
|
77
77
|
continuation_indent
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "cgi"
|
|
4
|
+
|
|
5
|
+
module Markbridge
|
|
6
|
+
module Renderers
|
|
7
|
+
module Discourse
|
|
8
|
+
# Escapes text for safe inclusion in HTML output. Used when rendering
|
|
9
|
+
# content inside a CommonMark HTML block (e.g. TableTag's fallback)
|
|
10
|
+
# where Markdown-level escaping would not be applied.
|
|
11
|
+
class HtmlEscaper
|
|
12
|
+
# @param text [String, nil]
|
|
13
|
+
# @return [String]
|
|
14
|
+
def self.escape(text)
|
|
15
|
+
CGI.escapeHTML(text || "")
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -36,18 +36,15 @@ module Markbridge
|
|
|
36
36
|
# breaks disabled by default.
|
|
37
37
|
def initialize(escape_hard_line_breaks: false)
|
|
38
38
|
@escape_hard_line_breaks = escape_hard_line_breaks
|
|
39
|
-
@inline_content
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
# @inline_content / @inline_result / @inline_len are set by
|
|
40
|
+
# escape_inline on every call before any helper reads them;
|
|
41
|
+
# no defensive init needed.
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
-
# Fast-path
|
|
45
|
-
#
|
|
46
|
-
# > is needed for blockquote detection at line start
|
|
44
|
+
# Fast-path: skip escape_text entirely for content with no special
|
|
45
|
+
# chars. `>` is needed for blockquote detection at line start.
|
|
47
46
|
MAYBE_SPECIAL = /[\\`*_\[#+\-.!<>&|~=>)]/
|
|
48
47
|
|
|
49
|
-
# Check for indented code on any line
|
|
50
|
-
# Matches: 4+ spaces, tab, or space+tab combinations that reach column 4+
|
|
51
48
|
MAYBE_INDENTED_CODE = /(?:^|\n)(?: {4}|\t| {1,3}\t)/
|
|
52
49
|
|
|
53
50
|
# Block-level patterns
|
|
@@ -122,8 +119,7 @@ module Markbridge
|
|
|
122
119
|
# @return [String] the escaped text, or empty string if input is nil
|
|
123
120
|
# @note Multi-line HTML tags and blocks are handled by escaping the opening <
|
|
124
121
|
def escape(text)
|
|
125
|
-
return ""
|
|
126
|
-
return text if text.empty?
|
|
122
|
+
return "" if text.nil?
|
|
127
123
|
|
|
128
124
|
# Neutralize hard line breaks (trailing 2+ spaces before newline)
|
|
129
125
|
text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n")
|
|
@@ -158,37 +154,32 @@ module Markbridge
|
|
|
158
154
|
end
|
|
159
155
|
|
|
160
156
|
def escape_line(line, prev_was_paragraph)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
#
|
|
157
|
+
# No `line.empty?` early-return: it's redundant with the
|
|
158
|
+
# `line.getbyte(indent_len).nil?` guard below, which catches both
|
|
159
|
+
# empty and whitespace-only lines while also preserving object
|
|
160
|
+
# identity (returns `line`).
|
|
164
161
|
return escape_indented_code(line) if INDENTED_CODE.match?(line)
|
|
165
162
|
|
|
166
|
-
#
|
|
167
|
-
|
|
163
|
+
# After INDENTED_CODE, line has at most 3 leading spaces, so the
|
|
164
|
+
# `< 3` bound keeps this a tight YJIT-friendly hot loop.
|
|
168
165
|
indent_len = 0
|
|
169
|
-
|
|
170
|
-
indent_len += 1
|
|
171
|
-
end
|
|
166
|
+
indent_len += 1 while indent_len < 3 && line.getbyte(indent_len) == SPACE
|
|
172
167
|
|
|
173
|
-
|
|
168
|
+
# Whitespace-only line (1-3 spaces) — getbyte past end is nil.
|
|
169
|
+
return line if line.getbyte(indent_len).nil?
|
|
174
170
|
|
|
175
171
|
has_indent = indent_len > 0
|
|
176
172
|
content = has_indent ? line[indent_len..] : line
|
|
177
173
|
|
|
178
|
-
# Apply block-level escaping (which may also do inline escaping)
|
|
179
174
|
escaped, skip_inline = escape_block_level(content, prev_was_paragraph)
|
|
180
|
-
|
|
181
|
-
# Apply inline escaping if block-level didn't handle it
|
|
182
175
|
escaped = escape_inline(escaped) unless skip_inline
|
|
183
176
|
|
|
184
|
-
# Prepend indent if present, preserve encoding
|
|
185
177
|
if has_indent
|
|
186
|
-
|
|
187
|
-
result = String.new(encoding:)
|
|
178
|
+
result = String.new(encoding: line.encoding)
|
|
188
179
|
result << line[0, indent_len] << escaped
|
|
189
180
|
result
|
|
190
181
|
else
|
|
191
|
-
escaped.
|
|
182
|
+
escaped.force_encoding(line.encoding)
|
|
192
183
|
end
|
|
193
184
|
end
|
|
194
185
|
|
|
@@ -203,15 +194,14 @@ module Markbridge
|
|
|
203
194
|
# - Content doesn't start at valid block position (no lists, headings, etc.)
|
|
204
195
|
# - Visual indentation is preserved (NBSP renders as space)
|
|
205
196
|
# We still escape inline content since it's no longer protected.
|
|
197
|
+
# Caller (escape_line) guarantees INDENTED_CODE matched, so line
|
|
198
|
+
# starts with at least one SPACE or TAB; ws_end is always ≥ 1.
|
|
206
199
|
line_length = line.length
|
|
207
200
|
ws_end = 0
|
|
208
|
-
while ws_end < line_length
|
|
209
|
-
byte = line.getbyte(ws_end)
|
|
210
|
-
break if byte != SPACE && byte != TAB
|
|
201
|
+
while ws_end < line_length && ((byte = line.getbyte(ws_end)) == SPACE || byte == TAB)
|
|
211
202
|
ws_end += 1
|
|
212
203
|
end
|
|
213
204
|
|
|
214
|
-
return line if ws_end == 0 # No leading whitespace (shouldn't happen, but safe)
|
|
215
205
|
return line if ws_end >= line_length # Whitespace-only line
|
|
216
206
|
|
|
217
207
|
# Convert leading whitespace to NBSP (tab = 4 NBSP for visual consistency)
|
|
@@ -310,6 +300,13 @@ module Markbridge
|
|
|
310
300
|
@inline_len = bytesize
|
|
311
301
|
pos = 0
|
|
312
302
|
|
|
303
|
+
# No loop-progress guard: every `dispatch_inline_byte` branch
|
|
304
|
+
# returns `pos + N` for N >= 1 by construction, so the loop
|
|
305
|
+
# is provably terminating. Mutations that break this
|
|
306
|
+
# (`while true`, body drops, selector swaps that short-circuit
|
|
307
|
+
# the dispatch) surface as timeouts rather than alive
|
|
308
|
+
# mutations, and the inline guard would otherwise cost ~15%
|
|
309
|
+
# on this hot path per benchmark.
|
|
313
310
|
while pos < @inline_len
|
|
314
311
|
byte = @inline_content.getbyte(pos)
|
|
315
312
|
pos = dispatch_inline_byte(byte, pos)
|
|
@@ -474,39 +471,42 @@ module Markbridge
|
|
|
474
471
|
end
|
|
475
472
|
|
|
476
473
|
def paragraph_line?(line)
|
|
477
|
-
|
|
474
|
+
pos = 0
|
|
475
|
+
line_len = line.bytesize
|
|
476
|
+
pos += 1 while pos < line_len && line.getbyte(pos) == SPACE
|
|
477
|
+
first_non_space = pos
|
|
478
478
|
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
while first_non_space < line_length && line.getbyte(first_non_space) == SPACE
|
|
482
|
-
first_non_space += 1
|
|
483
|
-
end
|
|
484
|
-
return false if first_non_space >= line_length || line.getbyte(first_non_space) == TAB
|
|
479
|
+
# Empty or whitespace-only lines: getbyte past the end returns nil.
|
|
480
|
+
return false if line.getbyte(first_non_space).nil?
|
|
485
481
|
|
|
486
|
-
|
|
482
|
+
# Indented code (4+ spaces or any leading \t) is not a paragraph.
|
|
483
|
+
# INDENTED_CODE also catches lines where first_non_space > 3, so no
|
|
484
|
+
# separate numeric boundary check is needed.
|
|
485
|
+
return false if INDENTED_CODE.match?(line)
|
|
487
486
|
|
|
488
|
-
|
|
489
|
-
# So setext headings CAN follow them
|
|
490
|
-
return true if content.getbyte(0) == BRACKET_OPEN
|
|
487
|
+
content = first_non_space == 0 ? line : line[first_non_space..]
|
|
491
488
|
|
|
492
|
-
|
|
489
|
+
# Lines starting with [ are paragraph content (the escaper rewrites [
|
|
490
|
+
# to \[). block_construct? has no BRACKET_OPEN case arm, so such
|
|
491
|
+
# lines naturally fall through and !block_construct?(content) == true.
|
|
492
|
+
!block_construct?(content)
|
|
493
493
|
end
|
|
494
494
|
|
|
495
495
|
# Checks whether content starts with a block-level markdown construct.
|
|
496
496
|
# Used by both escape_block_level (to decide what to escape) and
|
|
497
497
|
# paragraph_line? (to decide if setext underlines can follow).
|
|
498
498
|
def block_construct?(content)
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
case first_byte
|
|
499
|
+
case content.getbyte(0)
|
|
502
500
|
when HASH
|
|
503
501
|
ATX_HEADING.match?(content)
|
|
504
502
|
when GT
|
|
505
503
|
true
|
|
506
|
-
when DASH
|
|
507
|
-
BULLET_LIST.match?(content) ||
|
|
508
|
-
|
|
509
|
-
|
|
504
|
+
when DASH
|
|
505
|
+
BULLET_LIST.match?(content) || THEMATIC_BREAK_DASH.match?(content)
|
|
506
|
+
when STAR
|
|
507
|
+
BULLET_LIST.match?(content) || THEMATIC_BREAK_STAR.match?(content)
|
|
508
|
+
when PLUS
|
|
509
|
+
BULLET_LIST.match?(content)
|
|
510
510
|
when UNDERSCORE
|
|
511
511
|
THEMATIC_BREAK_UNDERSCORE.match?(content)
|
|
512
512
|
when BACKTICK
|
|
@@ -11,26 +11,40 @@ module Markbridge
|
|
|
11
11
|
class RenderContext
|
|
12
12
|
attr_reader :parents, :depth
|
|
13
13
|
|
|
14
|
-
def initialize(parents = [], parent_cache: nil)
|
|
14
|
+
def initialize(parents = [], parent_cache: nil, html_mode: false)
|
|
15
15
|
@parents = parents.freeze
|
|
16
16
|
@depth = parents.size
|
|
17
17
|
@parent_cache = parent_cache || build_cache(parents)
|
|
18
|
+
@html_mode = html_mode
|
|
18
19
|
end
|
|
19
20
|
|
|
20
|
-
# Create new context with element added to parent chain
|
|
21
|
-
# Incrementally updates cache instead of rebuilding from
|
|
21
|
+
# Create new context with element added to parent chain.
|
|
22
|
+
# Incrementally updates the cache (O(1)) instead of rebuilding from
|
|
23
|
+
# parents (O(depth)) — important for deeply-nested documents.
|
|
22
24
|
# @param element [AST::Element]
|
|
23
25
|
# @return [RenderContext]
|
|
24
26
|
def with_parent(element)
|
|
25
27
|
new_parents = @parents + [element]
|
|
26
28
|
|
|
27
|
-
# Incrementally update cache instead of rebuilding
|
|
28
29
|
new_cache = @parent_cache.dup
|
|
29
30
|
element_class = element.class
|
|
30
31
|
new_cache[element_class] ||= []
|
|
31
32
|
new_cache[element_class] = new_cache[element_class] + [element]
|
|
32
33
|
|
|
33
|
-
self.class.new(new_parents, parent_cache: new_cache)
|
|
34
|
+
self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Create new context with html_mode toggled
|
|
38
|
+
# Preserves parent chain and cache
|
|
39
|
+
# @param value [Boolean]
|
|
40
|
+
# @return [RenderContext]
|
|
41
|
+
def with_html_mode(value)
|
|
42
|
+
self.class.new(@parents, parent_cache: @parent_cache, html_mode: value)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# @return [Boolean]
|
|
46
|
+
def html_mode?
|
|
47
|
+
@html_mode
|
|
34
48
|
end
|
|
35
49
|
|
|
36
50
|
# Find closest parent of given type
|
|
@@ -54,7 +68,7 @@ module Markbridge
|
|
|
54
68
|
# @param klass [Class]
|
|
55
69
|
# @return [Boolean]
|
|
56
70
|
def has_parent?(klass)
|
|
57
|
-
|
|
71
|
+
!@parent_cache[klass].nil?
|
|
58
72
|
end
|
|
59
73
|
|
|
60
74
|
# Check if we're at the root (no parents)
|
|
@@ -65,14 +79,12 @@ module Markbridge
|
|
|
65
79
|
|
|
66
80
|
private
|
|
67
81
|
|
|
68
|
-
# Build cache from parents array
|
|
69
|
-
# Groups parents by class for fast lookup
|
|
82
|
+
# Build cache from parents array.
|
|
83
|
+
# Groups parents by class for fast O(1) lookup.
|
|
70
84
|
# @param parents [Array<AST::Element>]
|
|
71
85
|
# @return [Hash{Class => Array<AST::Element>}]
|
|
72
86
|
def build_cache(parents)
|
|
73
|
-
parents.
|
|
74
|
-
cache[parent.class] = cache[parent.class] + [parent]
|
|
75
|
-
end
|
|
87
|
+
parents.group_by(&:class)
|
|
76
88
|
end
|
|
77
89
|
end
|
|
78
90
|
end
|
|
@@ -5,10 +5,13 @@ module Markbridge
|
|
|
5
5
|
module Discourse
|
|
6
6
|
# Renders AST to Discourse-flavored Markdown in-memory.
|
|
7
7
|
class Renderer
|
|
8
|
-
def initialize(tag_library: nil, escaper: nil)
|
|
8
|
+
def initialize(tag_library: nil, escaper: nil, html_escaper: nil)
|
|
9
9
|
@tag_library = tag_library || TagLibrary.default
|
|
10
10
|
@escaper = escaper || MarkdownEscaper.new
|
|
11
|
-
@
|
|
11
|
+
@html_escaper = html_escaper || HtmlEscaper
|
|
12
|
+
# @interface_cache is lazily initialized in #render's top-level
|
|
13
|
+
# call and reset to nil after the call completes. No init
|
|
14
|
+
# needed here — unset ivar returns nil under `.nil?` check.
|
|
12
15
|
end
|
|
13
16
|
|
|
14
17
|
# Render a node to Markdown
|
|
@@ -26,18 +29,12 @@ module Markbridge
|
|
|
26
29
|
end
|
|
27
30
|
|
|
28
31
|
case node
|
|
29
|
-
when AST::Document
|
|
32
|
+
when AST::Element # Document is an Element subclass
|
|
30
33
|
render_children(node, context:)
|
|
31
34
|
when AST::MarkdownText
|
|
32
|
-
|
|
33
|
-
node.text
|
|
35
|
+
render_markdown_text(node, context)
|
|
34
36
|
when AST::Text
|
|
35
|
-
|
|
36
|
-
if context.has_parent?(AST::Code)
|
|
37
|
-
node.text
|
|
38
|
-
else
|
|
39
|
-
@escaper.escape(node.text)
|
|
40
|
-
end
|
|
37
|
+
render_text(node, context)
|
|
41
38
|
else
|
|
42
39
|
""
|
|
43
40
|
end
|
|
@@ -50,14 +47,59 @@ module Markbridge
|
|
|
50
47
|
# @param context [RenderContext] rendering context
|
|
51
48
|
# @return [String]
|
|
52
49
|
def render_children(node, context:)
|
|
53
|
-
|
|
50
|
+
result = +""
|
|
51
|
+
node.children.each do |child|
|
|
52
|
+
part = render(child, context:)
|
|
53
|
+
next if part.empty?
|
|
54
|
+
|
|
55
|
+
# Integer-byte check avoids allocating substrings for the
|
|
56
|
+
# per-child adjacency probe. EMPHASIS_DELIMITER_BYTES.include?
|
|
57
|
+
# over a 4-element Set is O(1).
|
|
58
|
+
if !result.empty? && (last_byte = result.getbyte(-1)) == part.getbyte(0) &&
|
|
59
|
+
EMPHASIS_DELIMITER_BYTES.include?(last_byte)
|
|
60
|
+
result << EMPHASIS_BOUNDARY
|
|
61
|
+
end
|
|
62
|
+
result << part
|
|
63
|
+
end
|
|
64
|
+
result
|
|
54
65
|
end
|
|
55
66
|
|
|
56
67
|
private
|
|
57
68
|
|
|
69
|
+
# Inserted between sibling outputs when their adjacent characters
|
|
70
|
+
# would merge into a longer Markdown emphasis delimiter run (e.g.
|
|
71
|
+
# `***` + `*...` becoming `****...`). The HTML comment is invisible
|
|
72
|
+
# in rendered output but breaks the delimiter run during Markdown
|
|
73
|
+
# parsing.
|
|
74
|
+
EMPHASIS_BOUNDARY = "<!---->"
|
|
75
|
+
# Bytes where adjacent runs merge into a single longer run during
|
|
76
|
+
# Markdown parsing: emphasis (* _), strikethrough (~), code spans (`).
|
|
77
|
+
EMPHASIS_DELIMITER_BYTES = Set[42, 95, 126, 96].freeze
|
|
78
|
+
private_constant :EMPHASIS_BOUNDARY, :EMPHASIS_DELIMITER_BYTES
|
|
79
|
+
|
|
58
80
|
def interface_for(context)
|
|
59
81
|
@interface_cache[context.object_id] ||= RenderingInterface.new(self, context)
|
|
60
82
|
end
|
|
83
|
+
|
|
84
|
+
# In html_mode, surround pre-formatted Markdown with blank lines so that
|
|
85
|
+
# CommonMark terminates the enclosing HTML block (e.g. <table>) and
|
|
86
|
+
# parses the content as Markdown before the closing tags reopen another
|
|
87
|
+
# HTML block.
|
|
88
|
+
def render_markdown_text(node, context)
|
|
89
|
+
context.html_mode? ? "\n\n#{node.text}\n\n" : node.text
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def render_text(node, context)
|
|
93
|
+
# In html_mode even inside a code block we must HTML-escape, otherwise a
|
|
94
|
+
# stray `<` in a code cell would break the surrounding <td>.
|
|
95
|
+
if context.has_parent?(AST::Code)
|
|
96
|
+
context.html_mode? ? @html_escaper.escape(node.text) : node.text
|
|
97
|
+
elsif context.html_mode?
|
|
98
|
+
@html_escaper.escape(node.text)
|
|
99
|
+
else
|
|
100
|
+
@escaper.escape(node.text)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
61
103
|
end
|
|
62
104
|
end
|
|
63
105
|
end
|