markbridge 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markbridge/all.rb +4 -7
- data/lib/markbridge/ast/document.rb +1 -1
- data/lib/markbridge/ast/element.rb +2 -2
- data/lib/markbridge/ast/list.rb +2 -2
- data/lib/markbridge/ast/table.rb +6 -12
- data/lib/markbridge/ast/text.rb +5 -1
- data/lib/markbridge/bbcode.rb +4 -0
- data/lib/markbridge/gem_loader.rb +2 -3
- data/lib/markbridge/html.rb +4 -0
- data/lib/markbridge/mediawiki.rb +4 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +21 -11
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +9 -17
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +6 -18
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
- data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +1 -1
- data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +2 -2
- data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +3 -3
- data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
- data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
- data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
- data/lib/markbridge/parsers/bbcode.rb +1 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +32 -49
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -2
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
- data/lib/markbridge/parsers/html/parser.rb +3 -13
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +56 -67
- data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +51 -76
- data/lib/markbridge/parsers/media_wiki.rb +1 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +5 -37
- data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +24 -17
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +9 -15
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -10
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +11 -39
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +25 -33
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
- data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +57 -50
- data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
- data/lib/markbridge/renderers/discourse/renderer.rb +54 -12
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
- data/lib/markbridge/renderers/discourse/tag.rb +14 -1
- data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -9
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -1
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +13 -2
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +5 -1
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -2
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/table_tag.rb +12 -8
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +29 -2
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse.rb +1 -0
- data/lib/markbridge/textformatter.rb +4 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +8 -8
- metadata +8 -2
|
@@ -22,11 +22,18 @@ module Markbridge
|
|
|
22
22
|
# match = detector.detect(input, 0)
|
|
23
23
|
# match.node.type # => :attachment
|
|
24
24
|
class Upload < Base
|
|
25
|
-
#
|
|
26
|
-
IMAGE_PATTERN =
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
# Image: ![alt|dimensions](upload://sha1.ext)
|
|
26
|
+
IMAGE_PATTERN =
|
|
27
|
+
%r{\A!\[(?<alt>[^|\]]*)(?:\|(?<dimensions>[^\]]*))?\]\(upload://(?<url>[^)]+)\)}
|
|
28
|
+
|
|
29
|
+
# Attachment: [filename|attachment](upload://sha1.ext) (size)
|
|
30
|
+
ATTACHMENT_PATTERN =
|
|
31
|
+
%r{
|
|
32
|
+
\A
|
|
33
|
+
\[(?<filename>[^|\]]*)\|attachment\]
|
|
34
|
+
\(upload://(?<url>[^)]+)\)
|
|
35
|
+
(?:\s*\((?<size>[^)]+)\))?
|
|
36
|
+
}xi
|
|
30
37
|
|
|
31
38
|
# Attempt to detect an upload at the given position.
|
|
32
39
|
#
|
|
@@ -34,14 +41,11 @@ module Markbridge
|
|
|
34
41
|
# @param pos [Integer] current position to check
|
|
35
42
|
# @return [Match, nil] match result or nil if no match
|
|
36
43
|
def detect(input, pos)
|
|
37
|
-
char = input[pos]
|
|
38
|
-
return nil unless char == "!" || char == "["
|
|
39
|
-
|
|
40
44
|
remaining = input[pos..]
|
|
41
|
-
|
|
42
|
-
|
|
45
|
+
case input[pos]
|
|
46
|
+
when "!"
|
|
43
47
|
detect_image(remaining, pos)
|
|
44
|
-
|
|
48
|
+
when "["
|
|
45
49
|
detect_attachment(remaining, pos)
|
|
46
50
|
end
|
|
47
51
|
end
|
|
@@ -50,71 +54,42 @@ module Markbridge
|
|
|
50
54
|
|
|
51
55
|
def detect_image(remaining, pos)
|
|
52
56
|
match = IMAGE_PATTERN.match(remaining)
|
|
53
|
-
return nil unless match
|
|
54
|
-
|
|
55
|
-
raw = match[0]
|
|
56
|
-
alt_part = match[1]
|
|
57
|
-
url_part = match[2]
|
|
57
|
+
return nil unless match
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
alt
|
|
59
|
+
sha1, filename = parse_upload_url(match[:url])
|
|
60
|
+
alt = match[:alt]
|
|
61
|
+
alt = nil if alt.empty?
|
|
61
62
|
|
|
62
|
-
#
|
|
63
|
-
|
|
63
|
+
# `type: :image` is omitted because it is AST::Upload's default -
|
|
64
|
+
# passing it explicitly was an equivalent-mutation surface.
|
|
65
|
+
node =
|
|
66
|
+
AST::Upload.new(sha1:, filename:, alt:, dimensions: match[:dimensions], raw: match[0])
|
|
64
67
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
|
|
68
|
+
Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
|
|
68
69
|
end
|
|
69
70
|
|
|
70
71
|
def detect_attachment(remaining, pos)
|
|
71
72
|
match = ATTACHMENT_PATTERN.match(remaining)
|
|
72
|
-
return nil unless match
|
|
73
|
-
|
|
74
|
-
raw = match[0]
|
|
75
|
-
name_part = match[1]
|
|
76
|
-
url_part = match[2]
|
|
77
|
-
size_part = match[3]
|
|
78
|
-
|
|
79
|
-
# Parse filename from "filename|attachment" format
|
|
80
|
-
filename = name_part.sub(/\|attachment$/i, "")
|
|
73
|
+
return nil unless match
|
|
81
74
|
|
|
82
|
-
|
|
83
|
-
sha1, _url_filename = parse_upload_url(url_part)
|
|
75
|
+
sha1, = parse_upload_url(match[:url])
|
|
84
76
|
|
|
85
|
-
|
|
86
|
-
|
|
77
|
+
node =
|
|
78
|
+
AST::Upload.new(
|
|
79
|
+
sha1:,
|
|
80
|
+
filename: match[:filename],
|
|
81
|
+
type: :attachment,
|
|
82
|
+
size: match[:size],
|
|
83
|
+
raw: match[0],
|
|
84
|
+
)
|
|
87
85
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
def parse_alt_dimensions(alt_part)
|
|
94
|
-
return nil, nil if alt_part.nil? || alt_part.empty?
|
|
95
|
-
|
|
96
|
-
if alt_part.include?("|")
|
|
97
|
-
parts = alt_part.split("|", 2)
|
|
98
|
-
alt = parts[0].empty? ? nil : parts[0]
|
|
99
|
-
dimensions = parts[1]
|
|
100
|
-
[alt, dimensions]
|
|
101
|
-
else
|
|
102
|
-
[alt_part, nil]
|
|
103
|
-
end
|
|
86
|
+
Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
|
|
104
87
|
end
|
|
105
88
|
|
|
89
|
+
# URL format: sha1.ext or just sha1. Returns [sha1, filename-or-nil].
|
|
106
90
|
def parse_upload_url(url_part)
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
parts = url_part.split(".", 2)
|
|
110
|
-
sha1 = parts[0]
|
|
111
|
-
filename = url_part
|
|
112
|
-
else
|
|
113
|
-
sha1 = url_part
|
|
114
|
-
filename = nil
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
[sha1, filename]
|
|
91
|
+
sha1, _, ext = url_part.partition(".")
|
|
92
|
+
[sha1, ext.empty? ? nil : url_part]
|
|
118
93
|
end
|
|
119
94
|
end
|
|
120
95
|
end
|
|
@@ -49,13 +49,8 @@ module Markbridge
|
|
|
49
49
|
def initialize(detectors: DEFAULT_DETECTORS, tag_library: nil, mention_resolver: nil)
|
|
50
50
|
@detector_instances = build_detectors(detectors, mention_resolver)
|
|
51
51
|
@tag_library = tag_library
|
|
52
|
-
@code_tracker
|
|
53
|
-
@
|
|
54
|
-
@nodes = nil
|
|
55
|
-
@node_index = 0
|
|
56
|
-
@pos = 0
|
|
57
|
-
@input = nil
|
|
58
|
-
@line_start = true
|
|
52
|
+
# @code_tracker / @result / @nodes / @node_index / @pos / @input /
|
|
53
|
+
# @line_start are set by #scan before use; no defensive init needed.
|
|
59
54
|
end
|
|
60
55
|
|
|
61
56
|
# Scan input and extract constructs.
|
|
@@ -63,14 +58,12 @@ module Markbridge
|
|
|
63
58
|
# @param input [String] Discourse Markdown input
|
|
64
59
|
# @return [ScanResult] result containing processed markdown and extracted nodes
|
|
65
60
|
def scan(input)
|
|
66
|
-
return ScanResult.new(markdown: "", nodes: []) if input.nil? || input.empty?
|
|
67
|
-
|
|
68
61
|
@code_tracker = CodeBlockTracker.new
|
|
69
62
|
@result = +""
|
|
70
63
|
@nodes = []
|
|
71
64
|
@node_index = 0
|
|
72
65
|
@pos = 0
|
|
73
|
-
@input = input
|
|
66
|
+
@input = input.to_s
|
|
74
67
|
@line_start = true
|
|
75
68
|
|
|
76
69
|
scan_input
|
|
@@ -82,14 +75,10 @@ module Markbridge
|
|
|
82
75
|
|
|
83
76
|
def build_detectors(detectors, mention_resolver)
|
|
84
77
|
detectors.map do |klass|
|
|
85
|
-
if klass
|
|
86
|
-
|
|
87
|
-
klass.new(type_resolver: mention_resolver)
|
|
88
|
-
else
|
|
89
|
-
klass.new
|
|
90
|
-
end
|
|
78
|
+
if klass == Detectors::Mention
|
|
79
|
+
klass.new(type_resolver: mention_resolver)
|
|
91
80
|
else
|
|
92
|
-
klass
|
|
81
|
+
klass.new
|
|
93
82
|
end
|
|
94
83
|
end
|
|
95
84
|
end
|
|
@@ -102,9 +91,10 @@ module Markbridge
|
|
|
102
91
|
next if advance_code_boundary(:check_indented_boundary)
|
|
103
92
|
end
|
|
104
93
|
|
|
105
|
-
# Check for inline code boundary
|
|
106
|
-
|
|
107
|
-
|
|
94
|
+
# Check for inline code boundary. check_inline_boundary's
|
|
95
|
+
# own fenced/indented guard means we don't need to pre-check
|
|
96
|
+
# here — it'll just return nil in those cases.
|
|
97
|
+
if @input[@pos] == "`"
|
|
108
98
|
new_pos = @code_tracker.check_inline_boundary(@input, @pos)
|
|
109
99
|
if new_pos
|
|
110
100
|
@result << @input[@pos...new_pos]
|
|
@@ -142,9 +132,15 @@ module Markbridge
|
|
|
142
132
|
new_pos = @code_tracker.public_send(method, @input, @pos, line_start: true)
|
|
143
133
|
return false unless new_pos
|
|
144
134
|
|
|
135
|
+
# check_fenced_boundary / check_indented_boundary always stop
|
|
136
|
+
# at pos_after_line, which is either after a "\n" or at EOF.
|
|
137
|
+
# After-newline → @line_start should be true; at EOF the
|
|
138
|
+
# outer `while @pos < @input.length` exits and @line_start
|
|
139
|
+
# is unobservable. Setting true unconditionally drops the
|
|
140
|
+
# `@input[new_pos - 1] == "\n"` dance.
|
|
145
141
|
@result << @input[@pos...new_pos]
|
|
146
142
|
@pos = new_pos
|
|
147
|
-
@line_start =
|
|
143
|
+
@line_start = true
|
|
148
144
|
true
|
|
149
145
|
end
|
|
150
146
|
|
|
@@ -159,26 +155,24 @@ module Markbridge
|
|
|
159
155
|
def handle_match(match)
|
|
160
156
|
node = match.node
|
|
161
157
|
@nodes << node
|
|
158
|
+
@result << render_placeholder(node)
|
|
162
159
|
|
|
163
|
-
#
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
160
|
+
# Every detector shipped today matches content that ends on a
|
|
161
|
+
# non-newline byte (`]`, `)`, `_`, alphanumeric), so @line_start
|
|
162
|
+
# is always false after a successful match. If a future custom
|
|
163
|
+
# detector produces a match whose end_pos sits right after
|
|
164
|
+
# "\n", re-introduce the `@input[@pos - 1] == "\n"` check.
|
|
167
165
|
@pos = match.end_pos
|
|
168
|
-
@line_start =
|
|
166
|
+
@line_start = false
|
|
169
167
|
@node_index += 1
|
|
170
168
|
end
|
|
171
169
|
|
|
172
170
|
def render_placeholder(node)
|
|
173
171
|
if @tag_library
|
|
174
172
|
tag = @tag_library[node.class]
|
|
175
|
-
if tag
|
|
176
|
-
# Create a minimal interface for rendering
|
|
177
|
-
return tag.render(node, nil)
|
|
178
|
-
end
|
|
173
|
+
return tag.render(node, nil) if tag
|
|
179
174
|
end
|
|
180
175
|
|
|
181
|
-
# Default placeholder format if no tag library or tag not found
|
|
182
176
|
default_placeholder(node)
|
|
183
177
|
end
|
|
184
178
|
|
|
@@ -192,8 +186,6 @@ module Markbridge
|
|
|
192
186
|
"<<EVENT:#{@node_index}:#{node.name}>>"
|
|
193
187
|
when AST::Upload
|
|
194
188
|
"<<UPLOAD:#{@node_index}:#{node.sha1}>>"
|
|
195
|
-
else
|
|
196
|
-
"<<UNKNOWN:#{@node_index}>>"
|
|
197
189
|
end
|
|
198
190
|
end
|
|
199
191
|
end
|
|
@@ -15,12 +15,10 @@ module Markbridge
|
|
|
15
15
|
# @return [String]
|
|
16
16
|
def build(content, marker:, indent:)
|
|
17
17
|
lines = content.split("\n")
|
|
18
|
-
lines = [""] if lines.empty? # Handle empty content
|
|
19
18
|
first_line = "#{indent}#{marker}#{lines.first}"
|
|
20
19
|
|
|
21
|
-
return "#{first_line}\n" if lines.size
|
|
20
|
+
return "#{first_line}\n" if lines.size < 2
|
|
22
21
|
|
|
23
|
-
# Handle multi-line content with sophisticated blank line handling
|
|
24
22
|
format_multiline(lines, first_line, indent)
|
|
25
23
|
end
|
|
26
24
|
|
|
@@ -63,15 +61,17 @@ module Markbridge
|
|
|
63
61
|
end
|
|
64
62
|
end
|
|
65
63
|
|
|
66
|
-
# Handle empty lines in continuation
|
|
64
|
+
# Handle empty lines in continuation. Caller (format_continuation_line)
|
|
65
|
+
# only invokes this when `line.empty?`, and `content.split("\n")`
|
|
66
|
+
# trims trailing empty strings, so the LAST continuation line is
|
|
67
|
+
# never empty — `idx + 1` is always in bounds when we get here.
|
|
67
68
|
# @param idx [Integer] index in continuation_lines
|
|
68
69
|
# @param continuation_lines [Array<String>] all continuation lines
|
|
69
70
|
# @param continuation_indent [String] indent for continuation
|
|
70
71
|
# @return [String, nil] formatted line or nil to skip
|
|
71
72
|
def handle_empty_line(idx, continuation_lines, continuation_indent)
|
|
72
73
|
# Skip empty lines that come before nested list items (structural blanks)
|
|
73
|
-
|
|
74
|
-
return nil if next_line&.match?(/\A\s*(?:-|\d+\.)\s/)
|
|
74
|
+
return nil if continuation_lines[idx + 1].match?(/\A\s*(?:-|\d+\.)\s/)
|
|
75
75
|
|
|
76
76
|
# Preserve empty lines within text content (paragraph breaks) with indentation
|
|
77
77
|
continuation_indent
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "cgi"
|
|
4
|
+
|
|
5
|
+
module Markbridge
|
|
6
|
+
module Renderers
|
|
7
|
+
module Discourse
|
|
8
|
+
# Escapes text for safe inclusion in HTML output. Used when rendering
|
|
9
|
+
# content inside a CommonMark HTML block (e.g. TableTag's fallback)
|
|
10
|
+
# where Markdown-level escaping would not be applied.
|
|
11
|
+
class HtmlEscaper
|
|
12
|
+
# @param text [String, nil]
|
|
13
|
+
# @return [String]
|
|
14
|
+
def self.escape(text)
|
|
15
|
+
CGI.escapeHTML(text || "")
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -36,18 +36,15 @@ module Markbridge
|
|
|
36
36
|
# breaks disabled by default.
|
|
37
37
|
def initialize(escape_hard_line_breaks: false)
|
|
38
38
|
@escape_hard_line_breaks = escape_hard_line_breaks
|
|
39
|
-
@inline_content
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
# @inline_content / @inline_result / @inline_len are set by
|
|
40
|
+
# escape_inline on every call before any helper reads them;
|
|
41
|
+
# no defensive init needed.
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
-
# Fast-path
|
|
45
|
-
#
|
|
46
|
-
# > is needed for blockquote detection at line start
|
|
44
|
+
# Fast-path: skip escape_text entirely for content with no special
|
|
45
|
+
# chars. `>` is needed for blockquote detection at line start.
|
|
47
46
|
MAYBE_SPECIAL = /[\\`*_\[#+\-.!<>&|~=>)]/
|
|
48
47
|
|
|
49
|
-
# Check for indented code on any line
|
|
50
|
-
# Matches: 4+ spaces, tab, or space+tab combinations that reach column 4+
|
|
51
48
|
MAYBE_INDENTED_CODE = /(?:^|\n)(?: {4}|\t| {1,3}\t)/
|
|
52
49
|
|
|
53
50
|
# Block-level patterns
|
|
@@ -122,8 +119,7 @@ module Markbridge
|
|
|
122
119
|
# @return [String] the escaped text, or empty string if input is nil
|
|
123
120
|
# @note Multi-line HTML tags and blocks are handled by escaping the opening <
|
|
124
121
|
def escape(text)
|
|
125
|
-
return ""
|
|
126
|
-
return text if text.empty?
|
|
122
|
+
return "" if text.nil?
|
|
127
123
|
|
|
128
124
|
# Neutralize hard line breaks (trailing 2+ spaces before newline)
|
|
129
125
|
text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n")
|
|
@@ -136,7 +132,14 @@ module Markbridge
|
|
|
136
132
|
private
|
|
137
133
|
|
|
138
134
|
def escape_text(text)
|
|
139
|
-
|
|
135
|
+
# On CRLF input, consume `\r` as part of the line terminator instead
|
|
136
|
+
# of leaving it on the line. A trailing `\r` breaks line-end anchored
|
|
137
|
+
# regexes (e.g. SETEXT_UNDERLINE_*) and the `ws_end >= line_length`
|
|
138
|
+
# early-out in escape_indented_code, leaking NBSPs onto
|
|
139
|
+
# whitespace-only CRLF lines. The `include?` guard keeps the
|
|
140
|
+
# LF-only fast path on a string split (regex split is ~20% slower
|
|
141
|
+
# on the indented-code hot path).
|
|
142
|
+
lines = text.include?("\r") ? text.split(/\r?\n/, -1) : text.split("\n", -1)
|
|
140
143
|
return escape_line(lines[0], false) if lines.size == 1
|
|
141
144
|
|
|
142
145
|
# Pre-allocate result buffer
|
|
@@ -158,37 +161,32 @@ module Markbridge
|
|
|
158
161
|
end
|
|
159
162
|
|
|
160
163
|
def escape_line(line, prev_was_paragraph)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
#
|
|
164
|
+
# No `line.empty?` early-return: it's redundant with the
|
|
165
|
+
# `line.getbyte(indent_len).nil?` guard below, which catches both
|
|
166
|
+
# empty and whitespace-only lines while also preserving object
|
|
167
|
+
# identity (returns `line`).
|
|
164
168
|
return escape_indented_code(line) if INDENTED_CODE.match?(line)
|
|
165
169
|
|
|
166
|
-
#
|
|
167
|
-
|
|
170
|
+
# After INDENTED_CODE, line has at most 3 leading spaces, so the
|
|
171
|
+
# `< 3` bound keeps this a tight YJIT-friendly hot loop.
|
|
168
172
|
indent_len = 0
|
|
169
|
-
|
|
170
|
-
indent_len += 1
|
|
171
|
-
end
|
|
173
|
+
indent_len += 1 while indent_len < 3 && line.getbyte(indent_len) == SPACE
|
|
172
174
|
|
|
173
|
-
|
|
175
|
+
# Whitespace-only line (1-3 spaces) — getbyte past end is nil.
|
|
176
|
+
return line if line.getbyte(indent_len).nil?
|
|
174
177
|
|
|
175
178
|
has_indent = indent_len > 0
|
|
176
179
|
content = has_indent ? line[indent_len..] : line
|
|
177
180
|
|
|
178
|
-
# Apply block-level escaping (which may also do inline escaping)
|
|
179
181
|
escaped, skip_inline = escape_block_level(content, prev_was_paragraph)
|
|
180
|
-
|
|
181
|
-
# Apply inline escaping if block-level didn't handle it
|
|
182
182
|
escaped = escape_inline(escaped) unless skip_inline
|
|
183
183
|
|
|
184
|
-
# Prepend indent if present, preserve encoding
|
|
185
184
|
if has_indent
|
|
186
|
-
|
|
187
|
-
result = String.new(encoding:)
|
|
185
|
+
result = String.new(encoding: line.encoding)
|
|
188
186
|
result << line[0, indent_len] << escaped
|
|
189
187
|
result
|
|
190
188
|
else
|
|
191
|
-
escaped.
|
|
189
|
+
escaped.force_encoding(line.encoding)
|
|
192
190
|
end
|
|
193
191
|
end
|
|
194
192
|
|
|
@@ -203,15 +201,14 @@ module Markbridge
|
|
|
203
201
|
# - Content doesn't start at valid block position (no lists, headings, etc.)
|
|
204
202
|
# - Visual indentation is preserved (NBSP renders as space)
|
|
205
203
|
# We still escape inline content since it's no longer protected.
|
|
204
|
+
# Caller (escape_line) guarantees INDENTED_CODE matched, so line
|
|
205
|
+
# starts with at least one SPACE or TAB; ws_end is always ≥ 1.
|
|
206
206
|
line_length = line.length
|
|
207
207
|
ws_end = 0
|
|
208
|
-
while ws_end < line_length
|
|
209
|
-
byte = line.getbyte(ws_end)
|
|
210
|
-
break if byte != SPACE && byte != TAB
|
|
208
|
+
while ws_end < line_length && ((byte = line.getbyte(ws_end)) == SPACE || byte == TAB)
|
|
211
209
|
ws_end += 1
|
|
212
210
|
end
|
|
213
211
|
|
|
214
|
-
return line if ws_end == 0 # No leading whitespace (shouldn't happen, but safe)
|
|
215
212
|
return line if ws_end >= line_length # Whitespace-only line
|
|
216
213
|
|
|
217
214
|
# Convert leading whitespace to NBSP (tab = 4 NBSP for visual consistency)
|
|
@@ -310,6 +307,13 @@ module Markbridge
|
|
|
310
307
|
@inline_len = bytesize
|
|
311
308
|
pos = 0
|
|
312
309
|
|
|
310
|
+
# No loop-progress guard: every `dispatch_inline_byte` branch
|
|
311
|
+
# returns `pos + N` for N >= 1 by construction, so the loop
|
|
312
|
+
# is provably terminating. Mutations that break this
|
|
313
|
+
# (`while true`, body drops, selector swaps that short-circuit
|
|
314
|
+
# the dispatch) surface as timeouts rather than alive
|
|
315
|
+
# mutations, and the inline guard would otherwise cost ~15%
|
|
316
|
+
# on this hot path per benchmark.
|
|
313
317
|
while pos < @inline_len
|
|
314
318
|
byte = @inline_content.getbyte(pos)
|
|
315
319
|
pos = dispatch_inline_byte(byte, pos)
|
|
@@ -474,39 +478,42 @@ module Markbridge
|
|
|
474
478
|
end
|
|
475
479
|
|
|
476
480
|
def paragraph_line?(line)
|
|
477
|
-
|
|
481
|
+
pos = 0
|
|
482
|
+
line_len = line.bytesize
|
|
483
|
+
pos += 1 while pos < line_len && line.getbyte(pos) == SPACE
|
|
484
|
+
first_non_space = pos
|
|
478
485
|
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
while first_non_space < line_length && line.getbyte(first_non_space) == SPACE
|
|
482
|
-
first_non_space += 1
|
|
483
|
-
end
|
|
484
|
-
return false if first_non_space >= line_length || line.getbyte(first_non_space) == TAB
|
|
486
|
+
# Empty or whitespace-only lines: getbyte past the end returns nil.
|
|
487
|
+
return false if line.getbyte(first_non_space).nil?
|
|
485
488
|
|
|
486
|
-
|
|
489
|
+
# Indented code (4+ spaces or any leading \t) is not a paragraph.
|
|
490
|
+
# INDENTED_CODE also catches lines where first_non_space > 3, so no
|
|
491
|
+
# separate numeric boundary check is needed.
|
|
492
|
+
return false if INDENTED_CODE.match?(line)
|
|
487
493
|
|
|
488
|
-
|
|
489
|
-
# So setext headings CAN follow them
|
|
490
|
-
return true if content.getbyte(0) == BRACKET_OPEN
|
|
494
|
+
content = first_non_space == 0 ? line : line[first_non_space..]
|
|
491
495
|
|
|
492
|
-
|
|
496
|
+
# Lines starting with [ are paragraph content (the escaper rewrites [
|
|
497
|
+
# to \[). block_construct? has no BRACKET_OPEN case arm, so such
|
|
498
|
+
# lines naturally fall through and !block_construct?(content) == true.
|
|
499
|
+
!block_construct?(content)
|
|
493
500
|
end
|
|
494
501
|
|
|
495
502
|
# Checks whether content starts with a block-level markdown construct.
|
|
496
503
|
# Used by both escape_block_level (to decide what to escape) and
|
|
497
504
|
# paragraph_line? (to decide if setext underlines can follow).
|
|
498
505
|
def block_construct?(content)
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
case first_byte
|
|
506
|
+
case content.getbyte(0)
|
|
502
507
|
when HASH
|
|
503
508
|
ATX_HEADING.match?(content)
|
|
504
509
|
when GT
|
|
505
510
|
true
|
|
506
|
-
when DASH
|
|
507
|
-
BULLET_LIST.match?(content) ||
|
|
508
|
-
|
|
509
|
-
|
|
511
|
+
when DASH
|
|
512
|
+
BULLET_LIST.match?(content) || THEMATIC_BREAK_DASH.match?(content)
|
|
513
|
+
when STAR
|
|
514
|
+
BULLET_LIST.match?(content) || THEMATIC_BREAK_STAR.match?(content)
|
|
515
|
+
when PLUS
|
|
516
|
+
BULLET_LIST.match?(content)
|
|
510
517
|
when UNDERSCORE
|
|
511
518
|
THEMATIC_BREAK_UNDERSCORE.match?(content)
|
|
512
519
|
when BACKTICK
|
|
@@ -11,26 +11,40 @@ module Markbridge
|
|
|
11
11
|
class RenderContext
|
|
12
12
|
attr_reader :parents, :depth
|
|
13
13
|
|
|
14
|
-
def initialize(parents = [], parent_cache: nil)
|
|
14
|
+
def initialize(parents = [], parent_cache: nil, html_mode: false)
|
|
15
15
|
@parents = parents.freeze
|
|
16
16
|
@depth = parents.size
|
|
17
17
|
@parent_cache = parent_cache || build_cache(parents)
|
|
18
|
+
@html_mode = html_mode
|
|
18
19
|
end
|
|
19
20
|
|
|
20
|
-
# Create new context with element added to parent chain
|
|
21
|
-
# Incrementally updates cache instead of rebuilding from
|
|
21
|
+
# Create new context with element added to parent chain.
|
|
22
|
+
# Incrementally updates the cache (O(1)) instead of rebuilding from
|
|
23
|
+
# parents (O(depth)) — important for deeply-nested documents.
|
|
22
24
|
# @param element [AST::Element]
|
|
23
25
|
# @return [RenderContext]
|
|
24
26
|
def with_parent(element)
|
|
25
27
|
new_parents = @parents + [element]
|
|
26
28
|
|
|
27
|
-
# Incrementally update cache instead of rebuilding
|
|
28
29
|
new_cache = @parent_cache.dup
|
|
29
30
|
element_class = element.class
|
|
30
31
|
new_cache[element_class] ||= []
|
|
31
32
|
new_cache[element_class] = new_cache[element_class] + [element]
|
|
32
33
|
|
|
33
|
-
self.class.new(new_parents, parent_cache: new_cache)
|
|
34
|
+
self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Create new context with html_mode toggled
|
|
38
|
+
# Preserves parent chain and cache
|
|
39
|
+
# @param value [Boolean]
|
|
40
|
+
# @return [RenderContext]
|
|
41
|
+
def with_html_mode(value)
|
|
42
|
+
self.class.new(@parents, parent_cache: @parent_cache, html_mode: value)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# @return [Boolean]
|
|
46
|
+
def html_mode?
|
|
47
|
+
@html_mode
|
|
34
48
|
end
|
|
35
49
|
|
|
36
50
|
# Find closest parent of given type
|
|
@@ -54,7 +68,7 @@ module Markbridge
|
|
|
54
68
|
# @param klass [Class]
|
|
55
69
|
# @return [Boolean]
|
|
56
70
|
def has_parent?(klass)
|
|
57
|
-
|
|
71
|
+
!@parent_cache[klass].nil?
|
|
58
72
|
end
|
|
59
73
|
|
|
60
74
|
# Check if we're at the root (no parents)
|
|
@@ -65,14 +79,12 @@ module Markbridge
|
|
|
65
79
|
|
|
66
80
|
private
|
|
67
81
|
|
|
68
|
-
# Build cache from parents array
|
|
69
|
-
# Groups parents by class for fast lookup
|
|
82
|
+
# Build cache from parents array.
|
|
83
|
+
# Groups parents by class for fast O(1) lookup.
|
|
70
84
|
# @param parents [Array<AST::Element>]
|
|
71
85
|
# @return [Hash{Class => Array<AST::Element>}]
|
|
72
86
|
def build_cache(parents)
|
|
73
|
-
parents.
|
|
74
|
-
cache[parent.class] = cache[parent.class] + [parent]
|
|
75
|
-
end
|
|
87
|
+
parents.group_by(&:class)
|
|
76
88
|
end
|
|
77
89
|
end
|
|
78
90
|
end
|