markbridge 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markbridge/all.rb +4 -7
- data/lib/markbridge/ast/document.rb +1 -1
- data/lib/markbridge/ast/element.rb +2 -2
- data/lib/markbridge/ast/list.rb +2 -2
- data/lib/markbridge/ast/table.rb +61 -0
- data/lib/markbridge/ast/text.rb +5 -1
- data/lib/markbridge/ast.rb +1 -0
- data/lib/markbridge/bbcode.rb +4 -0
- data/lib/markbridge/gem_loader.rb +2 -3
- data/lib/markbridge/html.rb +4 -0
- data/lib/markbridge/mediawiki.rb +4 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
- data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
- data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
- data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
- data/lib/markbridge/parsers/bbcode.rb +4 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
- data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
- data/lib/markbridge/parsers/html/parser.rb +16 -15
- data/lib/markbridge/parsers/html.rb +3 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
- data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
- data/lib/markbridge/parsers/media_wiki.rb +1 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
- data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
- data/lib/markbridge/parsers/text_formatter.rb +1 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
- data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
- data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
- data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
- data/lib/markbridge/renderers/discourse/tag.rb +14 -1
- data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse.rb +4 -0
- data/lib/markbridge/textformatter.rb +4 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +27 -62
- metadata +19 -2
|
@@ -36,15 +36,15 @@ module Markbridge
|
|
|
36
36
|
# breaks disabled by default.
|
|
37
37
|
def initialize(escape_hard_line_breaks: false)
|
|
38
38
|
@escape_hard_line_breaks = escape_hard_line_breaks
|
|
39
|
+
# @inline_content / @inline_result / @inline_len are set by
|
|
40
|
+
# escape_inline on every call before any helper reads them;
|
|
41
|
+
# no defensive init needed.
|
|
39
42
|
end
|
|
40
43
|
|
|
41
|
-
# Fast-path
|
|
42
|
-
#
|
|
43
|
-
# > is needed for blockquote detection at line start
|
|
44
|
+
# Fast-path: skip escape_text entirely for content with no special
|
|
45
|
+
# chars. `>` is needed for blockquote detection at line start.
|
|
44
46
|
MAYBE_SPECIAL = /[\\`*_\[#+\-.!<>&|~=>)]/
|
|
45
47
|
|
|
46
|
-
# Check for indented code on any line
|
|
47
|
-
# Matches: 4+ spaces, tab, or space+tab combinations that reach column 4+
|
|
48
48
|
MAYBE_INDENTED_CODE = /(?:^|\n)(?: {4}|\t| {1,3}\t)/
|
|
49
49
|
|
|
50
50
|
# Block-level patterns
|
|
@@ -119,8 +119,7 @@ module Markbridge
|
|
|
119
119
|
# @return [String] the escaped text, or empty string if input is nil
|
|
120
120
|
# @note Multi-line HTML tags and blocks are handled by escaping the opening <
|
|
121
121
|
def escape(text)
|
|
122
|
-
return ""
|
|
123
|
-
return text if text.empty?
|
|
122
|
+
return "" if text.nil?
|
|
124
123
|
|
|
125
124
|
# Neutralize hard line breaks (trailing 2+ spaces before newline)
|
|
126
125
|
text = text.gsub(/ +\n/, "\n") if @escape_hard_line_breaks && text.include?(" \n")
|
|
@@ -137,7 +136,8 @@ module Markbridge
|
|
|
137
136
|
return escape_line(lines[0], false) if lines.size == 1
|
|
138
137
|
|
|
139
138
|
# Pre-allocate result buffer
|
|
140
|
-
|
|
139
|
+
bytesize = text.bytesize
|
|
140
|
+
result = String.new(capacity: bytesize + bytesize / 3, encoding: text.encoding)
|
|
141
141
|
prev_was_paragraph = false
|
|
142
142
|
first = true
|
|
143
143
|
|
|
@@ -154,35 +154,32 @@ module Markbridge
|
|
|
154
154
|
end
|
|
155
155
|
|
|
156
156
|
def escape_line(line, prev_was_paragraph)
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
#
|
|
157
|
+
# No `line.empty?` early-return: it's redundant with the
|
|
158
|
+
# `line.getbyte(indent_len).nil?` guard below, which catches both
|
|
159
|
+
# empty and whitespace-only lines while also preserving object
|
|
160
|
+
# identity (returns `line`).
|
|
160
161
|
return escape_indented_code(line) if INDENTED_CODE.match?(line)
|
|
161
162
|
|
|
162
|
-
#
|
|
163
|
+
# After INDENTED_CODE, line has at most 3 leading spaces, so the
|
|
164
|
+
# `< 3` bound keeps this a tight YJIT-friendly hot loop.
|
|
163
165
|
indent_len = 0
|
|
164
|
-
|
|
165
|
-
indent_len += 1
|
|
166
|
-
end
|
|
166
|
+
indent_len += 1 while indent_len < 3 && line.getbyte(indent_len) == SPACE
|
|
167
167
|
|
|
168
|
-
|
|
168
|
+
# Whitespace-only line (1-3 spaces) — getbyte past end is nil.
|
|
169
|
+
return line if line.getbyte(indent_len).nil?
|
|
169
170
|
|
|
170
|
-
|
|
171
|
+
has_indent = indent_len > 0
|
|
172
|
+
content = has_indent ? line[indent_len..] : line
|
|
171
173
|
|
|
172
|
-
# Apply block-level escaping (which may also do inline escaping)
|
|
173
174
|
escaped, skip_inline = escape_block_level(content, prev_was_paragraph)
|
|
174
|
-
|
|
175
|
-
# Apply inline escaping if block-level didn't handle it
|
|
176
175
|
escaped = escape_inline(escaped) unless skip_inline
|
|
177
176
|
|
|
178
|
-
|
|
179
|
-
if indent_len > 0
|
|
177
|
+
if has_indent
|
|
180
178
|
result = String.new(encoding: line.encoding)
|
|
181
179
|
result << line[0, indent_len] << escaped
|
|
182
180
|
result
|
|
183
181
|
else
|
|
184
|
-
|
|
185
|
-
escaped.is_a?(String) ? escaped.force_encoding(line.encoding) : escaped
|
|
182
|
+
escaped.force_encoding(line.encoding)
|
|
186
183
|
end
|
|
187
184
|
end
|
|
188
185
|
|
|
@@ -197,21 +194,21 @@ module Markbridge
|
|
|
197
194
|
# - Content doesn't start at valid block position (no lists, headings, etc.)
|
|
198
195
|
# - Visual indentation is preserved (NBSP renders as space)
|
|
199
196
|
# We still escape inline content since it's no longer protected.
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
197
|
+
# Caller (escape_line) guarantees INDENTED_CODE matched, so line
|
|
198
|
+
# starts with at least one SPACE or TAB; ws_end is always ≥ 1.
|
|
199
|
+
line_length = line.length
|
|
200
|
+
ws_end = 0
|
|
201
|
+
while ws_end < line_length && ((byte = line.getbyte(ws_end)) == SPACE || byte == TAB)
|
|
202
|
+
ws_end += 1
|
|
205
203
|
end
|
|
206
204
|
|
|
207
|
-
return line if
|
|
208
|
-
return line if i >= line.length # Whitespace-only line
|
|
205
|
+
return line if ws_end >= line_length # Whitespace-only line
|
|
209
206
|
|
|
210
207
|
# Convert leading whitespace to NBSP (tab = 4 NBSP for visual consistency)
|
|
211
208
|
nbsp_indent = String.new(encoding: line.encoding)
|
|
212
|
-
line[0,
|
|
209
|
+
line[0, ws_end].each_char { |char| nbsp_indent << (char == "\t" ? (NBSP * 4) : NBSP) }
|
|
213
210
|
|
|
214
|
-
content = line[
|
|
211
|
+
content = line[ws_end..]
|
|
215
212
|
"#{nbsp_indent}#{escape_inline(content)}"
|
|
216
213
|
end
|
|
217
214
|
|
|
@@ -220,22 +217,15 @@ module Markbridge
|
|
|
220
217
|
|
|
221
218
|
case first_byte
|
|
222
219
|
when HASH
|
|
223
|
-
return
|
|
220
|
+
return escape_first_char_inline(content, "\\#") if ATX_HEADING.match?(content)
|
|
224
221
|
when GT
|
|
225
|
-
return
|
|
222
|
+
return escape_first_char_inline(content, "\\>")
|
|
226
223
|
when DASH
|
|
227
|
-
|
|
228
|
-
(prev_was_paragraph && SETEXT_UNDERLINE_DASH.match?(content))
|
|
229
|
-
return escape_all_chars(content, DASH, "\\-"), true
|
|
230
|
-
end
|
|
231
|
-
return "\\-#{escape_inline(content[1..])}", true if BULLET_LIST.match?(content)
|
|
224
|
+
return escape_block_dash(content, prev_was_paragraph)
|
|
232
225
|
when PLUS
|
|
233
|
-
return
|
|
226
|
+
return escape_first_char_inline(content, "\\+") if BULLET_LIST.match?(content)
|
|
234
227
|
when STAR
|
|
235
|
-
|
|
236
|
-
return escape_all_chars(content, STAR, "\\*"), true
|
|
237
|
-
end
|
|
238
|
-
return "\\*#{escape_inline(content[1..])}", true if BULLET_LIST.match?(content)
|
|
228
|
+
return escape_block_star(content)
|
|
239
229
|
when UNDERSCORE
|
|
240
230
|
if THEMATIC_BREAK_UNDERSCORE.match?(content)
|
|
241
231
|
return escape_all_chars(content, UNDERSCORE, "\\_"), true
|
|
@@ -246,162 +236,221 @@ module Markbridge
|
|
|
246
236
|
end
|
|
247
237
|
when BACKTICK
|
|
248
238
|
if FENCED_CODE_BACKTICK.match?(content)
|
|
249
|
-
# Escape ALL backticks to prevent code span interpretation
|
|
250
|
-
# e.g., ```` becomes \`\`\`\` not \```` (which would be \` + ```)
|
|
251
239
|
return escape_all_chars(content, BACKTICK, "\\`"), true
|
|
252
240
|
end
|
|
253
241
|
when TILDE
|
|
254
242
|
return "\\#{content}", true if FENCED_CODE_TILDE.match?(content)
|
|
255
243
|
when BRACKET_OPEN
|
|
256
|
-
return "\\[
|
|
244
|
+
return escape_first_char_inline(content, "\\[")
|
|
257
245
|
when PIPE
|
|
258
|
-
return
|
|
246
|
+
return escape_first_char_inline(content, "\\|")
|
|
259
247
|
when DIGIT_0..DIGIT_9
|
|
260
|
-
|
|
261
|
-
prefix = m[1]
|
|
262
|
-
delim = m[2]
|
|
263
|
-
rest = content[m[0].length..]
|
|
264
|
-
return "#{prefix}\\#{delim}#{escape_inline(rest)}", true
|
|
265
|
-
end
|
|
248
|
+
return escape_block_ordered_list(content)
|
|
266
249
|
end
|
|
267
250
|
|
|
268
251
|
[content, false]
|
|
269
252
|
end
|
|
270
253
|
|
|
254
|
+
# Escape the first character and inline-escape the rest.
|
|
255
|
+
def escape_first_char_inline(content, escaped_char)
|
|
256
|
+
["#{escaped_char}#{escape_inline(content[1..])}", true]
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
def escape_block_dash(content, prev_was_paragraph)
|
|
260
|
+
if THEMATIC_BREAK_DASH.match?(content) ||
|
|
261
|
+
(prev_was_paragraph && SETEXT_UNDERLINE_DASH.match?(content))
|
|
262
|
+
return escape_all_chars(content, DASH, "\\-"), true
|
|
263
|
+
end
|
|
264
|
+
return escape_first_char_inline(content, "\\-") if BULLET_LIST.match?(content)
|
|
265
|
+
[content, false]
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
def escape_block_star(content)
|
|
269
|
+
return escape_all_chars(content, STAR, "\\*"), true if THEMATIC_BREAK_STAR.match?(content)
|
|
270
|
+
return escape_first_char_inline(content, "\\*") if BULLET_LIST.match?(content)
|
|
271
|
+
[content, false]
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
def escape_block_ordered_list(content)
|
|
275
|
+
if (match = ORDERED_LIST.match(content))
|
|
276
|
+
rest = content[match[0].length..]
|
|
277
|
+
return "#{match[1]}\\#{match[2]}#{escape_inline(rest)}", true
|
|
278
|
+
end
|
|
279
|
+
[content, false]
|
|
280
|
+
end
|
|
281
|
+
|
|
271
282
|
def escape_all_chars(str, byte_val, escaped)
|
|
272
283
|
result = String.new(capacity: str.bytesize * 2, encoding: str.encoding)
|
|
273
|
-
str.each_byte do |
|
|
274
|
-
if
|
|
284
|
+
str.each_byte do |byte|
|
|
285
|
+
if byte == byte_val
|
|
275
286
|
result << escaped
|
|
276
287
|
else
|
|
277
|
-
result <<
|
|
288
|
+
result << byte
|
|
278
289
|
end
|
|
279
290
|
end
|
|
280
291
|
result
|
|
281
292
|
end
|
|
282
293
|
|
|
283
294
|
def escape_inline(content)
|
|
284
|
-
# Quick check - if no special chars, return as-is
|
|
285
295
|
return content unless INLINE_SPECIAL.match?(content)
|
|
286
296
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
i += 1
|
|
304
|
-
elsif i + 1 == len # backslash at end (hard break)
|
|
305
|
-
result << "\\\\"
|
|
306
|
-
i += 1
|
|
307
|
-
else
|
|
308
|
-
result << b
|
|
309
|
-
i += 1
|
|
310
|
-
end
|
|
311
|
-
when DASH # -
|
|
312
|
-
if i + 1 < len && content.getbyte(i + 1) == DASH
|
|
313
|
-
# Consecutive dashes - escape each for Discourse ndash prevention
|
|
314
|
-
while i < len && content.getbyte(i) == DASH
|
|
315
|
-
result << "\\-"
|
|
316
|
-
i += 1
|
|
317
|
-
end
|
|
318
|
-
else
|
|
319
|
-
result << b
|
|
320
|
-
i += 1
|
|
321
|
-
end
|
|
322
|
-
when TILDE # ~
|
|
323
|
-
if i + 1 < len && content.getbyte(i + 1) == TILDE
|
|
324
|
-
result << "\\~\\~"
|
|
325
|
-
i += 2
|
|
326
|
-
else
|
|
327
|
-
result << b
|
|
328
|
-
i += 1
|
|
329
|
-
end
|
|
330
|
-
when STAR # *
|
|
331
|
-
while i < len && content.getbyte(i) == STAR
|
|
332
|
-
result << "\\*"
|
|
333
|
-
i += 1
|
|
334
|
-
end
|
|
335
|
-
when UNDERSCORE # _
|
|
336
|
-
while i < len && content.getbyte(i) == UNDERSCORE
|
|
337
|
-
result << "\\_"
|
|
338
|
-
i += 1
|
|
339
|
-
end
|
|
340
|
-
when BACKTICK # `
|
|
341
|
-
while i < len && content.getbyte(i) == BACKTICK
|
|
342
|
-
result << "\\`"
|
|
343
|
-
i += 1
|
|
344
|
-
end
|
|
345
|
-
when BANG # !
|
|
346
|
-
if i + 1 < len && content.getbyte(i + 1) == BRACKET_OPEN
|
|
347
|
-
result << "\\!\\["
|
|
348
|
-
i += 2
|
|
349
|
-
else
|
|
350
|
-
result << b
|
|
351
|
-
i += 1
|
|
352
|
-
end
|
|
353
|
-
when BRACKET_OPEN # [
|
|
354
|
-
result << "\\["
|
|
355
|
-
i += 1
|
|
356
|
-
when PIPE # |
|
|
357
|
-
result << "\\|"
|
|
358
|
-
i += 1
|
|
359
|
-
when LT # <
|
|
360
|
-
remaining = content.byteslice(i, len - i)
|
|
361
|
-
# Check for autolinks first - pass through entirely unchanged
|
|
362
|
-
if (m = AUTOLINK.match(remaining))
|
|
363
|
-
result << m[0]
|
|
364
|
-
i += m[0].bytesize
|
|
365
|
-
# Escape complete HTML tags (include tag in output for readability)
|
|
366
|
-
# Also escape backticks inside the tag to prevent code span interpretation
|
|
367
|
-
elsif (m = HTML_TAG.match(remaining))
|
|
368
|
-
escaped_tag = m[0].gsub("`") { "\\`" }
|
|
369
|
-
result << "\\" << escaped_tag
|
|
370
|
-
i += m[0].bytesize
|
|
371
|
-
# Escape HTML-like constructs: processing instructions, SGML declarations,
|
|
372
|
-
# and potential tag starts (including multi-line and custom elements)
|
|
373
|
-
elsif HTML_TAG_START.match?(remaining)
|
|
374
|
-
result << "\\<"
|
|
375
|
-
i += 1
|
|
376
|
-
else
|
|
377
|
-
# Not HTML-like (comparison operator, etc.)
|
|
378
|
-
result << b
|
|
379
|
-
i += 1
|
|
380
|
-
end
|
|
381
|
-
when AMP # &
|
|
382
|
-
remaining = content.byteslice(i, len - i)
|
|
383
|
-
if (m = ENTITY_REF.match(remaining))
|
|
384
|
-
result << "\\" << m[0]
|
|
385
|
-
i += m[0].bytesize
|
|
386
|
-
else
|
|
387
|
-
result << b
|
|
388
|
-
i += 1
|
|
389
|
-
end
|
|
390
|
-
else
|
|
391
|
-
# Regular character - handle multi-byte UTF-8
|
|
392
|
-
if b < 128
|
|
393
|
-
result << b
|
|
394
|
-
i += 1
|
|
395
|
-
else
|
|
396
|
-
char_len = utf8_char_length(b)
|
|
397
|
-
end_i = [i + char_len, len].min
|
|
398
|
-
result << content.byteslice(i, end_i - i)
|
|
399
|
-
i = end_i
|
|
400
|
-
end
|
|
401
|
-
end
|
|
297
|
+
bytesize = content.bytesize
|
|
298
|
+
@inline_content = content
|
|
299
|
+
@inline_result = String.new(capacity: bytesize + bytesize / 4, encoding: content.encoding)
|
|
300
|
+
@inline_len = bytesize
|
|
301
|
+
pos = 0
|
|
302
|
+
|
|
303
|
+
# No loop-progress guard: every `dispatch_inline_byte` branch
|
|
304
|
+
# returns `pos + N` for N >= 1 by construction, so the loop
|
|
305
|
+
# is provably terminating. Mutations that break this
|
|
306
|
+
# (`while true`, body drops, selector swaps that short-circuit
|
|
307
|
+
# the dispatch) surface as timeouts rather than alive
|
|
308
|
+
# mutations, and the inline guard would otherwise cost ~15%
|
|
309
|
+
# on this hot path per benchmark.
|
|
310
|
+
while pos < @inline_len
|
|
311
|
+
byte = @inline_content.getbyte(pos)
|
|
312
|
+
pos = dispatch_inline_byte(byte, pos)
|
|
402
313
|
end
|
|
403
314
|
|
|
404
|
-
|
|
315
|
+
@inline_result
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def dispatch_inline_byte(byte, pos)
|
|
319
|
+
case byte
|
|
320
|
+
when BACKSLASH
|
|
321
|
+
escape_backslash(pos)
|
|
322
|
+
when DASH
|
|
323
|
+
escape_consecutive_pair(pos, DASH, "\\-")
|
|
324
|
+
when TILDE
|
|
325
|
+
escape_tilde_pair(pos)
|
|
326
|
+
when STAR
|
|
327
|
+
escape_char_run(pos, STAR, "\\*")
|
|
328
|
+
when UNDERSCORE
|
|
329
|
+
escape_char_run(pos, UNDERSCORE, "\\_")
|
|
330
|
+
when BACKTICK
|
|
331
|
+
escape_char_run(pos, BACKTICK, "\\`")
|
|
332
|
+
when BANG
|
|
333
|
+
escape_image_open(pos)
|
|
334
|
+
when BRACKET_OPEN
|
|
335
|
+
@inline_result << "\\["
|
|
336
|
+
pos + 1
|
|
337
|
+
when PIPE
|
|
338
|
+
@inline_result << "\\|"
|
|
339
|
+
pos + 1
|
|
340
|
+
when LT
|
|
341
|
+
escape_lt(pos)
|
|
342
|
+
when AMP
|
|
343
|
+
escape_amp(pos)
|
|
344
|
+
else
|
|
345
|
+
escape_regular_char(byte, pos)
|
|
346
|
+
end
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
# Escape backslash before ASCII punctuation or at end of content.
|
|
350
|
+
def escape_backslash(pos)
|
|
351
|
+
next_pos = pos + 1
|
|
352
|
+
if next_pos >= @inline_len || ascii_punctuation?(@inline_content.getbyte(next_pos))
|
|
353
|
+
@inline_result << "\\\\"
|
|
354
|
+
else
|
|
355
|
+
@inline_result << BACKSLASH
|
|
356
|
+
end
|
|
357
|
+
next_pos
|
|
358
|
+
end
|
|
359
|
+
|
|
360
|
+
# Escape consecutive pairs (e.g., -- for ndash prevention) or pass single through.
|
|
361
|
+
def escape_consecutive_pair(pos, byte_val, escaped)
|
|
362
|
+
next_pos = pos + 1
|
|
363
|
+
if next_pos < @inline_len && @inline_content.getbyte(next_pos) == byte_val
|
|
364
|
+
escape_char_run(pos, byte_val, escaped)
|
|
365
|
+
else
|
|
366
|
+
@inline_result << byte_val
|
|
367
|
+
next_pos
|
|
368
|
+
end
|
|
369
|
+
end
|
|
370
|
+
|
|
371
|
+
# Escape ~~ pairs, pass single ~ through.
|
|
372
|
+
def escape_tilde_pair(pos)
|
|
373
|
+
next_pos = pos + 1
|
|
374
|
+
if next_pos < @inline_len && @inline_content.getbyte(next_pos) == TILDE
|
|
375
|
+
@inline_result << "\\~\\~"
|
|
376
|
+
pos + 2
|
|
377
|
+
else
|
|
378
|
+
@inline_result << TILDE
|
|
379
|
+
next_pos
|
|
380
|
+
end
|
|
381
|
+
end
|
|
382
|
+
|
|
383
|
+
# Escape all consecutive occurrences of a repeatable character (*, _, `).
|
|
384
|
+
def escape_char_run(pos, byte_val, escaped)
|
|
385
|
+
while pos < @inline_len && @inline_content.getbyte(pos) == byte_val
|
|
386
|
+
@inline_result << escaped
|
|
387
|
+
pos += 1
|
|
388
|
+
end
|
|
389
|
+
pos
|
|
390
|
+
end
|
|
391
|
+
|
|
392
|
+
# Escape ![ image syntax, pass standalone ! through.
|
|
393
|
+
def escape_image_open(pos)
|
|
394
|
+
next_pos = pos + 1
|
|
395
|
+
if next_pos < @inline_len && @inline_content.getbyte(next_pos) == BRACKET_OPEN
|
|
396
|
+
@inline_result << "\\!\\["
|
|
397
|
+
pos + 2
|
|
398
|
+
else
|
|
399
|
+
@inline_result << BANG
|
|
400
|
+
next_pos
|
|
401
|
+
end
|
|
402
|
+
end
|
|
403
|
+
|
|
404
|
+
# Handle < for autolinks (preserved), HTML tags (escaped), and other constructs.
|
|
405
|
+
def escape_lt(pos)
|
|
406
|
+
remaining = remaining_content(pos)
|
|
407
|
+
|
|
408
|
+
if (match = AUTOLINK.match(remaining))
|
|
409
|
+
matched = match[0]
|
|
410
|
+
@inline_result << matched
|
|
411
|
+
pos + matched.bytesize
|
|
412
|
+
elsif (match = HTML_TAG.match(remaining))
|
|
413
|
+
matched = match[0]
|
|
414
|
+
@inline_result << "\\" << matched.gsub("`") { "\\`" }
|
|
415
|
+
pos + matched.bytesize
|
|
416
|
+
elsif HTML_TAG_START.match?(remaining)
|
|
417
|
+
@inline_result << "\\<"
|
|
418
|
+
pos + 1
|
|
419
|
+
else
|
|
420
|
+
@inline_result << LT
|
|
421
|
+
pos + 1
|
|
422
|
+
end
|
|
423
|
+
end
|
|
424
|
+
|
|
425
|
+
# Handle & for entity references.
|
|
426
|
+
def escape_amp(pos)
|
|
427
|
+
remaining = remaining_content(pos)
|
|
428
|
+
|
|
429
|
+
if (match = ENTITY_REF.match(remaining))
|
|
430
|
+
matched = match[0]
|
|
431
|
+
@inline_result << "\\" << matched
|
|
432
|
+
pos + matched.bytesize
|
|
433
|
+
else
|
|
434
|
+
@inline_result << AMP
|
|
435
|
+
pos + 1
|
|
436
|
+
end
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
def remaining_content(pos)
|
|
440
|
+
@inline_content.byteslice(pos, @inline_len - pos)
|
|
441
|
+
end
|
|
442
|
+
|
|
443
|
+
# Handle regular characters including multi-byte UTF-8.
|
|
444
|
+
def escape_regular_char(byte, pos)
|
|
445
|
+
if byte < 128
|
|
446
|
+
@inline_result << byte
|
|
447
|
+
pos + 1
|
|
448
|
+
else
|
|
449
|
+
char_len = utf8_char_length(byte)
|
|
450
|
+
end_pos = [pos + char_len, @inline_len].min
|
|
451
|
+
@inline_result << @inline_content.byteslice(pos, end_pos - pos)
|
|
452
|
+
end_pos
|
|
453
|
+
end
|
|
405
454
|
end
|
|
406
455
|
|
|
407
456
|
def ascii_punctuation?(byte)
|
|
@@ -422,45 +471,53 @@ module Markbridge
|
|
|
422
471
|
end
|
|
423
472
|
|
|
424
473
|
def paragraph_line?(line)
|
|
425
|
-
|
|
474
|
+
pos = 0
|
|
475
|
+
line_len = line.bytesize
|
|
476
|
+
pos += 1 while pos < line_len && line.getbyte(pos) == SPACE
|
|
477
|
+
first_non_space = pos
|
|
426
478
|
|
|
427
|
-
#
|
|
428
|
-
|
|
429
|
-
while first_non_space < line.length && line.getbyte(first_non_space) == SPACE
|
|
430
|
-
first_non_space += 1
|
|
431
|
-
end
|
|
432
|
-
return false if first_non_space >= line.length || line.getbyte(first_non_space) == TAB
|
|
479
|
+
# Empty or whitespace-only lines: getbyte past the end returns nil.
|
|
480
|
+
return false if line.getbyte(first_non_space).nil?
|
|
433
481
|
|
|
434
|
-
#
|
|
435
|
-
|
|
436
|
-
|
|
482
|
+
# Indented code (4+ spaces or any leading \t) is not a paragraph.
|
|
483
|
+
# INDENTED_CODE also catches lines where first_non_space > 3, so no
|
|
484
|
+
# separate numeric boundary check is needed.
|
|
485
|
+
return false if INDENTED_CODE.match?(line)
|
|
437
486
|
|
|
438
|
-
|
|
487
|
+
content = first_non_space == 0 ? line : line[first_non_space..]
|
|
439
488
|
|
|
440
|
-
|
|
489
|
+
# Lines starting with [ are paragraph content (the escaper rewrites [
|
|
490
|
+
# to \[). block_construct? has no BRACKET_OPEN case arm, so such
|
|
491
|
+
# lines naturally fall through and !block_construct?(content) == true.
|
|
492
|
+
!block_construct?(content)
|
|
493
|
+
end
|
|
494
|
+
|
|
495
|
+
# Checks whether content starts with a block-level markdown construct.
|
|
496
|
+
# Used by both escape_block_level (to decide what to escape) and
|
|
497
|
+
# paragraph_line? (to decide if setext underlines can follow).
|
|
498
|
+
def block_construct?(content)
|
|
499
|
+
case content.getbyte(0)
|
|
441
500
|
when HASH
|
|
442
|
-
|
|
501
|
+
ATX_HEADING.match?(content)
|
|
443
502
|
when GT
|
|
444
|
-
|
|
445
|
-
when DASH
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
503
|
+
true
|
|
504
|
+
when DASH
|
|
505
|
+
BULLET_LIST.match?(content) || THEMATIC_BREAK_DASH.match?(content)
|
|
506
|
+
when STAR
|
|
507
|
+
BULLET_LIST.match?(content) || THEMATIC_BREAK_STAR.match?(content)
|
|
508
|
+
when PLUS
|
|
509
|
+
BULLET_LIST.match?(content)
|
|
449
510
|
when UNDERSCORE
|
|
450
|
-
|
|
451
|
-
when BACKTICK
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
when BRACKET_OPEN
|
|
456
|
-
# Lines starting with [ get escaped to \[, which IS paragraph content
|
|
457
|
-
# So setext headings CAN follow them
|
|
458
|
-
return true
|
|
511
|
+
THEMATIC_BREAK_UNDERSCORE.match?(content)
|
|
512
|
+
when BACKTICK
|
|
513
|
+
FENCED_CODE_BACKTICK.match?(content)
|
|
514
|
+
when TILDE
|
|
515
|
+
FENCED_CODE_TILDE.match?(content)
|
|
459
516
|
when DIGIT_0..DIGIT_9
|
|
460
|
-
|
|
517
|
+
ORDERED_LIST.match?(content)
|
|
518
|
+
else
|
|
519
|
+
false
|
|
461
520
|
end
|
|
462
|
-
|
|
463
|
-
!INDENTED_CODE.match?(line)
|
|
464
521
|
end
|
|
465
522
|
end
|
|
466
523
|
end
|
|
@@ -11,26 +11,40 @@ module Markbridge
|
|
|
11
11
|
class RenderContext
|
|
12
12
|
attr_reader :parents, :depth
|
|
13
13
|
|
|
14
|
-
def initialize(parents = [], parent_cache: nil)
|
|
14
|
+
def initialize(parents = [], parent_cache: nil, html_mode: false)
|
|
15
15
|
@parents = parents.freeze
|
|
16
16
|
@depth = parents.size
|
|
17
17
|
@parent_cache = parent_cache || build_cache(parents)
|
|
18
|
+
@html_mode = html_mode
|
|
18
19
|
end
|
|
19
20
|
|
|
20
|
-
# Create new context with element added to parent chain
|
|
21
|
-
# Incrementally updates cache instead of rebuilding from
|
|
21
|
+
# Create new context with element added to parent chain.
|
|
22
|
+
# Incrementally updates the cache (O(1)) instead of rebuilding from
|
|
23
|
+
# parents (O(depth)) — important for deeply-nested documents.
|
|
22
24
|
# @param element [AST::Element]
|
|
23
25
|
# @return [RenderContext]
|
|
24
26
|
def with_parent(element)
|
|
25
27
|
new_parents = @parents + [element]
|
|
26
28
|
|
|
27
|
-
# Incrementally update cache instead of rebuilding
|
|
28
29
|
new_cache = @parent_cache.dup
|
|
29
30
|
element_class = element.class
|
|
30
31
|
new_cache[element_class] ||= []
|
|
31
32
|
new_cache[element_class] = new_cache[element_class] + [element]
|
|
32
33
|
|
|
33
|
-
self.class.new(new_parents, parent_cache: new_cache)
|
|
34
|
+
self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Create new context with html_mode toggled
|
|
38
|
+
# Preserves parent chain and cache
|
|
39
|
+
# @param value [Boolean]
|
|
40
|
+
# @return [RenderContext]
|
|
41
|
+
def with_html_mode(value)
|
|
42
|
+
self.class.new(@parents, parent_cache: @parent_cache, html_mode: value)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# @return [Boolean]
|
|
46
|
+
def html_mode?
|
|
47
|
+
@html_mode
|
|
34
48
|
end
|
|
35
49
|
|
|
36
50
|
# Find closest parent of given type
|
|
@@ -54,7 +68,7 @@ module Markbridge
|
|
|
54
68
|
# @param klass [Class]
|
|
55
69
|
# @return [Boolean]
|
|
56
70
|
def has_parent?(klass)
|
|
57
|
-
|
|
71
|
+
!@parent_cache[klass].nil?
|
|
58
72
|
end
|
|
59
73
|
|
|
60
74
|
# Check if we're at the root (no parents)
|
|
@@ -65,14 +79,12 @@ module Markbridge
|
|
|
65
79
|
|
|
66
80
|
private
|
|
67
81
|
|
|
68
|
-
# Build cache from parents array
|
|
69
|
-
# Groups parents by class for fast lookup
|
|
82
|
+
# Build cache from parents array.
|
|
83
|
+
# Groups parents by class for fast O(1) lookup.
|
|
70
84
|
# @param parents [Array<AST::Element>]
|
|
71
85
|
# @return [Hash{Class => Array<AST::Element>}]
|
|
72
86
|
def build_cache(parents)
|
|
73
|
-
parents.
|
|
74
|
-
cache[parent.class] = cache[parent.class] + [parent]
|
|
75
|
-
end
|
|
87
|
+
parents.group_by(&:class)
|
|
76
88
|
end
|
|
77
89
|
end
|
|
78
90
|
end
|