markbridge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/lib/markbridge/all.rb +9 -0
  4. data/lib/markbridge/ast/align.rb +24 -0
  5. data/lib/markbridge/ast/attachment.rb +42 -0
  6. data/lib/markbridge/ast/bold.rb +13 -0
  7. data/lib/markbridge/ast/code.rb +27 -0
  8. data/lib/markbridge/ast/color.rb +25 -0
  9. data/lib/markbridge/ast/document.rb +27 -0
  10. data/lib/markbridge/ast/element.rb +47 -0
  11. data/lib/markbridge/ast/email.rb +27 -0
  12. data/lib/markbridge/ast/event.rb +59 -0
  13. data/lib/markbridge/ast/heading.rb +23 -0
  14. data/lib/markbridge/ast/horizontal_rule.rb +12 -0
  15. data/lib/markbridge/ast/image.rb +35 -0
  16. data/lib/markbridge/ast/italic.rb +13 -0
  17. data/lib/markbridge/ast/line_break.rb +12 -0
  18. data/lib/markbridge/ast/list.rb +52 -0
  19. data/lib/markbridge/ast/list_item.rb +13 -0
  20. data/lib/markbridge/ast/markdown_text.rb +37 -0
  21. data/lib/markbridge/ast/mention.rb +29 -0
  22. data/lib/markbridge/ast/node.rb +19 -0
  23. data/lib/markbridge/ast/paragraph.rb +13 -0
  24. data/lib/markbridge/ast/poll.rb +74 -0
  25. data/lib/markbridge/ast/quote.rb +46 -0
  26. data/lib/markbridge/ast/size.rb +25 -0
  27. data/lib/markbridge/ast/spoiler.rb +27 -0
  28. data/lib/markbridge/ast/strikethrough.rb +13 -0
  29. data/lib/markbridge/ast/subscript.rb +13 -0
  30. data/lib/markbridge/ast/superscript.rb +13 -0
  31. data/lib/markbridge/ast/text.rb +38 -0
  32. data/lib/markbridge/ast/underline.rb +13 -0
  33. data/lib/markbridge/ast/upload.rb +74 -0
  34. data/lib/markbridge/ast/url.rb +27 -0
  35. data/lib/markbridge/ast.rb +42 -0
  36. data/lib/markbridge/configuration.rb +11 -0
  37. data/lib/markbridge/gem_loader.rb +23 -0
  38. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +37 -0
  39. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -0
  40. data/lib/markbridge/parsers/bbcode/closing_strategies/strict.rb +12 -0
  41. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +121 -0
  42. data/lib/markbridge/parsers/bbcode/errors/max_depth_exceeded_error.rb +13 -0
  43. data/lib/markbridge/parsers/bbcode/handler_registry.rb +160 -0
  44. data/lib/markbridge/parsers/bbcode/handlers/align_handler.rb +26 -0
  45. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +104 -0
  46. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +44 -0
  47. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +25 -0
  48. data/lib/markbridge/parsers/bbcode/handlers/color_handler.rb +31 -0
  49. data/lib/markbridge/parsers/bbcode/handlers/email_handler.rb +25 -0
  50. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +51 -0
  51. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +36 -0
  52. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +26 -0
  53. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +64 -0
  54. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +48 -0
  55. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +28 -0
  56. data/lib/markbridge/parsers/bbcode/handlers/simple_handler.rb +28 -0
  57. data/lib/markbridge/parsers/bbcode/handlers/size_handler.rb +31 -0
  58. data/lib/markbridge/parsers/bbcode/handlers/spoiler_handler.rb +28 -0
  59. data/lib/markbridge/parsers/bbcode/handlers/url_handler.rb +24 -0
  60. data/lib/markbridge/parsers/bbcode/parser.rb +123 -0
  61. data/lib/markbridge/parsers/bbcode/parser_state.rb +93 -0
  62. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +126 -0
  63. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +35 -0
  64. data/lib/markbridge/parsers/bbcode/raw_content_result.rb +25 -0
  65. data/lib/markbridge/parsers/bbcode/scanner.rb +231 -0
  66. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +21 -0
  67. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +23 -0
  68. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +23 -0
  69. data/lib/markbridge/parsers/bbcode/tokens/token.rb +16 -0
  70. data/lib/markbridge/parsers/bbcode.rb +56 -0
  71. data/lib/markbridge/parsers/html/handler_registry.rb +87 -0
  72. data/lib/markbridge/parsers/html/handlers/base_handler.rb +27 -0
  73. data/lib/markbridge/parsers/html/handlers/image_handler.rb +40 -0
  74. data/lib/markbridge/parsers/html/handlers/list_handler.rb +29 -0
  75. data/lib/markbridge/parsers/html/handlers/list_item_handler.rb +26 -0
  76. data/lib/markbridge/parsers/html/handlers/paragraph_handler.rb +17 -0
  77. data/lib/markbridge/parsers/html/handlers/quote_handler.rb +28 -0
  78. data/lib/markbridge/parsers/html/handlers/raw_handler.rb +33 -0
  79. data/lib/markbridge/parsers/html/handlers/simple_handler.rb +26 -0
  80. data/lib/markbridge/parsers/html/handlers/url_handler.rb +27 -0
  81. data/lib/markbridge/parsers/html/parser.rb +113 -0
  82. data/lib/markbridge/parsers/html.rb +30 -0
  83. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +332 -0
  84. data/lib/markbridge/parsers/media_wiki/parser.rb +279 -0
  85. data/lib/markbridge/parsers/media_wiki.rb +15 -0
  86. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +130 -0
  87. data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +33 -0
  88. data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +40 -0
  89. data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +45 -0
  90. data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +28 -0
  91. data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +27 -0
  92. data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +32 -0
  93. data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +31 -0
  94. data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +33 -0
  95. data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +37 -0
  96. data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +29 -0
  97. data/lib/markbridge/parsers/text_formatter/parser.rb +132 -0
  98. data/lib/markbridge/parsers/text_formatter.rb +31 -0
  99. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +199 -0
  100. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +57 -0
  101. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +73 -0
  102. data/lib/markbridge/processors/discourse_markdown/detectors/mention.rb +57 -0
  103. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +90 -0
  104. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +123 -0
  105. data/lib/markbridge/processors/discourse_markdown/scanner.rb +199 -0
  106. data/lib/markbridge/processors/discourse_markdown.rb +16 -0
  107. data/lib/markbridge/processors.rb +8 -0
  108. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +83 -0
  109. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +468 -0
  110. data/lib/markbridge/renderers/discourse/render_context.rb +80 -0
  111. data/lib/markbridge/renderers/discourse/renderer.rb +63 -0
  112. data/lib/markbridge/renderers/discourse/rendering_interface.rb +86 -0
  113. data/lib/markbridge/renderers/discourse/tag.rb +29 -0
  114. data/lib/markbridge/renderers/discourse/tag_library.rb +67 -0
  115. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +24 -0
  116. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +46 -0
  117. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +18 -0
  118. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +54 -0
  119. data/lib/markbridge/renderers/discourse/tags/color_tag.rb +27 -0
  120. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +24 -0
  121. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +49 -0
  122. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +21 -0
  123. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +16 -0
  124. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +29 -0
  125. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +18 -0
  126. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +16 -0
  127. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +87 -0
  128. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +39 -0
  129. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +34 -0
  130. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +21 -0
  131. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +51 -0
  132. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +32 -0
  133. data/lib/markbridge/renderers/discourse/tags/size_tag.rb +27 -0
  134. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +24 -0
  135. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +18 -0
  136. data/lib/markbridge/renderers/discourse/tags/subscript_tag.rb +19 -0
  137. data/lib/markbridge/renderers/discourse/tags/superscript_tag.rb +19 -0
  138. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +19 -0
  139. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +80 -0
  140. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +24 -0
  141. data/lib/markbridge/renderers/discourse.rb +50 -0
  142. data/lib/markbridge/version.rb +5 -0
  143. data/lib/markbridge.rb +201 -0
  144. metadata +186 -0
@@ -0,0 +1,468 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Renderers
5
+ module Discourse
6
+ # Escapes text to prevent interpretation as Markdown formatting.
7
+ #
8
+ # Design principles:
9
+ # - No false negatives: all potentially special sequences MUST be escaped
10
+ # - False positives OK: over-escaping is acceptable for safety
11
+ # - Autolinks preserved: <https://...>, <mailto:...>, and <email@domain> remain functional
12
+ # - HTML escaped: tags, processing instructions, and SGML declarations are neutralized
13
+ # - Performance: minimal allocations, byte-level processing, early returns
14
+ # - Discourse-compatible: handles ndash conversion, unlimited ordered list numbers
15
+ #
16
+ # Optimized for Ruby 3.3+ with YJIT. Key optimizations:
17
+ # - Fast path returns original string for plain text (no allocations)
18
+ # - Pre-allocated result buffers with estimated capacity
19
+ # - Byte-level processing for inline escaping (YJIT-friendly tight loops)
20
+ # - Simplified escaping rules: [ breaks links, so ] doesn't need escaping
21
+ #
22
+ # @example Basic escaping
23
+ # escaper = Markbridge::Renderers::Discourse::MarkdownEscaper.new
24
+ # escaper.escape("# Heading") # => "\\# Heading"
25
+ # escaper.escape("*emphasis*") # => "\\*emphasis\\*"
26
+ # escaper.escape("foo -- bar") # => "foo \\-\\- bar"
27
+ #
28
+ # @example HTML is escaped
29
+ # escaper.escape("<div>content</div>") # => "\\<div>content\\</div>"
30
+ # escaper.escape("<?php echo 1; ?>") # => "\\<?php echo 1; ?>"
31
+ #
32
class MarkdownEscaper
  # Fast-path check: any character that might need escaping.
  # Only includes characters we actually escape (no ], {, }, ^).
  # > is needed for blockquote detection at line start; . and ) for ordered lists.
  MAYBE_SPECIAL = /[\\`*_\[#+\-.!<>&|~=)]/

  # Check for indented code on any line.
  # Matches: 4+ spaces, tab, or space+tab combinations that reach column 4+
  MAYBE_INDENTED_CODE = /(?:^|\n)(?: {4}|\t| {1,3}\t)/

  # Block-level patterns (all applied to a single line with its 0-3 space indent removed)
  ATX_HEADING = /\A\#{1,6}(?=[ \t]|$)/
  BLOCK_QUOTE = /\A>/
  # List markers followed by space, tab, or end of line
  BULLET_LIST = /\A[-+*](?=[ \t]|$)/
  ORDERED_LIST = /\A(\d+)([.)])(?=[ \t]|$)/
  THEMATIC_BREAK_DASH = /\A(?:-[ \t]*){3,}$/
  THEMATIC_BREAK_STAR = /\A(?:\*[ \t]*){3,}$/
  THEMATIC_BREAK_UNDERSCORE = /\A(?:_[ \t]*){3,}$/
  FENCED_CODE_BACKTICK = /\A`{3,}[^`]*$/
  FENCED_CODE_TILDE = /\A~{3,}/
  SETEXT_UNDERLINE_EQUALS = /\A=+[ \t]*$/
  SETEXT_UNDERLINE_DASH = /\A-+[ \t]*$/
  # Indented code: 4+ spaces, tab at start, or space+tab reaching column 4+
  INDENTED_CODE = /\A(?: {4}|\t| {1,3}\t)/

  # Inline quick-check pattern (includes < for HTML tag escaping)
  INLINE_SPECIAL = /[\\*_`\[!|<&~-]/

  # Entity reference pattern (we escape these to prevent conversion)
  ENTITY_REF = /\A&(?:\#[xX][0-9a-fA-F]{1,6}|\#[0-9]{1,7}|[a-zA-Z][a-zA-Z0-9]{0,31});/

  # HTML tag pattern (we escape these, but NOT autolinks)
  # Handles quoted attributes which can contain > characters
  # Attribute patterns: name="value" | name='value' | name=value | name
  HTML_ATTR = /(?:\s+[a-zA-Z_:][a-zA-Z0-9_.:-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))?)/
  HTML_TAG = %r{\A</?[a-zA-Z][a-zA-Z0-9-]*#{HTML_ATTR}*\s*/?>}

  # Autolink pattern - we pass these through entirely unchanged
  # Matches <http://...>, <https://...>, <mailto:...>, and email addresses
  AUTOLINK =
    %r{\A<(?:https?://|mailto:)[^>\s]*>|\A<[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*>}i

  # Match HTML-like constructs that need escaping:
  # - Processing instructions: <?php, <?xml, etc.
  # - SGML declarations: <!DOCTYPE, <!ELEMENT, <![CDATA[, <!--, etc.
  # - Incomplete/multi-line HTML tags: <div followed by attributes on next line
  # - Custom elements: <my-component>, <responsive-image>
  # The (?:[\s/]|$) ensures we don't match comparisons like "a < b"
  HTML_TAG_START = %r{\A<(?:[?!]|/?\s*[a-zA-Z][a-zA-Z0-9-]*(?:[\s/]|$))}

  # Byte constants for inline processing
  BACKSLASH = 92 # \
  BANG = 33 # !
  HASH = 35 # #
  AMP = 38 # &
  STAR = 42 # *
  PLUS = 43 # +
  DASH = 45 # -
  LT = 60 # <
  EQUALS = 61 # =
  GT = 62 # >
  BRACKET_OPEN = 91 # [
  UNDERSCORE = 95 # _
  BACKTICK = 96 # `
  PIPE = 124 # |
  TILDE = 126 # ~
  SPACE = 32
  TAB = 9
  DIGIT_0 = 48
  DIGIT_9 = 57

  # Non-breaking space - used to preserve visual indentation without
  # triggering code blocks or block-level markdown
  NBSP = "\u00A0"

  # @param escape_hard_line_breaks [Boolean] when true, strip trailing spaces
  #   before newlines to prevent CommonMark hard line breaks (<br/>).
  #   Defaults to false because Discourse has trailing-space hard line
  #   breaks disabled by default.
  def initialize(escape_hard_line_breaks: false)
    @escape_hard_line_breaks = escape_hard_line_breaks
  end

  # Escapes markdown special characters in the given text.
  #
  # Handles both block-level constructs (headings, lists, code blocks, HTML blocks)
  # and inline formatting (emphasis, code spans, links, inline HTML).
  # Autolinks (<https://...>, <email@domain>) are intentionally preserved.
  #
  # @param text [String, nil] the text to escape
  # @return [String] the escaped text, or empty string if input is nil.
  #   The input object itself is returned when nothing needs escaping.
  # @note Multi-line HTML tags and blocks are handled by escaping the opening <
  def escape(text)
    return "".freeze if text.nil?
    return text if text.empty?

    # Neutralize trailing-space hard line breaks by dropping spaces before a newline
    if @escape_hard_line_breaks && text.include?(" \n")
      text = text.gsub(/ +\n/, "\n")
    end

    # Fast path: nothing that could be interpreted as Markdown
    return text unless MAYBE_SPECIAL.match?(text) || MAYBE_INDENTED_CODE.match?(text)

    escape_text(text)
  end

  private

  # Escapes +text+ line by line, tracking whether the previous line was a
  # paragraph line (needed to recognise setext heading underlines).
  def escape_text(text)
    lines = text.split("\n", -1)
    return escape_line(lines.first, false) if lines.size == 1

    # Pre-allocate result buffer
    buffer = String.new(capacity: text.bytesize + text.bytesize / 3, encoding: text.encoding)
    prev_was_paragraph = false

    lines.each_with_index do |line, index|
      buffer << "\n" unless index.zero?
      buffer << escape_line(line, prev_was_paragraph)
      prev_was_paragraph = paragraph_line?(line)
    end

    buffer
  end

  # Escapes a single line: indented code first, then block-level markers
  # (after skipping up to three leading spaces), then inline constructs.
  def escape_line(line, prev_was_paragraph)
    return line if line.empty?

    # Handle indented code blocks first
    return escape_indented_code(line) if INDENTED_CODE.match?(line)

    # Measure the 0-3 space indent (getbyte returns nil past the end)
    indent_len = 0
    indent_len += 1 while indent_len < 3 && line.getbyte(indent_len) == SPACE

    return line if indent_len >= line.length

    content = indent_len.zero? ? line : line[indent_len..]

    # Block-level escaping may already have inline-escaped the rest of the line
    escaped, inline_done = escape_block_level(content, prev_was_paragraph)
    escaped = escape_inline(escaped) unless inline_done

    if indent_len.zero?
      # Preserve original encoding without touching strings that already match
      escaped.encoding == line.encoding ? escaped : escaped.force_encoding(line.encoding)
    else
      String.new(encoding: line.encoding) << line[0, indent_len] << escaped
    end
  end

  # Replaces the leading whitespace of an indented-code line with NBSP so the
  # line no longer starts a code block, then inline-escapes the remaining
  # content (which is no longer protected by the code block).
  # Each space becomes one NBSP and each tab becomes four.
  def escape_indented_code(line)
    ws_len = 0
    while ws_len < line.length
      byte = line.getbyte(ws_len)
      break if byte != SPACE && byte != TAB

      ws_len += 1
    end

    # No leading whitespace (shouldn't happen) or whitespace-only line
    return line if ws_len.zero? || ws_len >= line.length

    nbsp_indent = String.new(encoding: line.encoding)
    line[0, ws_len].each_char { |c| nbsp_indent << (c == "\t" ? NBSP * 4 : NBSP) }

    "#{nbsp_indent}#{escape_inline(line[ws_len..])}"
  end

  # Escapes a block-level marker at the start of +content+, if there is one.
  #
  # @return [Array(String, Boolean)] the (possibly escaped) content and a flag
  #   that is true when inline escaping has already been applied
  def escape_block_level(content, prev_was_paragraph)
    case content.getbyte(0)
    when HASH
      return [escape_leading(content), true] if ATX_HEADING.match?(content)
    when GT, BRACKET_OPEN, PIPE
      return [escape_leading(content), true]
    when DASH
      if THEMATIC_BREAK_DASH.match?(content) ||
           (prev_was_paragraph && SETEXT_UNDERLINE_DASH.match?(content))
        return [escape_all_chars(content, DASH, "\\-"), true]
      end
      return [escape_leading(content), true] if BULLET_LIST.match?(content)
    when PLUS
      return [escape_leading(content), true] if BULLET_LIST.match?(content)
    when STAR
      return [escape_all_chars(content, STAR, "\\*"), true] if THEMATIC_BREAK_STAR.match?(content)
      return [escape_leading(content), true] if BULLET_LIST.match?(content)
    when UNDERSCORE
      if THEMATIC_BREAK_UNDERSCORE.match?(content)
        return [escape_all_chars(content, UNDERSCORE, "\\_"), true]
      end
    when EQUALS
      if prev_was_paragraph && SETEXT_UNDERLINE_EQUALS.match?(content)
        return [escape_all_chars(content, EQUALS, "\\="), true]
      end
    when BACKTICK
      if FENCED_CODE_BACKTICK.match?(content)
        # Escape ALL backticks to prevent code span interpretation
        # e.g., ```` becomes \`\`\`\` not \```` (which would be \` + ```)
        return [escape_all_chars(content, BACKTICK, "\\`"), true]
      end
    when TILDE
      # Inline-escape the rest too: once the fence is neutralised, the remaining
      # tildes and the info string are ordinary inline text.
      return [escape_leading(content), true] if FENCED_CODE_TILDE.match?(content)
    when DIGIT_0..DIGIT_9
      if (m = ORDERED_LIST.match(content))
        return ["#{m[1]}\\#{m[2]}#{escape_inline(m.post_match)}", true]
      end
    end

    [content, false]
  end

  # Backslash-escapes the first character of +content+ (always a single-byte
  # ASCII marker at the call sites) and inline-escapes everything after it.
  def escape_leading(content)
    "\\#{content[0]}#{escape_inline(content[1..])}"
  end

  # Replaces every byte equal to +byte_val+ with +escaped+ and copies all
  # other bytes through verbatim. The copy is done in a binary buffer so that
  # multi-byte characters (e.g. in a fenced-code info string) keep their exact
  # bytes; appending an Integer >= 128 to a UTF-8 string would append a code
  # point instead and corrupt the text.
  def escape_all_chars(str, byte_val, escaped)
    out = String.new(capacity: str.bytesize * 2, encoding: Encoding::BINARY)
    str.each_byte do |b|
      if b == byte_val
        out << escaped
      else
        out << b
      end
    end
    out.force_encoding(str.encoding)
  end

  # Escapes inline Markdown constructs by scanning +content+ byte by byte.
  # Returns +content+ itself when it contains no inline-special character.
  def escape_inline(content)
    # Quick check - if no special chars, return as-is
    return content unless INLINE_SPECIAL.match?(content)

    len = content.bytesize
    result = String.new(capacity: len + len / 4, encoding: content.encoding)
    i = 0

    while i < len
      b = content.getbyte(i)

      case b
      when BACKSLASH
        # Double the backslash when it would escape the next character or form a
        # trailing hard break; the next character is then processed on its own.
        following = content.getbyte(i + 1)
        result << (following.nil? || ascii_punctuation?(following) ? "\\\\" : "\\")
        i += 1
      when DASH
        if content.getbyte(i + 1) == DASH
          # Consecutive dashes - escape each for Discourse ndash prevention
          while content.getbyte(i) == DASH
            result << "\\-"
            i += 1
          end
        else
          result << "-"
          i += 1
        end
      when TILDE
        if content.getbyte(i + 1) == TILDE
          result << "\\~\\~"
          i += 2
        else
          result << "~"
          i += 1
        end
      when STAR
        result << "\\*"
        i += 1
      when UNDERSCORE
        result << "\\_"
        i += 1
      when BACKTICK
        result << "\\`"
        i += 1
      when BANG
        if content.getbyte(i + 1) == BRACKET_OPEN
          result << "\\!\\["
          i += 2
        else
          result << "!"
          i += 1
        end
      when BRACKET_OPEN
        result << "\\["
        i += 1
      when PIPE
        result << "\\|"
        i += 1
      when LT
        rest = content.byteslice(i, len - i)
        if (m = AUTOLINK.match(rest))
          # Autolinks pass through entirely unchanged
          result << m[0]
          i += m[0].bytesize
        elsif (m = HTML_TAG.match(rest))
          # Complete HTML tag: escape the <, keep the tag text for readability,
          # and escape backticks inside it to prevent code span interpretation
          result << "\\" << m[0].gsub("`") { "\\`" }
          i += m[0].bytesize
        elsif HTML_TAG_START.match?(rest)
          # Processing instructions, SGML declarations and potential tag starts
          result << "\\<"
          i += 1
        else
          # Not HTML-like (comparison operator, etc.)
          result << "<"
          i += 1
        end
      when AMP
        rest = content.byteslice(i, len - i)
        if (m = ENTITY_REF.match(rest))
          result << "\\" << m[0]
          i += m[0].bytesize
        else
          result << "&"
          i += 1
        end
      else
        if b < 128
          result << b
          i += 1
        else
          # Copy a whole multi-byte sequence at once (clamped at end of input)
          stop = [i + utf8_char_length(b), len].min
          result << content.byteslice(i, stop - i)
          i = stop
        end
      end
    end

    result
  end

  # True for the ASCII punctuation ranges !"#$%&'()*+,-./  :;<=>?@  [\]^_`  {|}~
  def ascii_punctuation?(byte)
    byte.between?(33, 47) || byte.between?(58, 64) || byte.between?(91, 96) ||
      byte.between?(123, 126)
  end

  # Length in bytes of the UTF-8 sequence introduced by +first_byte+
  # (1 for ASCII and for stray continuation bytes).
  def utf8_char_length(first_byte)
    if first_byte >= 240
      4
    elsif first_byte >= 224
      3
    elsif first_byte >= 192
      2
    else
      1
    end
  end

  # Whether +line+ counts as paragraph text, i.e. a following line of = or -
  # could turn it into a setext heading.
  def paragraph_line?(line)
    return false if line.empty?

    # Quick whitespace-only check
    lead = 0
    lead += 1 while line.getbyte(lead) == SPACE
    return false if lead >= line.length || line.getbyte(lead) == TAB

    # Check if this is a block construct
    content = lead <= 3 ? line[lead..] : line
    return false if content.nil? || content.empty?

    first_byte = content.getbyte(0)

    case first_byte
    when HASH
      return false if ATX_HEADING.match?(content)
    when GT
      return false
    when DASH, PLUS, STAR
      return false if BULLET_LIST.match?(content)
      return false if first_byte == DASH && THEMATIC_BREAK_DASH.match?(content)
      return false if first_byte == STAR && THEMATIC_BREAK_STAR.match?(content)
    when UNDERSCORE
      return false if THEMATIC_BREAK_UNDERSCORE.match?(content)
    when BACKTICK, TILDE
      return false if FENCED_CODE_BACKTICK.match?(content) || FENCED_CODE_TILDE.match?(content)
    when BRACKET_OPEN
      # Lines starting with [ get escaped to \[, which IS paragraph content,
      # so setext headings CAN follow them
      return true
    when DIGIT_0..DIGIT_9
      return false if ORDERED_LIST.match?(content)
    end

    !INDENTED_CODE.match?(line)
  end
end
466
+ end
467
+ end
468
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Renderers
5
+ module Discourse
6
+ # Immutable context for rendering that wraps the parent chain
7
+ # Provides query methods to ask about parent elements without
8
+ # the renderer knowing about specific element types
9
+ #
10
+ # Uses a hash-based cache for O(1) parent lookups instead of O(depth) scans
11
class RenderContext
  attr_reader :parents, :depth

  # @param parents [Array<AST::Element>] parent chain, outermost first.
  #   The array is copied (unless already frozen) so the caller's object is
  #   never frozen or otherwise modified.
  # @param parent_cache [Hash{Class => Array<AST::Element>}, nil] pre-built
  #   lookup table for +parents+; built from +parents+ when omitted
  def initialize(parents = [], parent_cache: nil)
    @parents = parents.frozen? ? parents : parents.dup.freeze
    @depth = @parents.size
    @parent_cache = parent_cache || build_cache(@parents)
  end

  # Create new context with element added to parent chain.
  # Copies the cache and extends one entry instead of rebuilding it.
  # @param element [AST::Element]
  # @return [RenderContext]
  def with_parent(element)
    klass = element.class

    new_cache = @parent_cache.dup
    new_cache[klass] = [*new_cache.fetch(klass, nil), element].freeze

    self.class.new([*@parents, element].freeze, parent_cache: new_cache.freeze)
  end

  # Find closest parent whose class is exactly +klass+ (hash lookup keyed by
  # each parent's own class; subclasses are not matched).
  # @param klass [Class]
  # @return [AST::Element, nil]
  def find_parent(klass)
    parents_of(klass)&.last
  end

  # Count parents whose class is exactly +klass+
  # @param klass [Class]
  # @return [Integer]
  def count_parents(klass)
    parents_of(klass)&.size || 0
  end

  # Check if a parent whose class is exactly +klass+ exists
  # @param klass [Class]
  # @return [Boolean]
  def has_parent?(klass)
    count_parents(klass).positive?
  end

  # Check if we're at the root (no parents)
  # @return [Boolean]
  def root?
    @depth.zero?
  end

  private

  # Read-only cache lookup. Uses fetch so a cache with a default proc is
  # never triggered and lookups never add keys.
  # @param klass [Class]
  # @return [Array<AST::Element>, nil]
  def parents_of(klass)
    @parent_cache.fetch(klass, nil)
  end

  # Build cache from parents array, grouping parents by class (order kept)
  # @param parents [Array<AST::Element>]
  # @return [Hash{Class => Array<AST::Element>}]
  def build_cache(parents)
    parents.group_by(&:class).each_value(&:freeze).freeze
  end
end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Renderers
5
+ module Discourse
6
+ # Renders AST to Discourse-flavored Markdown in-memory.
7
class Renderer
  # @param tag_library [#[], nil] maps a node class to a tag object responding
  #   to +render(node, interface)+; defaults to TagLibrary.default
  # @param escaper [#escape, nil] escapes plain text; defaults to MarkdownEscaper.new
  def initialize(tag_library: nil, escaper: nil)
    @tag_library = tag_library || TagLibrary.default
    @escaper = escaper || MarkdownEscaper.new
  end

  # Render a node to Markdown.
  #
  # A tag registered for the node's class takes precedence. Otherwise
  # containers render their children, MarkdownText passes through, Text is
  # escaped (unless inside a code element) and anything else renders as "".
  #
  # The outermost call owns a per-render cache of RenderingInterface objects;
  # nested calls made by tags reuse it and the cache is dropped when the
  # outermost call finishes, even if rendering raises.
  #
  # @param node [AST::Node]
  # @param context [RenderContext] rendering context with parent chain
  # @return [String]
  def render(node, context: RenderContext.new)
    root_call = @interface_cache.nil?
    # Keyed by the context object itself, compared by identity
    @interface_cache ||= {}.compare_by_identity

    tag = @tag_library[node.class]
    return tag.render(node, interface_for(context)) if tag

    case node
    when AST::Document, AST::Element
      render_children(node, context:)
    when AST::MarkdownText
      # Pass through markdown text as-is (already formatted)
      node.text
    when AST::Text
      # Escape plain text unless we're inside a code block
      context.has_parent?(AST::Code) ? node.text : @escaper.escape(node.text)
    else
      ""
    end
  ensure
    @interface_cache = nil if root_call
  end

  # Render all children of a node and concatenate the results
  # @param node [AST::Element]
  # @param context [RenderContext] rendering context
  # @return [String]
  def render_children(node, context:)
    node.children.map { |child| render(child, context:) }.join
  end

  private

  # One RenderingInterface per context object for the current render
  def interface_for(context)
    @interface_cache[context] ||= RenderingInterface.new(self, context)
  end
end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Renderers
5
+ module Discourse
6
+ # Interface that tags use for rendering operations
7
+ # Decouples tags from renderer implementation details
8
class RenderingInterface
  # HTML element used instead of a Markdown marker when the content already
  # contains that marker
  HTML_FALLBACKS = { "**" => "strong", "*" => "em", "~~" => "s" }.freeze

  attr_reader :context

  # @param renderer [#render, #render_children] renderer that calls are delegated to
  # @param context [RenderContext] context this interface is bound to
  def initialize(renderer, context)
    @renderer = renderer
    @context = context
  end

  # Core rendering operations (delegated to the renderer; the bound context
  # is used unless another one is given)
  def render_node(node, context: @context)
    @renderer.render(node, context:)
  end

  def render_children(element, context: @context)
    @renderer.render_children(element, context:)
  end

  # Context operations (delegated to the bound context)
  def with_parent(element)
    @context.with_parent(element)
  end

  def find_parent(klass)
    @context.find_parent(klass)
  end

  def count_parents(klass)
    @context.count_parents(klass)
  end

  def has_parent?(klass)
    @context.has_parent?(klass)
  end

  def root?
    @context.root?
  end

  # Check if element should be rendered in block context
  # @param node [AST::Node] container node or leaf like HorizontalRule
  # @return [Boolean] true for lists and horizontal rules, and for elements
  #   that have a direct Text child containing a newline
  def block_context?(node)
    # Block-level element types (but not code, which can be inline)
    return true if node.is_a?(AST::List) || node.is_a?(AST::HorizontalRule)
    return false unless node.is_a?(AST::Element)

    node.children.any? { |child| child.is_a?(AST::Text) && child.text.include?("\n") }
  end

  # Helper: wrap inline content with markers.
  #
  # Leading and trailing whitespace of the whole content stays outside the
  # markers; everything in between (including inner newlines) is wrapped as
  # one unit. Blank content is returned unchanged. If the content already
  # contains a marker, <strong>/<em>/<s> is used for the "**", "*" and "~~"
  # markers instead.
  #
  # @param content [String]
  # @param open_marker [String]
  # @param close_marker [String, nil] defaults to +open_marker+
  # @return [String]
  def wrap_inline(content, open_marker, close_marker = nil)
    close_marker ||= open_marker
    return content if content.strip.empty?

    # Handle conflicts with existing markers
    if content.include?(open_marker) || content.include?(close_marker)
      html_tag = HTML_FALLBACKS[open_marker]
      return "<#{html_tag}>#{content}</#{html_tag}>" if html_tag
    end

    # Anchor on the whole string (\A / \z), not on individual lines
    leading = content[/\A\s*/]
    trailing = content[/\s*\z/]
    core = content[leading.length...(content.length - trailing.length)]

    "#{leading}#{open_marker}#{core}#{close_marker}#{trailing}"
  end
end
84
+ end
85
+ end
86
+ end