markbridge 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markbridge/all.rb +4 -7
- data/lib/markbridge/ast/document.rb +1 -1
- data/lib/markbridge/ast/element.rb +2 -2
- data/lib/markbridge/ast/list.rb +2 -2
- data/lib/markbridge/ast/table.rb +61 -0
- data/lib/markbridge/ast/text.rb +5 -1
- data/lib/markbridge/ast.rb +1 -0
- data/lib/markbridge/bbcode.rb +4 -0
- data/lib/markbridge/gem_loader.rb +2 -3
- data/lib/markbridge/html.rb +4 -0
- data/lib/markbridge/mediawiki.rb +4 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
- data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
- data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
- data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
- data/lib/markbridge/parsers/bbcode.rb +4 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
- data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
- data/lib/markbridge/parsers/html/parser.rb +16 -15
- data/lib/markbridge/parsers/html.rb +3 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
- data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
- data/lib/markbridge/parsers/media_wiki.rb +1 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
- data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
- data/lib/markbridge/parsers/text_formatter.rb +1 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
- data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
- data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
- data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
- data/lib/markbridge/renderers/discourse/tag.rb +14 -1
- data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse.rb +4 -0
- data/lib/markbridge/textformatter.rb +4 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +27 -62
- metadata +19 -2
|
@@ -5,8 +5,21 @@ module Markbridge
|
|
|
5
5
|
module MediaWiki
|
|
6
6
|
# Parses inline MediaWiki markup within a line of text.
|
|
7
7
|
# Handles bold ('''), italic (''), links ([[...]]), external links ([...]),
|
|
8
|
-
# and HTML inline tags
|
|
8
|
+
# and HTML inline tags via an InlineTagRegistry.
|
|
9
|
+
#
|
|
10
|
+
# @example With custom registry
|
|
11
|
+
# registry = InlineTagRegistry.build_from_default do |r|
|
|
12
|
+
# r.register("mark", :formatting, AST::Bold)
|
|
13
|
+
# end
|
|
14
|
+
# parser = InlineParser.new(inline_tag_registry: registry)
|
|
9
15
|
class InlineParser
|
|
16
|
+
MAX_INLINE_DEPTH = 20
|
|
17
|
+
|
|
18
|
+
def initialize(inline_tag_registry: nil, depth: 0)
|
|
19
|
+
@registry = inline_tag_registry || InlineTagRegistry.default
|
|
20
|
+
@depth = depth
|
|
21
|
+
end
|
|
22
|
+
|
|
10
23
|
# Parse inline markup and append resulting AST nodes to the parent element.
|
|
11
24
|
#
|
|
12
25
|
# @param text [String] the text to parse for inline markup
|
|
@@ -19,20 +32,19 @@ module Markbridge
|
|
|
19
32
|
@text_buffer = +""
|
|
20
33
|
|
|
21
34
|
while @pos < @length
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
35
|
+
char = @input[@pos]
|
|
36
|
+
|
|
37
|
+
case char
|
|
38
|
+
when "'"
|
|
39
|
+
consecutive_apostrophes_at(@pos) >= 2 ? parse_bold_italic : append_literal(char)
|
|
40
|
+
when "["
|
|
28
41
|
flush_text
|
|
29
|
-
parse_external_link
|
|
30
|
-
|
|
42
|
+
@input[@pos + 1] == "[" ? parse_internal_link : parse_external_link
|
|
43
|
+
when "<"
|
|
31
44
|
flush_text
|
|
32
45
|
parse_html_tag
|
|
33
46
|
else
|
|
34
|
-
|
|
35
|
-
@pos += 1
|
|
47
|
+
append_literal(char)
|
|
36
48
|
end
|
|
37
49
|
end
|
|
38
50
|
|
|
@@ -41,77 +53,67 @@ module Markbridge
|
|
|
41
53
|
|
|
42
54
|
private
|
|
43
55
|
|
|
44
|
-
|
|
56
|
+
def append_literal(char)
|
|
57
|
+
@text_buffer << char
|
|
58
|
+
@pos += 1
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Precondition: caller has verified @input[@pos..@pos+1] is "''".
|
|
45
62
|
def parse_bold_italic
|
|
46
63
|
start = @pos
|
|
47
|
-
count =
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if count >= 5
|
|
52
|
-
flush_text
|
|
53
|
-
@pos += 5
|
|
54
|
-
parse_bold_italic_combo(start)
|
|
55
|
-
elsif count >= 3
|
|
56
|
-
flush_text
|
|
57
|
-
@pos += 3
|
|
58
|
-
parse_bold_content(start)
|
|
59
|
-
elsif count >= 2
|
|
60
|
-
flush_text
|
|
61
|
-
@pos += 2
|
|
62
|
-
parse_italic_content(start)
|
|
63
|
-
else
|
|
64
|
-
@text_buffer << @input[@pos]
|
|
65
|
-
@pos += 1
|
|
66
|
-
end
|
|
64
|
+
count = [consecutive_apostrophes_at(@pos), 5].min
|
|
65
|
+
flush_text
|
|
66
|
+
@pos += count
|
|
67
|
+
parse_apostrophe_formatting(count, start)
|
|
67
68
|
end
|
|
68
69
|
|
|
69
|
-
# Parse
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
else
|
|
81
|
-
# No closing found - treat as literal text
|
|
82
|
-
@text_buffer << "'''''"
|
|
83
|
-
@pos = start + 5
|
|
70
|
+
# Parse apostrophe-delimited formatting (bold, italic, or bold+italic).
|
|
71
|
+
#
|
|
72
|
+
# @param apostrophe_count [Integer] number of apostrophes (2, 3, or 5)
|
|
73
|
+
# @param start [Integer] position before the opening apostrophes
|
|
74
|
+
def parse_apostrophe_formatting(apostrophe_count, start)
|
|
75
|
+
content = collect_until_apostrophes(apostrophe_count)
|
|
76
|
+
|
|
77
|
+
unless content
|
|
78
|
+
@text_buffer << ("'" * apostrophe_count)
|
|
79
|
+
@pos = start + apostrophe_count
|
|
80
|
+
return
|
|
84
81
|
end
|
|
85
|
-
end
|
|
86
82
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
83
|
+
element = build_formatting_element(apostrophe_count)
|
|
84
|
+
parse_inner_content(content, parent: innermost_element(element))
|
|
85
|
+
@parent << element
|
|
86
|
+
end
|
|
91
87
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
88
|
+
# Build the AST element(s) for the given apostrophe count.
|
|
89
|
+
def build_formatting_element(apostrophe_count)
|
|
90
|
+
case apostrophe_count
|
|
91
|
+
when 5
|
|
92
|
+
AST::Bold.new << AST::Italic.new
|
|
93
|
+
when 3
|
|
94
|
+
AST::Bold.new
|
|
95
|
+
when 2
|
|
96
|
+
AST::Italic.new
|
|
99
97
|
end
|
|
100
98
|
end
|
|
101
99
|
|
|
102
|
-
#
|
|
103
|
-
def
|
|
104
|
-
|
|
105
|
-
|
|
100
|
+
# Return the innermost element to receive parsed content.
|
|
101
|
+
def innermost_element(element)
|
|
102
|
+
element.children.empty? ? element : element.children.last
|
|
103
|
+
end
|
|
106
104
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
@pos = start + 2
|
|
105
|
+
# Parse inner content and append to a parent element.
|
|
106
|
+
# Respects MAX_INLINE_DEPTH to prevent stack overflow from deeply nested markup.
|
|
107
|
+
def parse_inner_content(content, parent:)
|
|
108
|
+
if @depth + 1 >= MAX_INLINE_DEPTH
|
|
109
|
+
parent << AST::Text.new(content)
|
|
110
|
+
return
|
|
114
111
|
end
|
|
112
|
+
|
|
113
|
+
InlineParser.new(inline_tag_registry: @registry, depth: @depth + 1).parse(
|
|
114
|
+
content,
|
|
115
|
+
parent:,
|
|
116
|
+
)
|
|
115
117
|
end
|
|
116
118
|
|
|
117
119
|
# Collect text until we find n consecutive apostrophes.
|
|
@@ -122,14 +124,13 @@ module Markbridge
|
|
|
122
124
|
def collect_until_apostrophes(count)
|
|
123
125
|
start = @pos
|
|
124
126
|
while @pos < @length
|
|
125
|
-
if
|
|
127
|
+
if consecutive_apostrophes_at(@pos) >= count
|
|
126
128
|
content = @input[start...@pos]
|
|
127
129
|
@pos += count
|
|
128
130
|
return content
|
|
129
131
|
end
|
|
130
132
|
@pos += 1
|
|
131
133
|
end
|
|
132
|
-
nil
|
|
133
134
|
end
|
|
134
135
|
|
|
135
136
|
# Count consecutive apostrophes starting at position.
|
|
@@ -137,9 +138,7 @@ module Markbridge
|
|
|
137
138
|
# @param pos [Integer]
|
|
138
139
|
# @return [Integer]
|
|
139
140
|
def consecutive_apostrophes_at(pos)
|
|
140
|
-
|
|
141
|
-
count += 1 while pos + count < @length && @input[pos + count] == "'"
|
|
142
|
-
count
|
|
141
|
+
@input[pos..].each_char.take_while { |c| c == "'" }.length
|
|
143
142
|
end
|
|
144
143
|
|
|
145
144
|
# Parse [[internal link]] or [[target|display text]].
|
|
@@ -191,7 +190,6 @@ module Markbridge
|
|
|
191
190
|
@parent << url
|
|
192
191
|
end
|
|
193
192
|
|
|
194
|
-
# Parse an HTML tag (<code>, <nowiki>, <pre>, <br>, <s>, <del>, <u>, <ins>, <sup>, <sub>).
|
|
195
193
|
def parse_html_tag
|
|
196
194
|
tag_match = @input[@pos..].match(%r{\A<(/?)([a-z]+)(?: [^>]*)?\s*(/?)>}i)
|
|
197
195
|
unless tag_match
|
|
@@ -200,75 +198,67 @@ module Markbridge
|
|
|
200
198
|
return
|
|
201
199
|
end
|
|
202
200
|
|
|
201
|
+
full_match = tag_match[0]
|
|
203
202
|
closing = !tag_match[1].empty?
|
|
204
|
-
tag_name = tag_match[2].downcase
|
|
205
203
|
self_closing = !tag_match[3].empty?
|
|
206
|
-
|
|
204
|
+
tag_name = tag_match[2].downcase
|
|
207
205
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
when "pre"
|
|
214
|
-
handle_paired_raw_tag(tag_name, closing, full_match, AST::Code)
|
|
215
|
-
when "br"
|
|
216
|
-
@pos += full_match.length
|
|
217
|
-
@parent << AST::LineBreak.new
|
|
218
|
-
when "s", "del"
|
|
219
|
-
handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Strikethrough)
|
|
220
|
-
when "u", "ins"
|
|
221
|
-
handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Underline)
|
|
222
|
-
when "sup"
|
|
223
|
-
handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Superscript)
|
|
224
|
-
when "sub"
|
|
225
|
-
handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Subscript)
|
|
226
|
-
else
|
|
227
|
-
# Unknown HTML tag - treat as text
|
|
228
|
-
@text_buffer << full_match
|
|
229
|
-
@pos += full_match.length
|
|
206
|
+
# Closing/self-closing tags and unknown tags are treated as literal text
|
|
207
|
+
entry = @registry[tag_name]
|
|
208
|
+
if closing || self_closing || !entry
|
|
209
|
+
advance_as_text(full_match)
|
|
210
|
+
return
|
|
230
211
|
end
|
|
212
|
+
|
|
213
|
+
dispatch_html_tag(entry, tag_name, full_match)
|
|
231
214
|
end
|
|
232
215
|
|
|
233
|
-
#
|
|
234
|
-
def
|
|
235
|
-
|
|
236
|
-
|
|
216
|
+
# Dispatch an HTML-like tag based on its registry entry type.
|
|
217
|
+
def dispatch_html_tag(entry, tag_name, full_match)
|
|
218
|
+
case entry.type
|
|
219
|
+
when :raw
|
|
220
|
+
if entry.element_class.nil?
|
|
221
|
+
handle_nowiki_tag(full_match)
|
|
222
|
+
else
|
|
223
|
+
handle_paired_raw_tag(tag_name, full_match, entry.element_class)
|
|
224
|
+
end
|
|
225
|
+
when :formatting
|
|
226
|
+
handle_paired_tag(tag_name, full_match, entry.element_class)
|
|
227
|
+
when :self_closing
|
|
237
228
|
@pos += full_match.length
|
|
238
|
-
|
|
229
|
+
@parent << entry.element_class.new
|
|
239
230
|
end
|
|
231
|
+
end
|
|
240
232
|
|
|
233
|
+
# Advance position and buffer the match as literal text.
|
|
234
|
+
def advance_as_text(full_match)
|
|
235
|
+
@text_buffer << full_match
|
|
241
236
|
@pos += full_match.length
|
|
242
|
-
|
|
243
|
-
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
# Handle <nowiki>...</nowiki> - preserves content as literal text.
|
|
240
|
+
def handle_nowiki_tag(full_match)
|
|
241
|
+
@pos += full_match.length
|
|
242
|
+
close_pos = @input.index("</nowiki>", @pos)
|
|
244
243
|
|
|
245
244
|
if close_pos
|
|
246
|
-
|
|
247
|
-
@
|
|
248
|
-
@pos = close_pos + close_tag.length
|
|
245
|
+
@text_buffer << @input[@pos...close_pos]
|
|
246
|
+
@pos = close_pos + "</nowiki>".length
|
|
249
247
|
else
|
|
250
|
-
# No closing tag found - treat opening tag as text
|
|
251
248
|
@text_buffer << full_match
|
|
252
249
|
end
|
|
253
250
|
end
|
|
254
251
|
|
|
255
252
|
# Handle paired raw tags like <code>...</code> and <pre>...</pre>.
|
|
256
253
|
# Content inside is not parsed for wiki markup.
|
|
257
|
-
def handle_paired_raw_tag(tag_name,
|
|
258
|
-
if closing
|
|
259
|
-
@text_buffer << full_match
|
|
260
|
-
@pos += full_match.length
|
|
261
|
-
return
|
|
262
|
-
end
|
|
263
|
-
|
|
254
|
+
def handle_paired_raw_tag(tag_name, full_match, element_class)
|
|
264
255
|
@pos += full_match.length
|
|
265
256
|
close_tag = "</#{tag_name}>"
|
|
266
257
|
close_pos = @input.index(close_tag, @pos)
|
|
267
258
|
|
|
268
259
|
if close_pos
|
|
269
|
-
raw_content = @input[@pos...close_pos]
|
|
270
260
|
element = element_class.new
|
|
271
|
-
element << AST::Text.new(
|
|
261
|
+
element << AST::Text.new(@input[@pos...close_pos])
|
|
272
262
|
@parent << element
|
|
273
263
|
@pos = close_pos + close_tag.length
|
|
274
264
|
else
|
|
@@ -278,47 +268,21 @@ module Markbridge
|
|
|
278
268
|
|
|
279
269
|
# Handle paired formatting tags like <s>, <u>, <sup>, <sub>.
|
|
280
270
|
# Content inside IS parsed for wiki markup.
|
|
281
|
-
def handle_paired_tag(tag_name,
|
|
282
|
-
if closing || self_closing
|
|
283
|
-
@text_buffer << full_match
|
|
284
|
-
@pos += full_match.length
|
|
285
|
-
return
|
|
286
|
-
end
|
|
287
|
-
|
|
271
|
+
def handle_paired_tag(tag_name, full_match, element_class)
|
|
288
272
|
@pos += full_match.length
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
close_pos = nil
|
|
292
|
-
close_tag_length = 0
|
|
293
|
-
|
|
294
|
-
close_tags.each do |ct|
|
|
295
|
-
pos = @input.index(ct, @pos)
|
|
296
|
-
if pos && (close_pos.nil? || pos < close_pos)
|
|
297
|
-
close_pos = pos
|
|
298
|
-
close_tag_length = ct.length
|
|
299
|
-
end
|
|
300
|
-
end
|
|
273
|
+
close_tag = "</#{tag_name}>"
|
|
274
|
+
close_pos = @input.index(close_tag, @pos)
|
|
301
275
|
|
|
302
276
|
if close_pos
|
|
303
|
-
inner_content = @input[@pos...close_pos]
|
|
304
277
|
element = element_class.new
|
|
305
|
-
|
|
306
|
-
inner_parser.parse(inner_content, parent: element)
|
|
278
|
+
parse_inner_content(@input[@pos...close_pos], parent: element)
|
|
307
279
|
@parent << element
|
|
308
|
-
@pos = close_pos +
|
|
280
|
+
@pos = close_pos + close_tag.length
|
|
309
281
|
else
|
|
310
282
|
@text_buffer << full_match
|
|
311
283
|
end
|
|
312
284
|
end
|
|
313
285
|
|
|
314
|
-
# Return the possible closing tags for a given tag name.
|
|
315
|
-
#
|
|
316
|
-
# @param tag_name [String]
|
|
317
|
-
# @return [Array<String>]
|
|
318
|
-
def close_tags_for(tag_name)
|
|
319
|
-
["</#{tag_name}>"]
|
|
320
|
-
end
|
|
321
|
-
|
|
322
286
|
# Flush accumulated text buffer to the parent as a Text node.
|
|
323
287
|
def flush_text
|
|
324
288
|
return if @text_buffer.empty?
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module MediaWiki
|
|
6
|
+
# Registry of inline HTML-like tag handlers for the MediaWiki parser.
|
|
7
|
+
#
|
|
8
|
+
# Supports three tag types:
|
|
9
|
+
# - :raw - content is preserved verbatim (e.g., <code>, <nowiki>)
|
|
10
|
+
# - :formatting - content is parsed for inline wiki markup (e.g., <s>, <u>)
|
|
11
|
+
# - :self_closing - no content, produces a leaf AST node (e.g., <br>)
|
|
12
|
+
#
|
|
13
|
+
# @example Default usage
|
|
14
|
+
# registry = InlineTagRegistry.default
|
|
15
|
+
# entry = registry["s"]
|
|
16
|
+
# entry.type # => :formatting
|
|
17
|
+
# entry.element_class # => AST::Strikethrough
|
|
18
|
+
#
|
|
19
|
+
# @example Custom registration
|
|
20
|
+
# registry = InlineTagRegistry.build_from_default do |r|
|
|
21
|
+
# r.register("mark", :formatting, AST::Bold)
|
|
22
|
+
# end
|
|
23
|
+
class InlineTagRegistry
|
|
24
|
+
Entry = Data.define(:type, :element_class)
|
|
25
|
+
|
|
26
|
+
def initialize
|
|
27
|
+
@entries = {}
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Register a handler for an inline HTML-like tag.
|
|
31
|
+
#
|
|
32
|
+
# @param tag_name [String] the tag name (case-insensitive)
|
|
33
|
+
# @param type [:raw, :formatting, :self_closing] how the tag content is handled
|
|
34
|
+
# @param element_class [Class] the AST node class to create
|
|
35
|
+
# @return [self]
|
|
36
|
+
def register(tag_name, type, element_class)
|
|
37
|
+
validate_type!(type)
|
|
38
|
+
@entries[tag_name.downcase] = Entry.new(type:, element_class:)
|
|
39
|
+
self
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Look up a tag entry by name.
|
|
43
|
+
#
|
|
44
|
+
# @param tag_name [String]
|
|
45
|
+
# @return [Entry, nil]
|
|
46
|
+
def [](tag_name)
|
|
47
|
+
@entries[tag_name.downcase]
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Check if a tag name is registered.
|
|
51
|
+
#
|
|
52
|
+
# @param tag_name [String]
|
|
53
|
+
# @return [Boolean]
|
|
54
|
+
def known?(tag_name)
|
|
55
|
+
@entries.key?(tag_name.downcase)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Create the default registry with standard MediaWiki inline tags.
|
|
59
|
+
#
|
|
60
|
+
# @return [InlineTagRegistry]
|
|
61
|
+
def self.default
|
|
62
|
+
registry = new
|
|
63
|
+
|
|
64
|
+
# Raw tags - content preserved verbatim, not parsed for wiki markup
|
|
65
|
+
registry.register("nowiki", :raw, nil)
|
|
66
|
+
registry.register("code", :raw, AST::Code)
|
|
67
|
+
registry.register("pre", :raw, AST::Code)
|
|
68
|
+
|
|
69
|
+
# Formatting tags - content parsed for inline wiki markup
|
|
70
|
+
registry.register("s", :formatting, AST::Strikethrough)
|
|
71
|
+
registry.register("del", :formatting, AST::Strikethrough)
|
|
72
|
+
registry.register("u", :formatting, AST::Underline)
|
|
73
|
+
registry.register("ins", :formatting, AST::Underline)
|
|
74
|
+
registry.register("sup", :formatting, AST::Superscript)
|
|
75
|
+
registry.register("sub", :formatting, AST::Subscript)
|
|
76
|
+
|
|
77
|
+
# Self-closing tags - produce a leaf node, no content
|
|
78
|
+
registry.register("br", :self_closing, AST::LineBreak)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Build a registry from the default with optional customization.
|
|
82
|
+
#
|
|
83
|
+
# @yield [InlineTagRegistry] the registry to customize
|
|
84
|
+
# @return [InlineTagRegistry]
|
|
85
|
+
def self.build_from_default
|
|
86
|
+
registry = default
|
|
87
|
+
yield(registry) if block_given?
|
|
88
|
+
registry
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
VALID_TYPES = %i[raw formatting self_closing].freeze
|
|
94
|
+
|
|
95
|
+
def validate_type!(type)
|
|
96
|
+
return if VALID_TYPES.include?(type)
|
|
97
|
+
|
|
98
|
+
raise ArgumentError, "type must be one of #{VALID_TYPES}, got #{type.inspect}"
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|