markbridge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/lib/markbridge/all.rb +9 -0
- data/lib/markbridge/ast/align.rb +24 -0
- data/lib/markbridge/ast/attachment.rb +42 -0
- data/lib/markbridge/ast/bold.rb +13 -0
- data/lib/markbridge/ast/code.rb +27 -0
- data/lib/markbridge/ast/color.rb +25 -0
- data/lib/markbridge/ast/document.rb +27 -0
- data/lib/markbridge/ast/element.rb +47 -0
- data/lib/markbridge/ast/email.rb +27 -0
- data/lib/markbridge/ast/event.rb +59 -0
- data/lib/markbridge/ast/heading.rb +23 -0
- data/lib/markbridge/ast/horizontal_rule.rb +12 -0
- data/lib/markbridge/ast/image.rb +35 -0
- data/lib/markbridge/ast/italic.rb +13 -0
- data/lib/markbridge/ast/line_break.rb +12 -0
- data/lib/markbridge/ast/list.rb +52 -0
- data/lib/markbridge/ast/list_item.rb +13 -0
- data/lib/markbridge/ast/markdown_text.rb +37 -0
- data/lib/markbridge/ast/mention.rb +29 -0
- data/lib/markbridge/ast/node.rb +19 -0
- data/lib/markbridge/ast/paragraph.rb +13 -0
- data/lib/markbridge/ast/poll.rb +74 -0
- data/lib/markbridge/ast/quote.rb +46 -0
- data/lib/markbridge/ast/size.rb +25 -0
- data/lib/markbridge/ast/spoiler.rb +27 -0
- data/lib/markbridge/ast/strikethrough.rb +13 -0
- data/lib/markbridge/ast/subscript.rb +13 -0
- data/lib/markbridge/ast/superscript.rb +13 -0
- data/lib/markbridge/ast/text.rb +38 -0
- data/lib/markbridge/ast/underline.rb +13 -0
- data/lib/markbridge/ast/upload.rb +74 -0
- data/lib/markbridge/ast/url.rb +27 -0
- data/lib/markbridge/ast.rb +42 -0
- data/lib/markbridge/configuration.rb +11 -0
- data/lib/markbridge/gem_loader.rb +23 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +37 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/strict.rb +12 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +121 -0
- data/lib/markbridge/parsers/bbcode/errors/max_depth_exceeded_error.rb +13 -0
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +160 -0
- data/lib/markbridge/parsers/bbcode/handlers/align_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +104 -0
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +44 -0
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/color_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/email_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +51 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +36 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +64 -0
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +48 -0
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/simple_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/size_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/spoiler_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/url_handler.rb +24 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +123 -0
- data/lib/markbridge/parsers/bbcode/parser_state.rb +93 -0
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +126 -0
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +35 -0
- data/lib/markbridge/parsers/bbcode/raw_content_result.rb +25 -0
- data/lib/markbridge/parsers/bbcode/scanner.rb +231 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +21 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +16 -0
- data/lib/markbridge/parsers/bbcode.rb +56 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +87 -0
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +27 -0
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +40 -0
- data/lib/markbridge/parsers/html/handlers/list_handler.rb +29 -0
- data/lib/markbridge/parsers/html/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/paragraph_handler.rb +17 -0
- data/lib/markbridge/parsers/html/handlers/quote_handler.rb +28 -0
- data/lib/markbridge/parsers/html/handlers/raw_handler.rb +33 -0
- data/lib/markbridge/parsers/html/handlers/simple_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/url_handler.rb +27 -0
- data/lib/markbridge/parsers/html/parser.rb +113 -0
- data/lib/markbridge/parsers/html.rb +30 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +332 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +279 -0
- data/lib/markbridge/parsers/media_wiki.rb +15 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +130 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +40 -0
- data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +45 -0
- data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +28 -0
- data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +27 -0
- data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +32 -0
- data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +31 -0
- data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +37 -0
- data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +29 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +132 -0
- data/lib/markbridge/parsers/text_formatter.rb +31 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +73 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/mention.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +90 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +123 -0
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown.rb +16 -0
- data/lib/markbridge/processors.rb +8 -0
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +83 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +468 -0
- data/lib/markbridge/renderers/discourse/render_context.rb +80 -0
- data/lib/markbridge/renderers/discourse/renderer.rb +63 -0
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +86 -0
- data/lib/markbridge/renderers/discourse/tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tag_library.rb +67 -0
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +46 -0
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +54 -0
- data/lib/markbridge/renderers/discourse/tags/color_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +49 -0
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +87 -0
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +39 -0
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +34 -0
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +51 -0
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +32 -0
- data/lib/markbridge/renderers/discourse/tags/size_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/subscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/superscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +80 -0
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse.rb +50 -0
- data/lib/markbridge/version.rb +5 -0
- data/lib/markbridge.rb +201 -0
- metadata +186 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module MediaWiki
|
|
6
|
+
# Parses MediaWiki wikitext into an AST.
|
|
7
|
+
#
|
|
8
|
+
# Supports:
|
|
9
|
+
# - Bold ('''), italic (''), bold italic (''''')
|
|
10
|
+
# - Headings (= through ======)
|
|
11
|
+
# - Unordered lists (* / ** / ***)
|
|
12
|
+
# - Ordered lists (# / ## / ###)
|
|
13
|
+
# - Horizontal rules (----)
|
|
14
|
+
# - Internal links ([[target]] / [[target|display]])
|
|
15
|
+
# - External links ([url text])
|
|
16
|
+
# - Preformatted text (lines starting with a space)
|
|
17
|
+
# - HTML tags: <nowiki>, <code>, <pre>, <br>, <s>, <del>, <u>, <ins>, <sup>, <sub>
|
|
18
|
+
#
|
|
19
|
+
# @example Basic usage
|
|
20
|
+
# parser = Markbridge::Parsers::MediaWiki::Parser.new
|
|
21
|
+
# ast = parser.parse("'''bold''' and ''italic''")
|
|
22
|
+
class Parser
|
|
23
|
+
# Parse MediaWiki wikitext into an AST Document.
|
|
24
|
+
#
|
|
25
|
+
# @param input [String] MediaWiki source
|
|
26
|
+
# @return [AST::Document]
|
|
27
|
+
def parse(input)
  # Split with a negative limit so trailing empty lines are kept.
  source_lines = normalize_line_endings(input).split("\n", -1)

  # Per-call state: the output document, the inline parser used for
  # line content, and the stack of currently open (nested) lists.
  @document = AST::Document.new
  @inline_parser = InlineParser.new
  @list_stack = []

  process_lines(source_lines)
  close_open_lists
  @document
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
# Normalize line endings (CR, CRLF, and Unicode separators).
|
|
43
|
+
#
|
|
44
|
+
# @param input [String]
|
|
45
|
+
# @return [String]
|
|
46
|
+
def normalize_line_endings(input)
  # CRLF and a lone CR each become one LF; a run of U+2028/U+2029
  # separators is replaced by a single LF.
  line_break = /\r\n?|[\u2028\u2029]+/
  input.gsub(line_break) { "\n" }
end
|
|
49
|
+
|
|
50
|
+
# Process all lines of input.
|
|
51
|
+
#
|
|
52
|
+
# @param lines [Array<String>]
|
|
53
|
+
def process_lines(lines)
  cursor = 0

  until cursor >= lines.length
    current = lines[cursor]

    # The first matching classifier wins. Every branch except the list
    # branch closes any open lists before handling the line.
    case
    when heading_line?(current)
      close_open_lists
      process_heading(current)
    when horizontal_rule_line?(current)
      close_open_lists
      @document << AST::HorizontalRule.new
    when list_line?(current)
      process_list_item(current)
    when preformatted_line?(current)
      close_open_lists
      # Multi-line handlers return the last index they consumed.
      cursor = process_preformatted_block(lines, cursor)
    when pre_tag_line?(current)
      close_open_lists
      cursor = process_pre_tag_block(lines, cursor)
    when blank_line?(current)
      close_open_lists
    else
      close_open_lists
      process_inline_content(current)
    end

    cursor += 1
  end
end
|
|
82
|
+
|
|
83
|
+
# Check if a line is a heading (starts and ends with = signs).
|
|
84
|
+
#
|
|
85
|
+
# @param line [String]
|
|
86
|
+
# @return [Boolean]
|
|
87
|
+
def heading_line?(line)
  # Closed form: 1-6 '=' markers, text that neither starts nor ends
  # with '=', then 1-6 closing '=' markers.
  closed = /\A={1,6}[^=].*[^=]={1,6}\s*\z/
  # Loose form: 1-6 '=' markers, text containing no '=', then any
  # number of trailing '=' (including none).
  loose = /\A={1,6}[^=]+=*\s*\z/

  [closed, loose].any? { |pattern| line.match?(pattern) }
end
|
|
90
|
+
|
|
91
|
+
# Check if a line is a horizontal rule (4+ dashes).
|
|
92
|
+
#
|
|
93
|
+
# @param line [String]
|
|
94
|
+
# @return [Boolean]
|
|
95
|
+
def horizontal_rule_line?(line)
  # Four or more dashes, optionally followed by whitespace, and nothing else.
  rule = /\A-{4,}\s*\z/
  rule.match?(line)
end
|
|
98
|
+
|
|
99
|
+
# Check if a line is a list item (starts with * or #).
|
|
100
|
+
#
|
|
101
|
+
# @param line [String]
|
|
102
|
+
# @return [Boolean]
|
|
103
|
+
def list_line?(line)
  # A list line has '*' or '#' as its very first character.
  line.start_with?("*", "#")
end
|
|
106
|
+
|
|
107
|
+
# Check if a line starts with a space (preformatted text).
|
|
108
|
+
#
|
|
109
|
+
# @param line [String]
|
|
110
|
+
# @return [Boolean]
|
|
111
|
+
def preformatted_line?(line)
  # A leading space marks preformatted text, but a line made only of
  # whitespace must not *start* a preformatted block: it would produce an
  # empty code node and would never be recognised as a blank line.
  # Continuation of an already-open block is decided separately by
  # process_preformatted_block, which checks only the leading space.
  line.start_with?(" ") && !line.strip.empty?
end
|
|
114
|
+
|
|
115
|
+
# Check if a line starts a <pre> block.
|
|
116
|
+
#
|
|
117
|
+
# @param line [String]
|
|
118
|
+
# @return [Boolean]
|
|
119
|
+
def pre_tag_line?(line)
  # Optional leading whitespace, then "<pre" ending at a word boundary
  # (case-insensitive), so "<preview>" does not qualify.
  opening_tag = /\A\s*<pre\b/i
  opening_tag.match?(line)
end
|
|
122
|
+
|
|
123
|
+
# Check if a line is blank.
|
|
124
|
+
#
|
|
125
|
+
# @param line [String]
|
|
126
|
+
# @return [Boolean]
|
|
127
|
+
def blank_line?(line)
  # Blank means nothing is left once surrounding whitespace is stripped.
  line.strip.length.zero?
end
|
|
130
|
+
|
|
131
|
+
# Process a heading line and add it to the document.
|
|
132
|
+
#
|
|
133
|
+
# @param line [String]
|
|
134
|
+
def process_heading(line)
  trimmed = line.strip

  # Level is the number of leading '=' characters, capped at 6.
  marker_count = trimmed[/\A=*/].length
  level = marker_count > 6 ? 6 : marker_count

  # Drop the opening markers, then one trailing run of up to six '='
  # together with the whitespace around it.
  title = trimmed[level..].sub(/\s*={1,6}\s*\z/, "").strip

  heading = AST::Heading.new(level:)
  @inline_parser.parse(title, parent: heading)
  @document << heading
end
|
|
148
|
+
|
|
149
|
+
# Process a list item line, managing list nesting.
|
|
150
|
+
#
|
|
151
|
+
# @param line [String]
|
|
152
|
+
def process_list_item(line)
  # The leading run of '*' / '#' gives both the nesting depth (its
  # length) and the list type at each level (the character itself).
  markers = line[/\A[*#]*/]
  text = line[markers.length..].strip

  # Open/close lists so the stack mirrors this line's marker prefix.
  reconcile_list_stack(markers, markers.length)

  # The item goes into the innermost open list.
  item = AST::ListItem.new
  @inline_parser.parse(text, parent: item)
  @list_stack.last[:list] << item
end
|
|
172
|
+
|
|
173
|
+
# Reconcile the list stack with the desired prefix.
|
|
174
|
+
# Opens new lists or closes existing ones as needed.
|
|
175
|
+
#
|
|
176
|
+
# @param prefix [String] the list prefix characters (e.g., "**#")
|
|
177
|
+
# @param desired_depth [Integer]
|
|
178
|
+
def reconcile_list_stack(prefix, desired_depth)
  # Drop lists nested deeper than this line asks for.
  surplus = @list_stack.length - desired_depth
  @list_stack.pop(surplus) if surplus.positive?

  prefix.chars.each_with_index do |marker, depth|
    wants_ordered = marker == "#"
    existing = @list_stack[depth]

    # An open list of the right type at this depth is reused as is.
    next if existing && existing[:ordered] == wants_ordered

    # Otherwise discard this level and everything below it (a no-op when
    # nothing is open at this depth yet) and open a fresh list here.
    @list_stack.pop while @list_stack.length > depth
    open_new_list(wants_ordered, depth)
  end
end
|
|
196
|
+
|
|
197
|
+
# Open a new list at the given depth.
|
|
198
|
+
#
|
|
199
|
+
# @param ordered [Boolean]
|
|
200
|
+
# @param depth [Integer]
|
|
201
|
+
def open_new_list(ordered, depth)
  list = AST::List.new(ordered:)

  # Depth 0 attaches to the document; deeper lists attach to the last
  # item of the innermost open list, creating an empty item first when
  # that list has none.
  container =
    if depth.zero?
      @document
    else
      enclosing = @list_stack.last[:list]
      enclosing << AST::ListItem.new if enclosing.children.empty?
      enclosing.children.last
    end
  container << list

  @list_stack << { list:, ordered: }
end
|
|
215
|
+
|
|
216
|
+
# Close all open lists.
|
|
217
|
+
def close_open_lists
  # Emptying the stack in place means the next list line finds nothing
  # open and therefore starts a new top-level list.
  @list_stack.clear
end
|
|
220
|
+
|
|
221
|
+
# Process consecutive lines starting with a space as a preformatted block.
|
|
222
|
+
#
|
|
223
|
+
# @param lines [Array<String>]
|
|
224
|
+
# @param start_index [Integer]
|
|
225
|
+
# @return [Integer] the last index consumed (will be incremented by caller)
|
|
226
|
+
def process_preformatted_block(lines, start_index)
  # Advance past every consecutive line that begins with a space.
  cursor = start_index
  cursor += 1 while cursor < lines.length && lines[cursor].start_with?(" ")

  # Strip exactly one leading space from each collected line.
  body = Array(lines[start_index...cursor]).map { |raw| raw[1..] }.join("\n")

  code = AST::Code.new
  code << AST::Text.new(body)
  @document << code

  # Index of the last consumed line; the caller advances past it.
  cursor - 1
end
|
|
241
|
+
|
|
242
|
+
# Process a <pre>...</pre> block that may span multiple lines.
|
|
243
|
+
#
|
|
244
|
+
# @param lines [Array<String>]
|
|
245
|
+
# @param start_index [Integer]
|
|
246
|
+
# @return [Integer] the last index consumed
|
|
247
|
+
def process_pre_tag_block(lines, start_index)
  closing_tag = %r{</pre\s*>}i
  combined = +""
  i = start_index

  # Gather lines up to and including the first one that contains a
  # closing tag. If none is found, everything to the end of input is
  # consumed and i ends up equal to lines.length.
  while i < lines.length
    combined << lines[i]
    break if lines[i].match?(closing_tag)
    combined << "\n"
    i += 1
  end

  # Remove the opening tag, then split at the FIRST closing tag. Text
  # after the closing tag on the same line is not part of the code block;
  # previously it (and the literal closing tag) leaked into the content.
  body = combined.sub(/\A\s*<pre\b[^>]*>/i, "")
  content, _tag, trailing = body.partition(closing_tag)

  code = AST::Code.new
  code << AST::Text.new(content)
  @document << code

  # Keep any trailing text by emitting it as regular inline content.
  process_inline_content(trailing.strip) unless trailing.strip.empty?

  # Index of the last consumed line; the caller advances past it.
  i
end
|
|
267
|
+
|
|
268
|
+
# Process a line as inline content wrapped in a paragraph.
|
|
269
|
+
#
|
|
270
|
+
# @param line [String]
|
|
271
|
+
def process_inline_content(line)
  # Each such line becomes its own paragraph, filled by the inline parser.
  paragraph = AST::Paragraph.new.tap { |node| @inline_parser.parse(line, parent: node) }
  @document << paragraph
end
|
|
276
|
+
end
|
|
277
|
+
end
|
|
278
|
+
end
|
|
279
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# AST Nodes
|
|
4
|
+
require_relative "../ast"
|
|
5
|
+
|
|
6
|
+
# Parser components
|
|
7
|
+
require_relative "media_wiki/inline_parser"
|
|
8
|
+
require_relative "media_wiki/parser"
|
|
9
|
+
|
|
10
|
+
module Markbridge
|
|
11
|
+
module Parsers
|
|
12
|
+
module MediaWiki
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
# Registry of s9e/TextFormatter XML element handlers
|
|
7
|
+
#
|
|
8
|
+
# Manages mappings between XML element names and handler objects.
|
|
9
|
+
# Handlers must respond to #process(element:, parent:) and return the created element
|
|
10
|
+
# if children should be processed, or nil otherwise.
|
|
11
|
+
#
|
|
12
|
+
# @example Using default mappings
|
|
13
|
+
# registry = HandlerRegistry.default
|
|
14
|
+
# element = registry.process_element(xml_element, parent)
|
|
15
|
+
#
|
|
16
|
+
# @example Customizing mappings with handler objects
|
|
17
|
+
# registry = HandlerRegistry.build_from_default do |r|
|
|
18
|
+
# r.register("CUSTOM", MyCustomHandler.new)
|
|
19
|
+
# r.register("B", SimpleHandler.new(AST::Bold)) # Override default
|
|
20
|
+
# end
|
|
21
|
+
#
|
|
22
|
+
# @example Using lambdas for simple mappings
|
|
23
|
+
# registry = HandlerRegistry.new
|
|
24
|
+
# registry.register("CUSTOM", ->(element:, parent:) {
|
|
25
|
+
# node = AST::Custom.new
|
|
26
|
+
# parent << node
|
|
27
|
+
# node # Return node to process children
|
|
28
|
+
# })
|
|
29
|
+
class HandlerRegistry
  # Create a new registry with default mappings
  # @return [HandlerRegistry]
  def self.default
    new.tap(&:register_defaults)
  end

  # Build from default mappings with custom additions
  # @yield [HandlerRegistry] registry with default mappings loaded
  # @return [HandlerRegistry]
  def self.build_from_default
    default.tap { |registry| yield registry if block_given? }
  end

  def initialize
    @mappings = {}
  end

  # Register a handler for an element, replacing any previous handler
  # stored under the same (case-insensitive) name.
  # @param element_name [String, Symbol] XML element name (case-insensitive)
  # @param handler [#process, #call] Handler object or lambda
  # @return [Object] the handler that was stored
  def register(element_name, handler)
    @mappings[normalize_name(element_name)] = handler
  end

  # Check if a handler is registered for an element
  # @param element_name [String, Symbol] XML element name
  # @return [Boolean] true if handler is registered
  def has_handler?(element_name)
    @mappings.key?(normalize_name(element_name))
  end

  # Process an XML element using the registered handler
  # @param element [#name] XML element; only its name is read here
  # @param parent [Object] parent node handed through to the handler
  # @return [Object, nil] whatever the handler returns, or nil when no
  #   handler is registered for the element's name
  # @raise [ArgumentError] if the registered handler responds to neither
  #   :process nor :call
  def process_element(element, parent)
    handler = @mappings[normalize_name(element.name)]
    return nil unless handler

    # Handler objects take precedence over callables.
    if handler.respond_to?(:process)
      handler.process(element:, parent:)
    elsif handler.respond_to?(:call)
      handler.call(element:, parent:)
    else
      raise ArgumentError, "Handler must respond to :process or :call"
    end
  end

  # Register all default s9e/TextFormatter element mappings
  # @return [HandlerRegistry] self
  def register_defaults
    # Simple formatting elements
    register("B", Handlers::SimpleHandler.new(AST::Bold))
    register("I", Handlers::SimpleHandler.new(AST::Italic))
    register("U", Handlers::SimpleHandler.new(AST::Underline))
    register("S", Handlers::SimpleHandler.new(AST::Strikethrough))

    # Complex elements with attributes
    register("URL", Handlers::UrlHandler.new)
    register("EMAIL", Handlers::EmailHandler.new)
    register("CODE", Handlers::CodeHandler.new)
    register("QUOTE", Handlers::QuoteHandler.new)
    register("IMG", Handlers::ImageHandler.new)
    register("LIST", Handlers::ListHandler.new)
    register(
      "COLOR",
      Handlers::AttributeHandler.new(AST::Color, attribute: :color, param: :color),
    )
    register(
      "SIZE",
      Handlers::AttributeHandler.new(AST::Size, attribute: :size, param: :size),
    )
    register(
      "ALIGN",
      Handlers::AttributeHandler.new(AST::Align, attribute: :align, param: :alignment),
    )
    register(
      "SPOILER",
      Handlers::AttributeHandler.new(AST::Spoiler, attribute: :title, param: :title),
    )
    register("ATTACHMENT", Handlers::AttachmentHandler.new)
    register("ATTACH", Handlers::AttachmentHandler.new)

    # List item (supports both LI and * for compatibility)
    register("LI", Handlers::SimpleHandler.new(AST::ListItem))
    register("*", Handlers::SimpleHandler.new(AST::ListItem))

    # Paragraphs
    register("P", Handlers::SimpleHandler.new(AST::Paragraph))

    self
  end

  private

  # Canonical lookup key. Going through to_s means a Symbol name is stored
  # and found under the same String key as its String spelling, instead of
  # silently ending up under an unreachable Symbol key.
  def normalize_name(name)
    name.to_s.upcase
  end
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for ATTACHMENT and ATTACH elements in s9e/TextFormatter XML
|
|
8
|
+
class AttachmentHandler < BaseHandler
  # The AST class this handler builds.
  attr_reader :element_class

  def initialize
    @element_class = AST::Attachment
  end

  # Builds an attachment node from the element's id, index, filename and
  # alt attributes (nil when absent) and appends it to the parent.
  #
  # @return [AST::Attachment] the new node, so children are processed into it
  def process(element:, parent:)
    id, index, filename, alt =
      extract_attributes(element).values_at(:id, :index, :filename, :alt)

    attachment = AST::Attachment.new(id:, index:, filename:, alt:)
    parent << attachment
    attachment
  end
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
module Handlers
|
|
7
|
+
# Generic handler for elements that take a single attribute
|
|
8
|
+
#
|
|
9
|
+
# This handler extracts a specified attribute and passes it to the AST node constructor.
|
|
10
|
+
# Use this for elements like COLOR, SIZE, ALIGN, SPOILER.
|
|
11
|
+
#
|
|
12
|
+
# @example
|
|
13
|
+
# # For <COLOR color="red">text</COLOR>
|
|
14
|
+
# handler = AttributeHandler.new(AST::Color, attribute: :color, param: :color)
|
|
15
|
+
# registry.register("COLOR", handler)
|
|
16
|
+
class AttributeHandler < BaseHandler
  # The AST class this handler builds.
  attr_reader :element_class

  # @param element_class [Class] the AST node class to instantiate
  # @param attribute [Symbol] the XML attribute name to extract
  # @param param [Symbol, nil] constructor keyword to pass the value as;
  #   falls back to the attribute name when omitted
  def initialize(element_class, attribute:, param: nil)
    @element_class = element_class
    @attribute = attribute
    @param = param || attribute
  end

  # Reads the configured attribute (nil when absent), builds the node with
  # it under the configured keyword, and appends the node to the parent.
  #
  # @return [Object] the new node, so children are processed into it
  def process(element:, parent:)
    value = extract_attributes(element)[@attribute]

    built = @element_class.new(@param => value)
    parent << built
    built
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
module Handlers
|
|
7
|
+
# Base class for TextFormatter XML element handlers
|
|
8
|
+
#
|
|
9
|
+
# Handlers process s9e/TextFormatter XML elements and convert them to AST nodes.
|
|
10
|
+
# Each handler implements the process method to handle a specific element type.
|
|
11
|
+
#
|
|
12
|
+
# @abstract Subclass and override {#process} to implement a custom handler
|
|
13
|
+
class BaseHandler
  # Convert an XML element into AST node(s); subclasses must override.
  #
  # @param element [Nokogiri::XML::Element] the XML element to process
  # @param parent [AST::Element] the parent AST node to add children to
  # @return [AST::Element, nil] the created element if children should be processed, nil otherwise
  # @raise [NotImplementedError] always, in this base implementation
  def process(element:, parent:)
    raise NotImplementedError, "#{self.class} must implement #process"
  end

  # The AST class produced by the handler; subclasses must override.
  #
  # @return [Class] the AST node class
  # @raise [NotImplementedError] always, in this base implementation
  def element_class
    raise NotImplementedError, "#{self.class} must implement #element_class"
  end

  private

  # Collect the element's attributes into a hash keyed by the lowercased,
  # symbolized attribute name, with each attribute's value as the entry.
  #
  # @param element [Nokogiri::XML::Element]
  # @return [Hash<Symbol, String>]
  def extract_attributes(element)
    element.attributes.each_with_object({}) do |(name, attr), symbolized|
      symbolized[name.downcase.to_sym] = attr.value
    end
  end
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for CODE elements in s9e/TextFormatter XML
|
|
8
|
+
class CodeHandler < BaseHandler
  # The AST class this handler builds.
  attr_reader :element_class

  def initialize
    @element_class = AST::Code
  end

  # Builds a code node whose language comes from the "lang" attribute,
  # falling back to "language", and appends it to the parent.
  #
  # @return [AST::Code] the new node, so children are processed into it
  def process(element:, parent:)
    attributes = extract_attributes(element)

    code = AST::Code.new(language: attributes[:lang] || attributes[:language])
    parent << code
    code
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for EMAIL elements in s9e/TextFormatter XML
|
|
8
|
+
class EmailHandler < BaseHandler
  # The AST class this handler builds.
  attr_reader :element_class

  def initialize
    @element_class = AST::Email
  end

  # Builds an email node from the element's "email" attribute (nil when
  # absent) and appends it to the parent.
  #
  # @return [AST::Email] the new node, so children are processed into it
  def process(element:, parent:)
    address = extract_attributes(element)[:email]

    email = AST::Email.new(address:)
    parent << email
    email
  end
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for IMG elements in s9e/TextFormatter XML
|
|
8
|
+
class ImageHandler < BaseHandler
  # The AST class this handler builds.
  attr_reader :element_class

  def initialize
    @element_class = AST::Image
  end

  # Builds an image node from the "src", "width" and "height" attributes
  # and appends it to the parent. Width and height are converted with
  # to_i when present and left nil when absent.
  #
  # @return [AST::Image] the new node, so children are processed into it
  def process(element:, parent:)
    attributes = extract_attributes(element)
    width, height = attributes.values_at(:width, :height).map { |raw| raw&.to_i }

    image = AST::Image.new(src: attributes[:src], width:, height:)
    parent << image
    image
  end
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for LIST elements in s9e/TextFormatter XML
|
|
8
|
+
class ListHandler < BaseHandler
  # "type" values that denote a bulleted (unordered) list.
  UNORDERED_TYPES = ["", "disc", "circle", "square"].freeze

  # The AST class this handler builds.
  attr_reader :element_class

  def initialize
    @element_class = AST::List
  end

  # Builds a list node and appends it to the parent. The list is ordered
  # only when a "type" attribute is present and is not one of the
  # unordered values above.
  #
  # @return [AST::List] the new node, so children are processed into it
  def process(element:, parent:)
    list_type = extract_attributes(element)[:type]
    ordered = !(list_type.nil? || UNORDERED_TYPES.include?(list_type))

    list = AST::List.new(ordered:)
    parent << list
    list
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|