markbridge 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markbridge/all.rb +4 -7
- data/lib/markbridge/ast/document.rb +1 -1
- data/lib/markbridge/ast/element.rb +2 -2
- data/lib/markbridge/ast/list.rb +2 -2
- data/lib/markbridge/ast/table.rb +61 -0
- data/lib/markbridge/ast/text.rb +5 -1
- data/lib/markbridge/ast.rb +1 -0
- data/lib/markbridge/bbcode.rb +4 -0
- data/lib/markbridge/gem_loader.rb +2 -3
- data/lib/markbridge/html.rb +4 -0
- data/lib/markbridge/mediawiki.rb +4 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
- data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
- data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
- data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
- data/lib/markbridge/parsers/bbcode.rb +4 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
- data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
- data/lib/markbridge/parsers/html/parser.rb +16 -15
- data/lib/markbridge/parsers/html.rb +3 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
- data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
- data/lib/markbridge/parsers/media_wiki.rb +1 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
- data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
- data/lib/markbridge/parsers/text_formatter.rb +1 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
- data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
- data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
- data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
- data/lib/markbridge/renderers/discourse/tag.rb +14 -1
- data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse.rb +4 -0
- data/lib/markbridge/textformatter.rb +4 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +27 -62
- metadata +19 -2
|
@@ -14,26 +14,35 @@ module Markbridge
|
|
|
14
14
|
# - Internal links ([[target]] / [[target|display]])
|
|
15
15
|
# - External links ([url text])
|
|
16
16
|
# - Preformatted text (lines starting with a space)
|
|
17
|
+
# - Tables ({| ... |})
|
|
17
18
|
# - HTML tags: <nowiki>, <code>, <pre>, <br>, <s>, <del>, <u>, <ins>, <sup>, <sub>
|
|
18
19
|
#
|
|
19
20
|
# @example Basic usage
|
|
20
21
|
# parser = Markbridge::Parsers::MediaWiki::Parser.new
|
|
21
22
|
# ast = parser.parse("'''bold''' and ''italic''")
|
|
22
23
|
class Parser
|
|
24
|
+
# @param inline_tag_registry [InlineTagRegistry, nil] custom registry or use default
|
|
25
|
+
# @yield [InlineTagRegistry] optional block to customize the default registry
|
|
26
|
+
def initialize(inline_tag_registry: nil, &block)
|
|
27
|
+
# InlineParser falls back to InlineTagRegistry.default when this is
|
|
28
|
+
# nil, so we don't need to materialise it here.
|
|
29
|
+
@inline_tag_registry =
|
|
30
|
+
block_given? ? InlineTagRegistry.build_from_default(&block) : inline_tag_registry
|
|
31
|
+
end
|
|
32
|
+
|
|
23
33
|
# Parse MediaWiki wikitext into an AST Document.
|
|
24
34
|
#
|
|
25
35
|
# @param input [String] MediaWiki source
|
|
26
36
|
# @return [AST::Document]
|
|
27
37
|
def parse(input)
|
|
28
38
|
normalized = normalize_line_endings(input)
|
|
29
|
-
lines = normalized.split("\n"
|
|
39
|
+
lines = normalized.split("\n")
|
|
30
40
|
|
|
31
41
|
@document = AST::Document.new
|
|
32
|
-
@inline_parser = InlineParser.new
|
|
42
|
+
@inline_parser = InlineParser.new(inline_tag_registry: @inline_tag_registry)
|
|
33
43
|
@list_stack = []
|
|
34
44
|
|
|
35
45
|
process_lines(lines)
|
|
36
|
-
close_open_lists
|
|
37
46
|
@document
|
|
38
47
|
end
|
|
39
48
|
|
|
@@ -53,7 +62,7 @@ module Markbridge
|
|
|
53
62
|
def process_lines(lines)
|
|
54
63
|
i = 0
|
|
55
64
|
while i < lines.length
|
|
56
|
-
line = lines
|
|
65
|
+
line = lines.fetch(i)
|
|
57
66
|
|
|
58
67
|
if heading_line?(line)
|
|
59
68
|
close_open_lists
|
|
@@ -61,6 +70,9 @@ module Markbridge
|
|
|
61
70
|
elsif horizontal_rule_line?(line)
|
|
62
71
|
close_open_lists
|
|
63
72
|
@document << AST::HorizontalRule.new
|
|
73
|
+
elsif table_start_line?(line)
|
|
74
|
+
close_open_lists
|
|
75
|
+
i = process_table(lines, i)
|
|
64
76
|
elsif list_line?(line)
|
|
65
77
|
process_list_item(line)
|
|
66
78
|
elsif preformatted_line?(line)
|
|
@@ -80,12 +92,15 @@ module Markbridge
|
|
|
80
92
|
end
|
|
81
93
|
end
|
|
82
94
|
|
|
95
|
+
HEADING_LINE = /\A={1,6}(?:[^=].*[^=]={1,6}|[^=]+=*)\s*\z/
|
|
96
|
+
private_constant :HEADING_LINE
|
|
97
|
+
|
|
83
98
|
# Check if a line is a heading (starts and ends with = signs).
|
|
84
99
|
#
|
|
85
100
|
# @param line [String]
|
|
86
101
|
# @return [Boolean]
|
|
87
102
|
def heading_line?(line)
|
|
88
|
-
line.match?(
|
|
103
|
+
line.match?(HEADING_LINE)
|
|
89
104
|
end
|
|
90
105
|
|
|
91
106
|
# Check if a line is a horizontal rule (4+ dashes).
|
|
@@ -125,23 +140,138 @@ module Markbridge
|
|
|
125
140
|
# @param line [String]
|
|
126
141
|
# @return [Boolean]
|
|
127
142
|
def blank_line?(line)
|
|
128
|
-
line.
|
|
143
|
+
!line.match?(/\S/)
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
HEADING_LEVEL_PREFIX = /\A={1,6}/
|
|
147
|
+
HEADING_LEVEL_SUFFIX = /\s*={1,6}\s*\z/
|
|
148
|
+
private_constant :HEADING_LEVEL_PREFIX, :HEADING_LEVEL_SUFFIX
|
|
149
|
+
|
|
150
|
+
# Check if a line starts a table ({|).
|
|
151
|
+
#
|
|
152
|
+
# @param line [String]
|
|
153
|
+
# @return [Boolean]
|
|
154
|
+
def table_start_line?(line)
|
|
155
|
+
line.match?(/\A\s*\{\|/)
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Process a table block from {| to |}.
|
|
159
|
+
# Consumes lines until the closing |} is found.
|
|
160
|
+
#
|
|
161
|
+
# @param lines [Array<String>]
|
|
162
|
+
# @param start_index [Integer]
|
|
163
|
+
# @return [Integer] the last index consumed
|
|
164
|
+
def process_table(lines, start_index)
|
|
165
|
+
table = AST::Table.new
|
|
166
|
+
current_row = nil
|
|
167
|
+
i = start_index + 1 # Skip the {| line
|
|
168
|
+
|
|
169
|
+
while i < lines.length
|
|
170
|
+
stripped = lines[i].strip
|
|
171
|
+
|
|
172
|
+
if stripped.start_with?("|}")
|
|
173
|
+
break
|
|
174
|
+
elsif stripped.start_with?("|-")
|
|
175
|
+
# Row separator - next cells will go in a new row
|
|
176
|
+
current_row = nil
|
|
177
|
+
elsif stripped.start_with?("!")
|
|
178
|
+
# Header cells
|
|
179
|
+
current_row = ensure_table_row(table, current_row)
|
|
180
|
+
parse_table_cells(stripped[1..], header: true, row: current_row)
|
|
181
|
+
elsif stripped.start_with?("|")
|
|
182
|
+
# Data cells
|
|
183
|
+
current_row = ensure_table_row(table, current_row)
|
|
184
|
+
parse_table_cells(stripped[1..], header: false, row: current_row)
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
i += 1
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
@document << table
|
|
191
|
+
i
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Ensure a row exists for the table, creating one if needed.
|
|
195
|
+
#
|
|
196
|
+
# @param table [AST::Table]
|
|
197
|
+
# @param current_row [AST::TableRow, nil]
|
|
198
|
+
# @return [AST::TableRow]
|
|
199
|
+
def ensure_table_row(table, current_row)
|
|
200
|
+
return current_row if current_row
|
|
201
|
+
|
|
202
|
+
row = AST::TableRow.new
|
|
203
|
+
table << row
|
|
204
|
+
row
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
# Parse cell content from a line and add cells to the row.
|
|
208
|
+
# Cells are separated by !! (headers) or || (data cells).
|
|
209
|
+
# Separators inside [[...]] internal links are preserved so that
|
|
210
|
+
# pipes like [[Target|Display]] survive cell splitting.
|
|
211
|
+
#
|
|
212
|
+
# @param content [String] the line content after the leading ! or |
|
|
213
|
+
# @param header [Boolean] whether these are header cells
|
|
214
|
+
# @param row [AST::TableRow]
|
|
215
|
+
def parse_table_cells(content, header:, row:)
|
|
216
|
+
separator = header ? "!!" : "||"
|
|
217
|
+
cells = split_outside_brackets(content, separator)
|
|
218
|
+
|
|
219
|
+
cells.each do |raw_cell|
|
|
220
|
+
# A single | in a cell separates attributes from content
|
|
221
|
+
parts = split_outside_brackets(raw_cell, "|", limit: 2)
|
|
222
|
+
cell_text = parts.last
|
|
223
|
+
|
|
224
|
+
cell = AST::TableCell.new(header:)
|
|
225
|
+
@inline_parser.parse(cell_text.strip, parent: cell)
|
|
226
|
+
row << cell
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
# Split content on separator, ignoring occurrences inside [[...]] pairs.
|
|
231
|
+
# With limit: n, stops after n-1 splits (matching String#split semantics).
|
|
232
|
+
#
|
|
233
|
+
# @param content [String]
|
|
234
|
+
# @param separator [String]
|
|
235
|
+
# @param limit [Integer, nil]
|
|
236
|
+
# @return [Array<String>]
|
|
237
|
+
def split_outside_brackets(content, separator, limit: nil)
|
|
238
|
+
parts = []
|
|
239
|
+
buffer = +""
|
|
240
|
+
depth = 0
|
|
241
|
+
i = 0
|
|
242
|
+
sep_len = separator.length
|
|
243
|
+
|
|
244
|
+
while i < content.length
|
|
245
|
+
if content[i, 2] == "[["
|
|
246
|
+
depth += 1
|
|
247
|
+
buffer << "[["
|
|
248
|
+
i += 2
|
|
249
|
+
elsif content[i, 2] == "]]" && depth.positive?
|
|
250
|
+
depth -= 1
|
|
251
|
+
buffer << "]]"
|
|
252
|
+
i += 2
|
|
253
|
+
elsif depth.zero? && content[i, sep_len] == separator &&
|
|
254
|
+
(limit.nil? || parts.length < limit - 1)
|
|
255
|
+
parts << buffer
|
|
256
|
+
buffer = +""
|
|
257
|
+
i += sep_len
|
|
258
|
+
else
|
|
259
|
+
buffer << content[i]
|
|
260
|
+
i += 1
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
parts << buffer
|
|
129
265
|
end
|
|
130
266
|
|
|
131
267
|
# Process a heading line and add it to the document.
|
|
132
268
|
#
|
|
133
269
|
# @param line [String]
|
|
134
270
|
def process_heading(line)
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
level = 0
|
|
138
|
-
level += 1 while level < stripped.length && stripped[level] == "="
|
|
139
|
-
level = [level, 6].min
|
|
140
|
-
|
|
141
|
-
# Remove leading/trailing = signs and whitespace
|
|
142
|
-
content = stripped[level..].sub(/\s*={1,6}\s*\z/, "").strip
|
|
271
|
+
leading = line[HEADING_LEVEL_PREFIX]
|
|
272
|
+
content = line[leading.length..].sub(HEADING_LEVEL_SUFFIX, "").strip
|
|
143
273
|
|
|
144
|
-
heading = AST::Heading.new(level:)
|
|
274
|
+
heading = AST::Heading.new(level: leading.length)
|
|
145
275
|
@inline_parser.parse(content, parent: heading)
|
|
146
276
|
@document << heading
|
|
147
277
|
end
|
|
@@ -150,48 +280,28 @@ module Markbridge
|
|
|
150
280
|
#
|
|
151
281
|
# @param line [String]
|
|
152
282
|
def process_list_item(line)
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
i = 0
|
|
156
|
-
while i < line.length && (line[i] == "*" || line[i] == "#")
|
|
157
|
-
prefix << line[i]
|
|
158
|
-
i += 1
|
|
159
|
-
end
|
|
160
|
-
|
|
161
|
-
content = line[i..].strip
|
|
162
|
-
desired_depth = prefix.length
|
|
283
|
+
prefix = line[/\A[*#]+/]
|
|
284
|
+
content = line[prefix.length..].strip
|
|
163
285
|
|
|
164
|
-
|
|
165
|
-
reconcile_list_stack(prefix, desired_depth)
|
|
286
|
+
reconcile_list_stack(prefix)
|
|
166
287
|
|
|
167
|
-
# Create list item and add content
|
|
168
288
|
item = AST::ListItem.new
|
|
169
289
|
@inline_parser.parse(content, parent: item)
|
|
170
|
-
@list_stack.last
|
|
290
|
+
@list_stack.last.fetch(:list) << item
|
|
171
291
|
end
|
|
172
292
|
|
|
173
293
|
# Reconcile the list stack with the desired prefix.
|
|
174
294
|
# Opens new lists or closes existing ones as needed.
|
|
175
295
|
#
|
|
176
296
|
# @param prefix [String] the list prefix characters (e.g., "**#")
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
if idx < @list_stack.length
|
|
186
|
-
# If type changed at this level, close from here and reopen
|
|
187
|
-
if @list_stack[idx][:ordered] != ordered
|
|
188
|
-
@list_stack.pop while @list_stack.length > idx
|
|
189
|
-
open_new_list(ordered, idx)
|
|
190
|
-
end
|
|
191
|
-
else
|
|
192
|
-
open_new_list(ordered, idx)
|
|
193
|
-
end
|
|
194
|
-
end
|
|
297
|
+
def reconcile_list_stack(prefix)
|
|
298
|
+
keep = matching_prefix_depth(prefix)
|
|
299
|
+
@list_stack.pop while @list_stack.length > keep
|
|
300
|
+
prefix[keep..].each_char { |char| open_new_list(char == "#", @list_stack.length) }
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
def matching_prefix_depth(prefix)
|
|
304
|
+
@list_stack.take_while.with_index { |entry, i| entry.fetch(:char) == prefix[i] }.length
|
|
195
305
|
end
|
|
196
306
|
|
|
197
307
|
# Open a new list at the given depth.
|
|
@@ -204,13 +314,12 @@ module Markbridge
|
|
|
204
314
|
if depth.zero?
|
|
205
315
|
@document << list
|
|
206
316
|
else
|
|
207
|
-
|
|
208
|
-
parent_list = @list_stack.last[:list]
|
|
317
|
+
parent_list = @list_stack.last.fetch(:list)
|
|
209
318
|
parent_list << AST::ListItem.new if parent_list.children.empty?
|
|
210
319
|
parent_list.children.last << list
|
|
211
320
|
end
|
|
212
321
|
|
|
213
|
-
@list_stack << { list:, ordered: }
|
|
322
|
+
@list_stack << { list:, char: ordered ? "#" : "*" }
|
|
214
323
|
end
|
|
215
324
|
|
|
216
325
|
# Close all open lists.
|
|
@@ -224,45 +333,39 @@ module Markbridge
|
|
|
224
333
|
# @param start_index [Integer]
|
|
225
334
|
# @return [Integer] the last index consumed (will be incremented by caller)
|
|
226
335
|
def process_preformatted_block(lines, start_index)
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
while i < lines.length && lines[i].start_with?(" ")
|
|
231
|
-
content_lines << lines[i][1..] # Remove leading space
|
|
232
|
-
i += 1
|
|
233
|
-
end
|
|
336
|
+
consumed = lines[start_index..].take_while { |line| line.start_with?(" ") }
|
|
337
|
+
content = consumed.map { |line| line[1..] }.join("\n")
|
|
234
338
|
|
|
235
339
|
code = AST::Code.new
|
|
236
|
-
code << AST::Text.new(
|
|
340
|
+
code << AST::Text.new(content)
|
|
237
341
|
@document << code
|
|
238
342
|
|
|
239
|
-
|
|
343
|
+
start_index + consumed.length - 1
|
|
240
344
|
end
|
|
241
345
|
|
|
346
|
+
PRE_TAG_OPEN = /\A\s*<pre\b[^>]*>/i
|
|
347
|
+
PRE_TAG_CLOSE = %r{</pre\s*>}i
|
|
348
|
+
PRE_TAG_CLOSE_TRAILING = %r{</pre\s*>\s*\z}i
|
|
349
|
+
private_constant :PRE_TAG_OPEN, :PRE_TAG_CLOSE, :PRE_TAG_CLOSE_TRAILING
|
|
350
|
+
|
|
242
351
|
# Process a <pre>...</pre> block that may span multiple lines.
|
|
243
352
|
#
|
|
244
353
|
# @param lines [Array<String>]
|
|
245
354
|
# @param start_index [Integer]
|
|
246
355
|
# @return [Integer] the last index consumed
|
|
247
356
|
def process_pre_tag_block(lines, start_index)
|
|
248
|
-
|
|
249
|
-
|
|
357
|
+
consumed = lines[start_index..].take_while { |line| !line.match?(PRE_TAG_CLOSE) }
|
|
358
|
+
terminated = consumed.length < lines.length - start_index
|
|
359
|
+
consumed << lines.fetch(start_index + consumed.length) if terminated
|
|
250
360
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
break if lines[i].match?(%r{</pre\s*>}i)
|
|
254
|
-
combined << "\n"
|
|
255
|
-
i += 1
|
|
256
|
-
end
|
|
257
|
-
|
|
258
|
-
# Extract content between <pre> and </pre>
|
|
259
|
-
content = combined.sub(/\A\s*<pre\b[^>]*>/i, "").sub(%r{</pre\s*>\s*\z}i, "")
|
|
361
|
+
combined = consumed.join("\n")
|
|
362
|
+
content = combined.sub(PRE_TAG_OPEN, "").sub(PRE_TAG_CLOSE_TRAILING, "")
|
|
260
363
|
|
|
261
364
|
code = AST::Code.new
|
|
262
365
|
code << AST::Text.new(content)
|
|
263
366
|
@document << code
|
|
264
367
|
|
|
265
|
-
|
|
368
|
+
start_index + consumed.length - 1
|
|
266
369
|
end
|
|
267
370
|
|
|
268
371
|
# Process a line as inline content wrapped in a paragraph.
|
|
@@ -18,14 +18,6 @@ module Markbridge
|
|
|
18
18
|
# r.register("CUSTOM", MyCustomHandler.new)
|
|
19
19
|
# r.register("B", SimpleHandler.new(AST::Bold)) # Override default
|
|
20
20
|
# end
|
|
21
|
-
#
|
|
22
|
-
# @example Using lambdas for simple mappings
|
|
23
|
-
# registry = HandlerRegistry.new
|
|
24
|
-
# registry.register("CUSTOM", ->(element:, parent:) {
|
|
25
|
-
# node = AST::Custom.new
|
|
26
|
-
# parent << node
|
|
27
|
-
# node # Return node to process children
|
|
28
|
-
# })
|
|
29
21
|
class HandlerRegistry
|
|
30
22
|
# Create a new registry with default mappings
|
|
31
23
|
# @return [HandlerRegistry]
|
|
@@ -46,11 +38,7 @@ module Markbridge
|
|
|
46
38
|
|
|
47
39
|
# Register a handler for an element
|
|
48
40
|
# @param element_name [String] XML element name (case-insensitive)
|
|
49
|
-
# @param handler [#process
|
|
50
|
-
# @example With handler object
|
|
51
|
-
# registry.register("CUSTOM", MyCustomHandler.new)
|
|
52
|
-
# @example With lambda
|
|
53
|
-
# registry.register("CUSTOM", ->(element:, parent:) { ... })
|
|
41
|
+
# @param handler [#process] Handler object responding to `process(element:, parent:)`
|
|
54
42
|
def register(element_name, handler)
|
|
55
43
|
@mappings[element_name.upcase] = handler
|
|
56
44
|
end
|
|
@@ -69,16 +57,7 @@ module Markbridge
|
|
|
69
57
|
def process_element(element, parent)
|
|
70
58
|
tag_name = element.name.upcase
|
|
71
59
|
handler = @mappings[tag_name]
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
# Call handler and return its result (element or nil)
|
|
75
|
-
if handler.respond_to?(:process)
|
|
76
|
-
handler.process(element:, parent:)
|
|
77
|
-
elsif handler.respond_to?(:call)
|
|
78
|
-
handler.call(element:, parent:)
|
|
79
|
-
else
|
|
80
|
-
raise ArgumentError, "Handler must respond to :process or :call"
|
|
81
|
-
end
|
|
60
|
+
handler&.process(element:, parent:)
|
|
82
61
|
end
|
|
83
62
|
|
|
84
63
|
# Register all default s9e/TextFormatter element mappings
|
|
@@ -96,22 +75,13 @@ module Markbridge
|
|
|
96
75
|
register("QUOTE", Handlers::QuoteHandler.new)
|
|
97
76
|
register("IMG", Handlers::ImageHandler.new)
|
|
98
77
|
register("LIST", Handlers::ListHandler.new)
|
|
99
|
-
register(
|
|
100
|
-
|
|
101
|
-
Handlers::AttributeHandler.new(AST::Color, attribute: :color, param: :color),
|
|
102
|
-
)
|
|
103
|
-
register(
|
|
104
|
-
"SIZE",
|
|
105
|
-
Handlers::AttributeHandler.new(AST::Size, attribute: :size, param: :size),
|
|
106
|
-
)
|
|
78
|
+
register("COLOR", Handlers::AttributeHandler.new(AST::Color, attribute: :color))
|
|
79
|
+
register("SIZE", Handlers::AttributeHandler.new(AST::Size, attribute: :size))
|
|
107
80
|
register(
|
|
108
81
|
"ALIGN",
|
|
109
82
|
Handlers::AttributeHandler.new(AST::Align, attribute: :align, param: :alignment),
|
|
110
83
|
)
|
|
111
|
-
register(
|
|
112
|
-
"SPOILER",
|
|
113
|
-
Handlers::AttributeHandler.new(AST::Spoiler, attribute: :title, param: :title),
|
|
114
|
-
)
|
|
84
|
+
register("SPOILER", Handlers::AttributeHandler.new(AST::Spoiler, attribute: :title))
|
|
115
85
|
register("ATTACHMENT", Handlers::AttachmentHandler.new)
|
|
116
86
|
register("ATTACH", Handlers::AttachmentHandler.new)
|
|
117
87
|
|
|
@@ -122,7 +92,11 @@ module Markbridge
|
|
|
122
92
|
# Paragraphs
|
|
123
93
|
register("P", Handlers::SimpleHandler.new(AST::Paragraph))
|
|
124
94
|
|
|
125
|
-
|
|
95
|
+
# Table elements
|
|
96
|
+
register("TABLE", Handlers::SimpleHandler.new(AST::Table))
|
|
97
|
+
register("TR", Handlers::SimpleHandler.new(AST::TableRow))
|
|
98
|
+
register("TD", Handlers::TableCellHandler.new)
|
|
99
|
+
register("TH", Handlers::TableCellHandler.new)
|
|
126
100
|
end
|
|
127
101
|
end
|
|
128
102
|
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module TextFormatter
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table cell elements (TD, TH)
|
|
8
|
+
class TableCellHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableCell
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def process(element:, parent:)
|
|
14
|
+
node = AST::TableCell.new(header: element.name.upcase == "TH")
|
|
15
|
+
parent << node
|
|
16
|
+
node
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def element_class
|
|
20
|
+
@element_class
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -57,11 +57,8 @@ module Markbridge
|
|
|
57
57
|
document = AST::Document.new
|
|
58
58
|
process_node(root, document)
|
|
59
59
|
document
|
|
60
|
-
rescue Nokogiri::XML::SyntaxError
|
|
61
|
-
|
|
62
|
-
document = AST::Document.new
|
|
63
|
-
document << AST::Text.new(input)
|
|
64
|
-
document
|
|
60
|
+
rescue Nokogiri::XML::SyntaxError
|
|
61
|
+
AST::Document.new << AST::Text.new(input)
|
|
65
62
|
end
|
|
66
63
|
|
|
67
64
|
# Process children of an XML element (public for handler access)
|
|
@@ -122,9 +119,7 @@ module Markbridge
|
|
|
122
119
|
# @param ast_parent [AST::Element]
|
|
123
120
|
def process_text(text_node, ast_parent)
|
|
124
121
|
text = text_node.content
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
ast_parent << AST::Text.new(text)
|
|
122
|
+
ast_parent << AST::Text.new(text) if text.match?(/\S/)
|
|
128
123
|
end
|
|
129
124
|
end
|
|
130
125
|
end
|
|
@@ -18,6 +18,7 @@ require_relative "text_formatter/handlers/image_handler"
|
|
|
18
18
|
require_relative "text_formatter/handlers/list_handler"
|
|
19
19
|
require_relative "text_formatter/handlers/quote_handler"
|
|
20
20
|
require_relative "text_formatter/handlers/url_handler"
|
|
21
|
+
require_relative "text_formatter/handlers/table_cell_handler"
|
|
21
22
|
|
|
22
23
|
# Parser components
|
|
23
24
|
require_relative "text_formatter/handler_registry"
|