markbridge 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/ast/table.rb +67 -0
  3. data/lib/markbridge/ast.rb +1 -0
  4. data/lib/markbridge/parsers/bbcode/handler_registry.rb +5 -0
  5. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -11
  6. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +40 -33
  7. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
  8. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
  9. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
  10. data/lib/markbridge/parsers/bbcode/parser.rb +1 -1
  11. data/lib/markbridge/parsers/bbcode.rb +3 -0
  12. data/lib/markbridge/parsers/html/handler_registry.rb +5 -0
  13. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -1
  14. data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
  15. data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
  16. data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
  17. data/lib/markbridge/parsers/html/parser.rb +13 -2
  18. data/lib/markbridge/parsers/html.rb +3 -0
  19. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +105 -130
  20. data/lib/markbridge/parsers/media_wiki/parser.rb +128 -0
  21. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +6 -0
  22. data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
  23. data/lib/markbridge/parsers/text_formatter.rb +1 -0
  24. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +96 -84
  25. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +12 -0
  26. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +0 -10
  27. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +0 -10
  28. data/lib/markbridge/processors/discourse_markdown/scanner.rb +19 -16
  29. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +237 -180
  30. data/lib/markbridge/renderers/discourse/renderer.rb +1 -0
  31. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +1 -1
  32. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
  33. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +2 -1
  34. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -5
  35. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +1 -1
  36. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +1 -1
  37. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +3 -5
  38. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +15 -11
  39. data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
  40. data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
  41. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +124 -0
  42. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +1 -1
  43. data/lib/markbridge/renderers/discourse.rb +3 -0
  44. data/lib/markbridge/version.rb +1 -1
  45. data/lib/markbridge.rb +20 -55
  46. metadata +12 -1
@@ -7,6 +7,14 @@ module Markbridge
7
7
  # Handles bold ('''), italic (''), links ([[...]]), external links ([...]),
8
8
  # and HTML inline tags (<code>, <nowiki>, <s>, <del>, <u>, <ins>, <sup>, <sub>, <br>).
9
9
  class InlineParser
10
+ def initialize
11
+ @input = nil
12
+ @pos = 0
13
+ @length = 0
14
+ @parent = nil
15
+ @text_buffer = +""
16
+ end
17
+
10
18
  # Parse inline markup and append resulting AST nodes to the parent element.
11
19
  #
12
20
  # @param text [String] the text to parse for inline markup
@@ -19,19 +27,29 @@ module Markbridge
19
27
  @text_buffer = +""
20
28
 
21
29
  while @pos < @length
22
- if @input[@pos] == "'" && @pos + 1 < @length && @input[@pos + 1] == "'"
23
- parse_bold_italic
24
- elsif @input[@pos] == "[" && @pos + 1 < @length && @input[@pos + 1] == "["
30
+ char = @input[@pos]
31
+ next_char = @pos + 1 < @length ? @input[@pos + 1] : nil
32
+
33
+ case char
34
+ when "'"
35
+ if next_char == "'"
36
+ parse_bold_italic
37
+ else
38
+ @text_buffer << char
39
+ @pos += 1
40
+ end
41
+ when "["
25
42
  flush_text
26
- parse_internal_link
27
- elsif @input[@pos] == "[" && !(@pos + 1 < @length && @input[@pos + 1] == "[")
28
- flush_text
29
- parse_external_link
30
- elsif @input[@pos] == "<"
43
+ if next_char == "["
44
+ parse_internal_link
45
+ else
46
+ parse_external_link
47
+ end
48
+ when "<"
31
49
  flush_text
32
50
  parse_html_tag
33
51
  else
34
- @text_buffer << @input[@pos]
52
+ @text_buffer << char
35
53
  @pos += 1
36
54
  end
37
55
  end
@@ -46,72 +64,59 @@ module Markbridge
46
64
  start = @pos
47
65
  count = 0
48
66
  count += 1 while @pos + count < @length && @input[@pos + count] == "'"
49
- count = 5 if count > 5
67
+ # Clamp: 5 = bold+italic, 3 = bold, 2 = italic
68
+ count = [count, 5].min
50
69
 
51
- if count >= 5
52
- flush_text
53
- @pos += 5
54
- parse_bold_italic_combo(start)
55
- elsif count >= 3
56
- flush_text
57
- @pos += 3
58
- parse_bold_content(start)
59
- elsif count >= 2
60
- flush_text
61
- @pos += 2
62
- parse_italic_content(start)
63
- else
70
+ if count < 2
64
71
  @text_buffer << @input[@pos]
65
72
  @pos += 1
73
+ else
74
+ flush_text
75
+ @pos += count
76
+ parse_apostrophe_formatting(count, start)
66
77
  end
67
78
  end
68
79
 
69
- # Parse '''''bold italic''''' content.
70
- def parse_bold_italic_combo(start)
71
- bold = AST::Bold.new
72
- italic = AST::Italic.new
73
- content = collect_until_apostrophes(5)
74
-
75
- if content
76
- inner_parser = InlineParser.new
77
- inner_parser.parse(content, parent: italic)
78
- bold << italic
79
- @parent << bold
80
- else
81
- # No closing found - treat as literal text
82
- @text_buffer << "'''''"
83
- @pos = start + 5
80
+ # Parse apostrophe-delimited formatting (bold, italic, or bold+italic).
81
+ #
82
+ # @param apostrophe_count [Integer] number of apostrophes (2, 3, or 5)
83
+ # @param start [Integer] position before the opening apostrophes
84
+ def parse_apostrophe_formatting(apostrophe_count, start)
85
+ content = collect_until_apostrophes(apostrophe_count)
86
+
87
+ unless content
88
+ @text_buffer << ("'" * apostrophe_count)
89
+ @pos = start + apostrophe_count
90
+ return
84
91
  end
85
- end
86
92
 
87
- # Parse '''bold''' content.
88
- def parse_bold_content(start)
89
- bold = AST::Bold.new
90
- content = collect_until_apostrophes(3)
93
+ element = build_formatting_element(apostrophe_count)
94
+ parse_inner_content(content, parent: innermost_element(element))
95
+ @parent << element
96
+ end
91
97
 
92
- if content
93
- inner_parser = InlineParser.new
94
- inner_parser.parse(content, parent: bold)
95
- @parent << bold
96
- else
97
- @text_buffer << "'''"
98
- @pos = start + 3
98
+ # Build the AST element(s) for the given apostrophe count.
99
+ def build_formatting_element(apostrophe_count)
100
+ case apostrophe_count
101
+ when 5
102
+ bold = AST::Bold.new
103
+ bold << AST::Italic.new
104
+ bold
105
+ when 3
106
+ AST::Bold.new
107
+ when 2
108
+ AST::Italic.new
99
109
  end
100
110
  end
101
111
 
102
- # Parse ''italic'' content.
103
- def parse_italic_content(start)
104
- italic = AST::Italic.new
105
- content = collect_until_apostrophes(2)
112
+ # Return the innermost element to receive parsed content.
113
+ def innermost_element(element)
114
+ element.children.empty? ? element : element.children.last
115
+ end
106
116
 
107
- if content
108
- inner_parser = InlineParser.new
109
- inner_parser.parse(content, parent: italic)
110
- @parent << italic
111
- else
112
- @text_buffer << "''"
113
- @pos = start + 2
114
- end
117
+ # Parse inner content and append to a parent element.
118
+ def parse_inner_content(content, parent:)
119
+ InlineParser.new.parse(content, parent:)
115
120
  end
116
121
 
117
122
  # Collect text until we find n consecutive apostrophes.
@@ -200,75 +205,71 @@ module Markbridge
200
205
  return
201
206
  end
202
207
 
208
+ full_match = tag_match[0]
203
209
  closing = !tag_match[1].empty?
204
- tag_name = tag_match[2].downcase
205
210
  self_closing = !tag_match[3].empty?
206
- full_match = tag_match[0]
211
+ tag_name = tag_match[2].downcase
212
+
213
+ # Closing/self-closing tags and unknown tags are treated as literal text
214
+ if closing || self_closing || !known_html_tag?(tag_name)
215
+ advance_as_text(full_match)
216
+ return
217
+ end
207
218
 
208
219
  case tag_name
209
220
  when "nowiki"
210
- handle_nowiki_tag(closing, full_match)
211
- when "code"
212
- handle_paired_raw_tag(tag_name, closing, full_match, AST::Code)
213
- when "pre"
214
- handle_paired_raw_tag(tag_name, closing, full_match, AST::Code)
221
+ handle_nowiki_tag(full_match)
222
+ when "code", "pre"
223
+ handle_paired_raw_tag(tag_name, full_match, AST::Code)
215
224
  when "br"
216
225
  @pos += full_match.length
217
226
  @parent << AST::LineBreak.new
218
227
  when "s", "del"
219
- handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Strikethrough)
228
+ handle_paired_tag(tag_name, full_match, AST::Strikethrough)
220
229
  when "u", "ins"
221
- handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Underline)
230
+ handle_paired_tag(tag_name, full_match, AST::Underline)
222
231
  when "sup"
223
- handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Superscript)
232
+ handle_paired_tag(tag_name, full_match, AST::Superscript)
224
233
  when "sub"
225
- handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Subscript)
226
- else
227
- # Unknown HTML tag - treat as text
228
- @text_buffer << full_match
229
- @pos += full_match.length
234
+ handle_paired_tag(tag_name, full_match, AST::Subscript)
230
235
  end
231
236
  end
232
237
 
233
- # Handle <nowiki>...</nowiki> - preserves content as literal text.
234
- def handle_nowiki_tag(closing, full_match)
235
- if closing
236
- @text_buffer << full_match
237
- @pos += full_match.length
238
- return
239
- end
238
+ KNOWN_HTML_TAGS = %w[nowiki code pre br s del u ins sup sub].freeze
240
239
 
240
+ def known_html_tag?(tag_name)
241
+ KNOWN_HTML_TAGS.include?(tag_name)
242
+ end
243
+
244
+ # Advance position and buffer the match as literal text.
245
+ def advance_as_text(full_match)
246
+ @text_buffer << full_match
241
247
  @pos += full_match.length
242
- close_tag = "</nowiki>"
243
- close_pos = @input.index(close_tag, @pos)
248
+ end
249
+
250
+ # Handle <nowiki>...</nowiki> - preserves content as literal text.
251
+ def handle_nowiki_tag(full_match)
252
+ @pos += full_match.length
253
+ close_pos = @input.index("</nowiki>", @pos)
244
254
 
245
255
  if close_pos
246
- raw_content = @input[@pos...close_pos]
247
- @text_buffer << raw_content
248
- @pos = close_pos + close_tag.length
256
+ @text_buffer << @input[@pos...close_pos]
257
+ @pos = close_pos + "</nowiki>".length
249
258
  else
250
- # No closing tag found - treat opening tag as text
251
259
  @text_buffer << full_match
252
260
  end
253
261
  end
254
262
 
255
263
  # Handle paired raw tags like <code>...</code> and <pre>...</pre>.
256
264
  # Content inside is not parsed for wiki markup.
257
- def handle_paired_raw_tag(tag_name, closing, full_match, element_class)
258
- if closing
259
- @text_buffer << full_match
260
- @pos += full_match.length
261
- return
262
- end
263
-
265
+ def handle_paired_raw_tag(tag_name, full_match, element_class)
264
266
  @pos += full_match.length
265
267
  close_tag = "</#{tag_name}>"
266
268
  close_pos = @input.index(close_tag, @pos)
267
269
 
268
270
  if close_pos
269
- raw_content = @input[@pos...close_pos]
270
271
  element = element_class.new
271
- element << AST::Text.new(raw_content)
272
+ element << AST::Text.new(@input[@pos...close_pos])
272
273
  @parent << element
273
274
  @pos = close_pos + close_tag.length
274
275
  else
@@ -278,47 +279,21 @@ module Markbridge
278
279
 
279
280
  # Handle paired formatting tags like <s>, <u>, <sup>, <sub>.
280
281
  # Content inside IS parsed for wiki markup.
281
- def handle_paired_tag(tag_name, closing, self_closing, full_match, element_class)
282
- if closing || self_closing
283
- @text_buffer << full_match
284
- @pos += full_match.length
285
- return
286
- end
287
-
282
+ def handle_paired_tag(tag_name, full_match, element_class)
288
283
  @pos += full_match.length
289
- # Find matching close tag, accounting for the alias tags
290
- close_tags = close_tags_for(tag_name)
291
- close_pos = nil
292
- close_tag_length = 0
293
-
294
- close_tags.each do |ct|
295
- pos = @input.index(ct, @pos)
296
- if pos && (close_pos.nil? || pos < close_pos)
297
- close_pos = pos
298
- close_tag_length = ct.length
299
- end
300
- end
284
+ close_tag = "</#{tag_name}>"
285
+ close_pos = @input.index(close_tag, @pos)
301
286
 
302
287
  if close_pos
303
- inner_content = @input[@pos...close_pos]
304
288
  element = element_class.new
305
- inner_parser = InlineParser.new
306
- inner_parser.parse(inner_content, parent: element)
289
+ parse_inner_content(@input[@pos...close_pos], parent: element)
307
290
  @parent << element
308
- @pos = close_pos + close_tag_length
291
+ @pos = close_pos + close_tag.length
309
292
  else
310
293
  @text_buffer << full_match
311
294
  end
312
295
  end
313
296
 
314
- # Return the possible closing tags for a given tag name.
315
- #
316
- # @param tag_name [String]
317
- # @return [Array<String>]
318
- def close_tags_for(tag_name)
319
- ["</#{tag_name}>"]
320
- end
321
-
322
297
  # Flush accumulated text buffer to the parent as a Text node.
323
298
  def flush_text
324
299
  return if @text_buffer.empty?
@@ -14,12 +14,19 @@ module Markbridge
14
14
  # - Internal links ([[target]] / [[target|display]])
15
15
  # - External links ([url text])
16
16
  # - Preformatted text (lines starting with a space)
17
+ # - Tables ({| ... |})
17
18
  # - HTML tags: <nowiki>, <code>, <pre>, <br>, <s>, <del>, <u>, <ins>, <sup>, <sub>
18
19
  #
19
20
  # @example Basic usage
20
21
  # parser = Markbridge::Parsers::MediaWiki::Parser.new
21
22
  # ast = parser.parse("'''bold''' and ''italic''")
22
23
  class Parser
24
+ def initialize
25
+ @document = nil
26
+ @inline_parser = nil
27
+ @list_stack = []
28
+ end
29
+
23
30
  # Parse MediaWiki wikitext into an AST Document.
24
31
  #
25
32
  # @param input [String] MediaWiki source
@@ -61,6 +68,9 @@ module Markbridge
61
68
  elsif horizontal_rule_line?(line)
62
69
  close_open_lists
63
70
  @document << AST::HorizontalRule.new
71
+ elsif table_start_line?(line)
72
+ close_open_lists
73
+ i = process_table(lines, i)
64
74
  elsif list_line?(line)
65
75
  process_list_item(line)
66
76
  elsif preformatted_line?(line)
@@ -128,6 +138,124 @@ module Markbridge
128
138
  line.strip.empty?
129
139
  end
130
140
 
141
+ # Check if a line starts a table ({|).
142
+ #
143
+ # @param line [String]
144
+ # @return [Boolean]
145
+ def table_start_line?(line)
146
+ line.match?(/\A\s*\{\|/)
147
+ end
148
+
149
+ # Process a table block from {| to |}.
150
+ # Consumes lines until the closing |} is found.
151
+ #
152
+ # @param lines [Array<String>]
153
+ # @param start_index [Integer]
154
+ # @return [Integer] the last index consumed
155
+ def process_table(lines, start_index)
156
+ table = AST::Table.new
157
+ current_row = nil
158
+ i = start_index + 1 # Skip the {| line
159
+
160
+ while i < lines.length
161
+ stripped = lines[i].strip
162
+
163
+ if stripped.start_with?("|}")
164
+ break
165
+ elsif stripped.start_with?("|-")
166
+ # Row separator - next cells will go in a new row
167
+ current_row = nil
168
+ elsif stripped.start_with?("!")
169
+ # Header cells
170
+ current_row = ensure_table_row(table, current_row)
171
+ parse_table_cells(stripped[1..], header: true, row: current_row)
172
+ elsif stripped.start_with?("|")
173
+ # Data cells
174
+ current_row = ensure_table_row(table, current_row)
175
+ parse_table_cells(stripped[1..], header: false, row: current_row)
176
+ end
177
+
178
+ i += 1
179
+ end
180
+
181
+ @document << table
182
+ i
183
+ end
184
+
185
+ # Ensure a row exists for the table, creating one if needed.
186
+ #
187
+ # @param table [AST::Table]
188
+ # @param current_row [AST::TableRow, nil]
189
+ # @return [AST::TableRow]
190
+ def ensure_table_row(table, current_row)
191
+ return current_row if current_row
192
+
193
+ row = AST::TableRow.new
194
+ table << row
195
+ row
196
+ end
197
+
198
+ # Parse cell content from a line and add cells to the row.
199
+ # Cells are separated by !! (headers) or || (data cells).
200
+ # Separators inside [[...]] internal links are preserved so that
201
+ # pipes like [[Target|Display]] survive cell splitting.
202
+ #
203
+ # @param content [String] the line content after the leading ! or |
204
+ # @param header [Boolean] whether these are header cells
205
+ # @param row [AST::TableRow]
206
+ def parse_table_cells(content, header:, row:)
207
+ separator = header ? "!!" : "||"
208
+ cells = split_outside_brackets(content, separator)
209
+
210
+ cells.each do |raw_cell|
211
+ # A single | in a cell separates attributes from content
212
+ parts = split_outside_brackets(raw_cell, "|", limit: 2)
213
+ cell_text = parts.last
214
+
215
+ cell = AST::TableCell.new(header:)
216
+ @inline_parser.parse(cell_text.strip, parent: cell)
217
+ row << cell
218
+ end
219
+ end
220
+
221
+ # Split content on separator, ignoring occurrences inside [[...]] pairs.
222
+ # With limit: n, stops after n-1 splits (matching String#split semantics).
223
+ #
224
+ # @param content [String]
225
+ # @param separator [String]
226
+ # @param limit [Integer, nil]
227
+ # @return [Array<String>]
228
+ def split_outside_brackets(content, separator, limit: nil)
229
+ parts = []
230
+ buffer = +""
231
+ depth = 0
232
+ i = 0
233
+ sep_len = separator.length
234
+
235
+ while i < content.length
236
+ if content[i, 2] == "[["
237
+ depth += 1
238
+ buffer << "[["
239
+ i += 2
240
+ elsif content[i, 2] == "]]" && depth.positive?
241
+ depth -= 1
242
+ buffer << "]]"
243
+ i += 2
244
+ elsif depth.zero? && content[i, sep_len] == separator &&
245
+ (limit.nil? || parts.length < limit - 1)
246
+ parts << buffer
247
+ buffer = +""
248
+ i += sep_len
249
+ else
250
+ buffer << content[i]
251
+ i += 1
252
+ end
253
+ end
254
+
255
+ parts << buffer
256
+ parts
257
+ end
258
+
131
259
  # Process a heading line and add it to the document.
132
260
  #
133
261
  # @param line [String]
@@ -122,6 +122,12 @@ module Markbridge
122
122
  # Paragraphs
123
123
  register("P", Handlers::SimpleHandler.new(AST::Paragraph))
124
124
 
125
+ # Table elements
126
+ register("TABLE", Handlers::SimpleHandler.new(AST::Table))
127
+ register("TR", Handlers::SimpleHandler.new(AST::TableRow))
128
+ register("TD", Handlers::TableCellHandler.new)
129
+ register("TH", Handlers::TableCellHandler.new)
130
+
125
131
  self
126
132
  end
127
133
  end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module TextFormatter
6
+ module Handlers
7
+ # Handler for table cell elements (TD, TH)
8
+ class TableCellHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableCell
11
+ end
12
+
13
+ def process(element:, parent:)
14
+ node = AST::TableCell.new(header: element.name.upcase == "TH")
15
+ parent << node
16
+ node
17
+ end
18
+
19
+ def element_class
20
+ @element_class
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -18,6 +18,7 @@ require_relative "text_formatter/handlers/image_handler"
18
18
  require_relative "text_formatter/handlers/list_handler"
19
19
  require_relative "text_formatter/handlers/quote_handler"
20
20
  require_relative "text_formatter/handlers/url_handler"
21
+ require_relative "text_formatter/handlers/table_cell_handler"
21
22
 
22
23
  # Parser components
23
24
  require_relative "text_formatter/handler_registry"