markbridge 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/ast/table.rb +67 -0
  3. data/lib/markbridge/ast.rb +1 -0
  4. data/lib/markbridge/parsers/bbcode/handler_registry.rb +5 -0
  5. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -11
  6. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +40 -33
  7. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
  8. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
  9. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
  10. data/lib/markbridge/parsers/bbcode/parser.rb +1 -1
  11. data/lib/markbridge/parsers/bbcode.rb +3 -0
  12. data/lib/markbridge/parsers/html/handler_registry.rb +5 -0
  13. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -1
  14. data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
  15. data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
  16. data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
  17. data/lib/markbridge/parsers/html/parser.rb +13 -2
  18. data/lib/markbridge/parsers/html.rb +3 -0
  19. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +105 -130
  20. data/lib/markbridge/parsers/media_wiki/parser.rb +128 -0
  21. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +6 -0
  22. data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
  23. data/lib/markbridge/parsers/text_formatter.rb +1 -0
  24. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +96 -84
  25. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +12 -0
  26. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +0 -10
  27. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +0 -10
  28. data/lib/markbridge/processors/discourse_markdown/scanner.rb +19 -16
  29. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +237 -180
  30. data/lib/markbridge/renderers/discourse/renderer.rb +1 -0
  31. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +1 -1
  32. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
  33. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +2 -1
  34. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -5
  35. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +1 -1
  36. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +1 -1
  37. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +3 -5
  38. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +15 -11
  39. data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
  40. data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
  41. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +124 -0
  42. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +1 -1
  43. data/lib/markbridge/renderers/discourse.rb +3 -0
  44. data/lib/markbridge/version.rb +1 -1
  45. data/lib/markbridge.rb +20 -55
  46. metadata +12 -1
@@ -52,55 +52,20 @@ module Markbridge
52
52
  def check_fenced_boundary(input, pos, line_start:)
53
53
  return nil unless line_start
54
54
 
55
- # Skip up to 3 spaces of indentation
56
- scan_pos = pos
57
- spaces = 0
58
- while spaces < 3 && scan_pos < input.length && input[scan_pos] == " "
59
- spaces += 1
60
- scan_pos += 1
61
- end
62
-
63
- return nil if scan_pos >= input.length
55
+ input_length = input.length
56
+ scan_pos = skip_leading_spaces(input, pos, input_length)
57
+ return nil if scan_pos >= input_length
64
58
 
65
59
  fence_char = input[scan_pos]
66
60
  return nil unless fence_char == "`" || fence_char == "~"
67
61
 
68
- # Count consecutive fence characters
69
- fence_start = scan_pos
70
- fence_length = 0
71
- while scan_pos < input.length && input[scan_pos] == fence_char
72
- fence_length += 1
73
- scan_pos += 1
74
- end
75
-
62
+ fence_length, scan_pos = count_fence_chars(input, scan_pos, fence_char, input_length)
76
63
  return nil if fence_length < 3
77
64
 
78
65
  if @in_fenced_block
79
- # Check if this closes the current block
80
- if fence_char == @fence_char && fence_length >= @fence_length
81
- # Closing fence - must be followed by newline or end of input
82
- # Skip any trailing whitespace
83
- scan_pos += 1 while scan_pos < input.length && input[scan_pos] == " "
84
-
85
- if scan_pos >= input.length || input[scan_pos] == "\n"
86
- @in_fenced_block = false
87
- @fence_char = nil
88
- @fence_length = 0
89
- # Return position after the newline if present
90
- return scan_pos < input.length ? scan_pos + 1 : scan_pos
91
- end
92
- end
93
- nil
66
+ try_close_fence(input, scan_pos, fence_char, fence_length, input_length)
94
67
  else
95
- # Opening fence - skip to end of line (info string)
96
- scan_pos += 1 while scan_pos < input.length && input[scan_pos] != "\n"
97
-
98
- @in_fenced_block = true
99
- @fence_char = fence_char
100
- @fence_length = fence_length
101
-
102
- # Return position after the newline if present
103
- scan_pos < input.length ? scan_pos + 1 : scan_pos
68
+ open_fence(input, scan_pos, fence_char, fence_length, input_length)
104
69
  end
105
70
  end
106
71
 
@@ -116,38 +81,23 @@ module Markbridge
116
81
  return nil unless line_start
117
82
  return nil if @in_fenced_block # Fenced blocks take precedence
118
83
 
119
- # Find end of line
120
- line_end = input.index("\n", pos) || input.length
121
-
122
- # Check if line is blank
84
+ input_length = input.length
85
+ line_end = input.index("\n", pos) || input_length
123
86
  line_content = input[pos...line_end]
124
87
  is_blank = line_content.match?(/\A\s*\z/)
125
-
126
- # Check indentation (4+ spaces or tab)
127
88
  has_code_indent = line_content.start_with?(" ") || line_content.start_with?("\t")
128
89
 
129
90
  if @in_indented_block
130
- if is_blank
131
- # Blank lines continue the indented block
132
- # Return end of line (after newline if present)
133
- return line_end < input.length ? line_end + 1 : line_end
134
- elsif has_code_indent
135
- # Still in indented code
136
- return line_end < input.length ? line_end + 1 : line_end
91
+ if is_blank || has_code_indent
92
+ pos_after_line(line_end, input_length)
137
93
  else
138
- # Non-blank, non-indented line ends the block
139
94
  @in_indented_block = false
140
- return nil
141
- end
142
- else
143
- if has_code_indent
144
- # Start of indented code block
145
- @in_indented_block = true
146
- return line_end < input.length ? line_end + 1 : line_end
95
+ nil
147
96
  end
97
+ elsif has_code_indent
98
+ @in_indented_block = true
99
+ pos_after_line(line_end, input_length)
148
100
  end
149
-
150
- nil
151
101
  end
152
102
 
153
103
  # Check for inline code boundary
@@ -156,34 +106,96 @@ module Markbridge
156
106
  # @return [Integer, nil] end position after inline code, or nil if not at boundary
157
107
  def check_inline_boundary(input, pos)
158
108
  return nil if @in_fenced_block || @in_indented_block
159
- return nil if pos >= input.length || input[pos] != "`"
109
+
110
+ input_length = input.length
111
+ return nil if pos >= input_length || input[pos] != "`"
160
112
 
161
113
  if @in_inline_code
162
- # Check if this closes the current inline code
163
- delimiter_length = @inline_delimiter.length
164
- if input[pos, delimiter_length] == @inline_delimiter
165
- # Check what follows - should not be another backtick
166
- next_pos = pos + delimiter_length
167
- if next_pos >= input.length || input[next_pos] != "`"
168
- @in_inline_code = false
169
- @inline_delimiter = nil
170
- return next_pos
171
- end
172
- end
173
- nil
114
+ try_close_inline(input, pos, input_length)
174
115
  else
175
- # Opening inline code - count backticks
176
- delimiter_start = pos
177
- pos += 1 while pos < input.length && input[pos] == "`"
116
+ open_inline(input, pos, input_length)
117
+ end
118
+ end
178
119
 
179
- @inline_delimiter = input[delimiter_start...pos]
180
- @in_inline_code = true
120
+ private
181
121
 
182
- # Return position after opening delimiter
183
- pos
122
+ # Skip up to 3 leading spaces of indentation.
123
+ def skip_leading_spaces(input, pos, input_length)
124
+ scan_pos = pos
125
+ spaces = 0
126
+ while spaces < 3 && scan_pos < input_length && input[scan_pos] == " "
127
+ spaces += 1
128
+ scan_pos += 1
184
129
  end
130
+ scan_pos
185
131
  end
186
132
 
133
+ # Count consecutive fence characters and return [count, new_position].
134
+ def count_fence_chars(input, scan_pos, fence_char, input_length)
135
+ fence_length = 0
136
+ while scan_pos < input_length && input[scan_pos] == fence_char
137
+ fence_length += 1
138
+ scan_pos += 1
139
+ end
140
+ [fence_length, scan_pos]
141
+ end
142
+
143
+ # Try to close an open fenced code block. Returns position after fence or nil.
144
+ def try_close_fence(input, scan_pos, fence_char, fence_length, input_length)
145
+ return nil unless fence_char == @fence_char && fence_length >= @fence_length
146
+
147
+ # Closing fence must be followed only by spaces then newline/EOF
148
+ scan_pos += 1 while scan_pos < input_length && input[scan_pos] == " "
149
+ return nil unless scan_pos >= input_length || input[scan_pos] == "\n"
150
+
151
+ @in_fenced_block = false
152
+ @fence_char = nil
153
+ @fence_length = 0
154
+ pos_after_line(scan_pos, input_length)
155
+ end
156
+
157
+ # Open a new fenced code block. Returns position after the opening line.
158
+ def open_fence(input, scan_pos, fence_char, fence_length, input_length)
159
+ # Skip to end of line (info string)
160
+ scan_pos += 1 while scan_pos < input_length && input[scan_pos] != "\n"
161
+
162
+ @in_fenced_block = true
163
+ @fence_char = fence_char
164
+ @fence_length = fence_length
165
+ pos_after_line(scan_pos, input_length)
166
+ end
167
+
168
+ # Try to close inline code. Returns position after delimiter or nil.
169
+ def try_close_inline(input, pos, input_length)
170
+ delimiter_length = @inline_delimiter.length
171
+ return nil unless input[pos, delimiter_length] == @inline_delimiter
172
+
173
+ # Should not be followed by another backtick
174
+ next_pos = pos + delimiter_length
175
+ return nil if next_pos < input_length && input[next_pos] == "`"
176
+
177
+ @in_inline_code = false
178
+ @inline_delimiter = nil
179
+ next_pos
180
+ end
181
+
182
+ # Open inline code. Returns position after opening delimiter.
183
+ def open_inline(input, pos, input_length)
184
+ delimiter_start = pos
185
+ pos += 1 while pos < input_length && input[pos] == "`"
186
+
187
+ @inline_delimiter = input[delimiter_start...pos]
188
+ @in_inline_code = true
189
+ pos
190
+ end
191
+
192
+ # Return position after a line (after newline if present, otherwise at end).
193
+ def pos_after_line(line_end, input_length)
194
+ line_end < input_length ? line_end + 1 : line_end
195
+ end
196
+
197
+ public
198
+
187
199
  # Reset the tracker state
188
200
  def reset!
189
201
  @in_fenced_block = false
@@ -50,6 +50,18 @@ module Markbridge
50
50
  end
51
51
  word
52
52
  end
53
+
54
+ # Parse key="value" or key='value' attribute pairs from a string
55
+ # @param attr_string [String, nil] the attribute string to parse
56
+ # @return [Hash<String, String>] parsed attributes with downcased keys
57
+ def parse_attributes(attr_string)
58
+ attrs = {}
59
+ return attrs if attr_string.nil? || attr_string.empty?
60
+
61
+ attr_string.scan(/(\w+)=["']([^"']*)["']/) { |key, value| attrs[key.downcase] = value }
62
+
63
+ attrs
64
+ end
53
65
  end
54
66
  end
55
67
  end
@@ -56,16 +56,6 @@ module Markbridge
56
56
  end
57
57
 
58
58
  private
59
-
60
- def parse_attributes(attr_string)
61
- attrs = {}
62
- return attrs if attr_string.nil? || attr_string.empty?
63
-
64
- # Match key="value" or key='value' patterns
65
- attr_string.scan(/(\w+)=["']([^"']*)["']/) { |key, value| attrs[key.downcase] = value }
66
-
67
- attrs
68
- end
69
59
  end
70
60
  end
71
61
  end
@@ -59,16 +59,6 @@ module Markbridge
59
59
 
60
60
  private
61
61
 
62
- def parse_attributes(attr_string)
63
- attrs = {}
64
- return attrs if attr_string.nil? || attr_string.empty?
65
-
66
- # Match key="value" or key='value' patterns
67
- attr_string.scan(/(\w+)=["']([^"']*)["']/) { |key, value| attrs[key.downcase] = value }
68
-
69
- attrs
70
- end
71
-
72
62
  def extract_options(content)
73
63
  options = []
74
64
  content.each_line do |line|
@@ -49,6 +49,13 @@ module Markbridge
49
49
  def initialize(detectors: DEFAULT_DETECTORS, tag_library: nil, mention_resolver: nil)
50
50
  @detector_instances = build_detectors(detectors, mention_resolver)
51
51
  @tag_library = tag_library
52
+ @code_tracker = nil
53
+ @result = nil
54
+ @nodes = nil
55
+ @node_index = 0
56
+ @pos = 0
57
+ @input = nil
58
+ @line_start = true
52
59
  end
53
60
 
54
61
  # Scan input and extract constructs.
@@ -91,22 +98,8 @@ module Markbridge
91
98
  while @pos < @input.length
92
99
  # Check for fenced code block boundary at line start
93
100
  if @line_start
94
- new_pos = @code_tracker.check_fenced_boundary(@input, @pos, line_start: true)
95
- if new_pos
96
- @result << @input[@pos...new_pos]
97
- @pos = new_pos
98
- @line_start = new_pos > 0 && @input[new_pos - 1] == "\n"
99
- next
100
- end
101
-
102
- # Check for indented code block (4+ spaces or tab)
103
- new_pos = @code_tracker.check_indented_boundary(@input, @pos, line_start: true)
104
- if new_pos
105
- @result << @input[@pos...new_pos]
106
- @pos = new_pos
107
- @line_start = new_pos > 0 && @input[new_pos - 1] == "\n"
108
- next
109
- end
101
+ next if advance_code_boundary(:check_fenced_boundary)
102
+ next if advance_code_boundary(:check_indented_boundary)
110
103
  end
111
104
 
112
105
  # Check for inline code boundary
@@ -145,6 +138,16 @@ module Markbridge
145
138
  end
146
139
  end
147
140
 
141
+ def advance_code_boundary(method)
142
+ new_pos = @code_tracker.public_send(method, @input, @pos, line_start: true)
143
+ return false unless new_pos
144
+
145
+ @result << @input[@pos...new_pos]
146
+ @pos = new_pos
147
+ @line_start = new_pos > 0 && @input[new_pos - 1] == "\n"
148
+ true
149
+ end
150
+
148
151
  def detect_at_position
149
152
  @detector_instances.each do |detector|
150
153
  match = detector.detect(@input, @pos)