markbridge 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/all.rb +4 -7
  3. data/lib/markbridge/ast/document.rb +1 -1
  4. data/lib/markbridge/ast/element.rb +2 -2
  5. data/lib/markbridge/ast/list.rb +2 -2
  6. data/lib/markbridge/ast/table.rb +61 -0
  7. data/lib/markbridge/ast/text.rb +5 -1
  8. data/lib/markbridge/ast.rb +1 -0
  9. data/lib/markbridge/bbcode.rb +4 -0
  10. data/lib/markbridge/gem_loader.rb +2 -3
  11. data/lib/markbridge/html.rb +4 -0
  12. data/lib/markbridge/mediawiki.rb +4 -0
  13. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
  14. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
  15. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
  16. data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
  17. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
  18. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
  19. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
  20. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
  21. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
  22. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
  23. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
  24. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
  25. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
  26. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
  27. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
  28. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
  29. data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
  30. data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
  31. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
  32. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
  33. data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
  34. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
  35. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
  36. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
  37. data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
  38. data/lib/markbridge/parsers/bbcode.rb +4 -0
  39. data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
  40. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
  41. data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
  42. data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
  43. data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
  44. data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
  45. data/lib/markbridge/parsers/html/parser.rb +16 -15
  46. data/lib/markbridge/parsers/html.rb +3 -0
  47. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
  48. data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
  49. data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
  50. data/lib/markbridge/parsers/media_wiki.rb +1 -0
  51. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
  52. data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
  53. data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
  54. data/lib/markbridge/parsers/text_formatter.rb +1 -0
  55. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
  56. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
  57. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
  58. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
  59. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
  60. data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
  61. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
  62. data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
  63. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
  64. data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
  65. data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
  66. data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
  67. data/lib/markbridge/renderers/discourse/tag.rb +14 -1
  68. data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
  69. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
  70. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
  71. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
  72. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
  73. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
  74. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
  75. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
  76. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
  77. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
  78. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
  79. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
  80. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
  81. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
  82. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
  83. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
  84. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
  85. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
  86. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
  87. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
  88. data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
  89. data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
  90. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
  91. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
  92. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
  93. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
  94. data/lib/markbridge/renderers/discourse.rb +4 -0
  95. data/lib/markbridge/textformatter.rb +4 -0
  96. data/lib/markbridge/version.rb +1 -1
  97. data/lib/markbridge.rb +27 -62
  98. metadata +19 -2
@@ -49,6 +49,8 @@ module Markbridge
49
49
  def initialize(detectors: DEFAULT_DETECTORS, tag_library: nil, mention_resolver: nil)
50
50
  @detector_instances = build_detectors(detectors, mention_resolver)
51
51
  @tag_library = tag_library
52
+ # @code_tracker / @result / @nodes / @node_index / @pos / @input /
53
+ # @line_start are set by #scan before use; no defensive init needed.
52
54
  end
53
55
 
54
56
  # Scan input and extract constructs.
@@ -56,14 +58,12 @@ module Markbridge
56
58
  # @param input [String] Discourse Markdown input
57
59
  # @return [ScanResult] result containing processed markdown and extracted nodes
58
60
  def scan(input)
59
- return ScanResult.new(markdown: "", nodes: []) if input.nil? || input.empty?
60
-
61
61
  @code_tracker = CodeBlockTracker.new
62
62
  @result = +""
63
63
  @nodes = []
64
64
  @node_index = 0
65
65
  @pos = 0
66
- @input = input
66
+ @input = input.to_s
67
67
  @line_start = true
68
68
 
69
69
  scan_input
@@ -75,14 +75,10 @@ module Markbridge
75
75
 
76
76
  def build_detectors(detectors, mention_resolver)
77
77
  detectors.map do |klass|
78
- if klass.is_a?(Class)
79
- if klass == Detectors::Mention && mention_resolver
80
- klass.new(type_resolver: mention_resolver)
81
- else
82
- klass.new
83
- end
78
+ if klass == Detectors::Mention
79
+ klass.new(type_resolver: mention_resolver)
84
80
  else
85
- klass
81
+ klass.new
86
82
  end
87
83
  end
88
84
  end
@@ -91,27 +87,14 @@ module Markbridge
91
87
  while @pos < @input.length
92
88
  # Check for fenced code block boundary at line start
93
89
  if @line_start
94
- new_pos = @code_tracker.check_fenced_boundary(@input, @pos, line_start: true)
95
- if new_pos
96
- @result << @input[@pos...new_pos]
97
- @pos = new_pos
98
- @line_start = new_pos > 0 && @input[new_pos - 1] == "\n"
99
- next
100
- end
101
-
102
- # Check for indented code block (4+ spaces or tab)
103
- new_pos = @code_tracker.check_indented_boundary(@input, @pos, line_start: true)
104
- if new_pos
105
- @result << @input[@pos...new_pos]
106
- @pos = new_pos
107
- @line_start = new_pos > 0 && @input[new_pos - 1] == "\n"
108
- next
109
- end
90
+ next if advance_code_boundary(:check_fenced_boundary)
91
+ next if advance_code_boundary(:check_indented_boundary)
110
92
  end
111
93
 
112
- # Check for inline code boundary
113
- if @input[@pos] == "`" && !@code_tracker.in_fenced_block &&
114
- !@code_tracker.in_indented_block
94
+ # Check for inline code boundary. check_inline_boundary's
95
+ # own fenced/indented guard means we don't need to pre-check
96
+ # here — it'll just return nil in those cases.
97
+ if @input[@pos] == "`"
115
98
  new_pos = @code_tracker.check_inline_boundary(@input, @pos)
116
99
  if new_pos
117
100
  @result << @input[@pos...new_pos]
@@ -145,6 +128,22 @@ module Markbridge
145
128
  end
146
129
  end
147
130
 
131
+ def advance_code_boundary(method)
132
+ new_pos = @code_tracker.public_send(method, @input, @pos, line_start: true)
133
+ return false unless new_pos
134
+
135
+ # check_fenced_boundary / check_indented_boundary always stop
136
+ # at pos_after_line, which is either after a "\n" or at EOF.
137
+ # After-newline → @line_start should be true; at EOF the
138
+ # outer `while @pos < @input.length` exits and @line_start
139
+ # is unobservable. Setting true unconditionally drops the
140
+ # `@input[new_pos - 1] == "\n"` dance.
141
+ @result << @input[@pos...new_pos]
142
+ @pos = new_pos
143
+ @line_start = true
144
+ true
145
+ end
146
+
148
147
  def detect_at_position
149
148
  @detector_instances.each do |detector|
150
149
  match = detector.detect(@input, @pos)
@@ -156,26 +155,24 @@ module Markbridge
156
155
  def handle_match(match)
157
156
  node = match.node
158
157
  @nodes << node
158
+ @result << render_placeholder(node)
159
159
 
160
- # Render placeholder using tag library if available
161
- placeholder = render_placeholder(node)
162
- @result << placeholder
163
-
160
+ # Every detector shipped today matches content that ends on a
161
+ # non-newline byte (`]`, `)`, `_`, alphanumeric), so @line_start
162
+ # is always false after a successful match. If a future custom
163
+ # detector produces a match whose end_pos sits right after
164
+ # "\n", re-introduce the `@input[@pos - 1] == "\n"` check.
164
165
  @pos = match.end_pos
165
- @line_start = @pos > 0 && @input[@pos - 1] == "\n"
166
+ @line_start = false
166
167
  @node_index += 1
167
168
  end
168
169
 
169
170
  def render_placeholder(node)
170
171
  if @tag_library
171
172
  tag = @tag_library[node.class]
172
- if tag
173
- # Create a minimal interface for rendering
174
- return tag.render(node, nil)
175
- end
173
+ return tag.render(node, nil) if tag
176
174
  end
177
175
 
178
- # Default placeholder format if no tag library or tag not found
179
176
  default_placeholder(node)
180
177
  end
181
178
 
@@ -189,8 +186,6 @@ module Markbridge
189
186
  "<<EVENT:#{@node_index}:#{node.name}>>"
190
187
  when AST::Upload
191
188
  "<<UPLOAD:#{@node_index}:#{node.sha1}>>"
192
- else
193
- "<<UNKNOWN:#{@node_index}>>"
194
189
  end
195
190
  end
196
191
  end
@@ -15,12 +15,10 @@ module Markbridge
15
15
  # @return [String]
16
16
  def build(content, marker:, indent:)
17
17
  lines = content.split("\n")
18
- lines = [""] if lines.empty? # Handle empty content
19
18
  first_line = "#{indent}#{marker}#{lines.first}"
20
19
 
21
- return "#{first_line}\n" if lines.size == 1
20
+ return "#{first_line}\n" if lines.size < 2
22
21
 
23
- # Handle multi-line content with sophisticated blank line handling
24
22
  format_multiline(lines, first_line, indent)
25
23
  end
26
24
 
@@ -63,15 +61,17 @@ module Markbridge
63
61
  end
64
62
  end
65
63
 
66
- # Handle empty lines in continuation
64
+ # Handle empty lines in continuation. Caller (format_continuation_line)
65
+ # only invokes this when `line.empty?`, and `content.split("\n")`
66
+ # trims trailing empty strings, so the LAST continuation line is
67
+ # never empty — `idx + 1` is always in bounds when we get here.
67
68
  # @param idx [Integer] index in continuation_lines
68
69
  # @param continuation_lines [Array<String>] all continuation lines
69
70
  # @param continuation_indent [String] indent for continuation
70
71
  # @return [String, nil] formatted line or nil to skip
71
72
  def handle_empty_line(idx, continuation_lines, continuation_indent)
72
73
  # Skip empty lines that come before nested list items (structural blanks)
73
- next_line = continuation_lines[idx + 1]
74
- return nil if next_line&.match?(/\A\s*(?:-|\d+\.)\s/)
74
+ return nil if continuation_lines[idx + 1].match?(/\A\s*(?:-|\d+\.)\s/)
75
75
 
76
76
  # Preserve empty lines within text content (paragraph breaks) with indentation
77
77
  continuation_indent
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi"
4
+
5
+ module Markbridge
6
+ module Renderers
7
+ module Discourse
8
+ # Escapes text for safe inclusion in HTML output. Used when rendering
9
+ # content inside a CommonMark HTML block (e.g. TableTag's fallback)
10
+ # where Markdown-level escaping would not be applied.
11
+ class HtmlEscaper
12
+ # @param text [String, nil]
13
+ # @return [String]
14
+ def self.escape(text)
15
+ CGI.escapeHTML(text || "")
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end