markbridge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/lib/markbridge/all.rb +9 -0
  4. data/lib/markbridge/ast/align.rb +24 -0
  5. data/lib/markbridge/ast/attachment.rb +42 -0
  6. data/lib/markbridge/ast/bold.rb +13 -0
  7. data/lib/markbridge/ast/code.rb +27 -0
  8. data/lib/markbridge/ast/color.rb +25 -0
  9. data/lib/markbridge/ast/document.rb +27 -0
  10. data/lib/markbridge/ast/element.rb +47 -0
  11. data/lib/markbridge/ast/email.rb +27 -0
  12. data/lib/markbridge/ast/event.rb +59 -0
  13. data/lib/markbridge/ast/heading.rb +23 -0
  14. data/lib/markbridge/ast/horizontal_rule.rb +12 -0
  15. data/lib/markbridge/ast/image.rb +35 -0
  16. data/lib/markbridge/ast/italic.rb +13 -0
  17. data/lib/markbridge/ast/line_break.rb +12 -0
  18. data/lib/markbridge/ast/list.rb +52 -0
  19. data/lib/markbridge/ast/list_item.rb +13 -0
  20. data/lib/markbridge/ast/markdown_text.rb +37 -0
  21. data/lib/markbridge/ast/mention.rb +29 -0
  22. data/lib/markbridge/ast/node.rb +19 -0
  23. data/lib/markbridge/ast/paragraph.rb +13 -0
  24. data/lib/markbridge/ast/poll.rb +74 -0
  25. data/lib/markbridge/ast/quote.rb +46 -0
  26. data/lib/markbridge/ast/size.rb +25 -0
  27. data/lib/markbridge/ast/spoiler.rb +27 -0
  28. data/lib/markbridge/ast/strikethrough.rb +13 -0
  29. data/lib/markbridge/ast/subscript.rb +13 -0
  30. data/lib/markbridge/ast/superscript.rb +13 -0
  31. data/lib/markbridge/ast/text.rb +38 -0
  32. data/lib/markbridge/ast/underline.rb +13 -0
  33. data/lib/markbridge/ast/upload.rb +74 -0
  34. data/lib/markbridge/ast/url.rb +27 -0
  35. data/lib/markbridge/ast.rb +42 -0
  36. data/lib/markbridge/configuration.rb +11 -0
  37. data/lib/markbridge/gem_loader.rb +23 -0
  38. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +37 -0
  39. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -0
  40. data/lib/markbridge/parsers/bbcode/closing_strategies/strict.rb +12 -0
  41. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +121 -0
  42. data/lib/markbridge/parsers/bbcode/errors/max_depth_exceeded_error.rb +13 -0
  43. data/lib/markbridge/parsers/bbcode/handler_registry.rb +160 -0
  44. data/lib/markbridge/parsers/bbcode/handlers/align_handler.rb +26 -0
  45. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +104 -0
  46. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +44 -0
  47. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +25 -0
  48. data/lib/markbridge/parsers/bbcode/handlers/color_handler.rb +31 -0
  49. data/lib/markbridge/parsers/bbcode/handlers/email_handler.rb +25 -0
  50. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +51 -0
  51. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +36 -0
  52. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +26 -0
  53. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +64 -0
  54. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +48 -0
  55. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +28 -0
  56. data/lib/markbridge/parsers/bbcode/handlers/simple_handler.rb +28 -0
  57. data/lib/markbridge/parsers/bbcode/handlers/size_handler.rb +31 -0
  58. data/lib/markbridge/parsers/bbcode/handlers/spoiler_handler.rb +28 -0
  59. data/lib/markbridge/parsers/bbcode/handlers/url_handler.rb +24 -0
  60. data/lib/markbridge/parsers/bbcode/parser.rb +123 -0
  61. data/lib/markbridge/parsers/bbcode/parser_state.rb +93 -0
  62. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +126 -0
  63. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +35 -0
  64. data/lib/markbridge/parsers/bbcode/raw_content_result.rb +25 -0
  65. data/lib/markbridge/parsers/bbcode/scanner.rb +231 -0
  66. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +21 -0
  67. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +23 -0
  68. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +23 -0
  69. data/lib/markbridge/parsers/bbcode/tokens/token.rb +16 -0
  70. data/lib/markbridge/parsers/bbcode.rb +56 -0
  71. data/lib/markbridge/parsers/html/handler_registry.rb +87 -0
  72. data/lib/markbridge/parsers/html/handlers/base_handler.rb +27 -0
  73. data/lib/markbridge/parsers/html/handlers/image_handler.rb +40 -0
  74. data/lib/markbridge/parsers/html/handlers/list_handler.rb +29 -0
  75. data/lib/markbridge/parsers/html/handlers/list_item_handler.rb +26 -0
  76. data/lib/markbridge/parsers/html/handlers/paragraph_handler.rb +17 -0
  77. data/lib/markbridge/parsers/html/handlers/quote_handler.rb +28 -0
  78. data/lib/markbridge/parsers/html/handlers/raw_handler.rb +33 -0
  79. data/lib/markbridge/parsers/html/handlers/simple_handler.rb +26 -0
  80. data/lib/markbridge/parsers/html/handlers/url_handler.rb +27 -0
  81. data/lib/markbridge/parsers/html/parser.rb +113 -0
  82. data/lib/markbridge/parsers/html.rb +30 -0
  83. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +332 -0
  84. data/lib/markbridge/parsers/media_wiki/parser.rb +279 -0
  85. data/lib/markbridge/parsers/media_wiki.rb +15 -0
  86. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +130 -0
  87. data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +33 -0
  88. data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +40 -0
  89. data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +45 -0
  90. data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +28 -0
  91. data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +27 -0
  92. data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +32 -0
  93. data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +31 -0
  94. data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +33 -0
  95. data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +37 -0
  96. data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +29 -0
  97. data/lib/markbridge/parsers/text_formatter/parser.rb +132 -0
  98. data/lib/markbridge/parsers/text_formatter.rb +31 -0
  99. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +199 -0
  100. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +57 -0
  101. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +73 -0
  102. data/lib/markbridge/processors/discourse_markdown/detectors/mention.rb +57 -0
  103. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +90 -0
  104. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +123 -0
  105. data/lib/markbridge/processors/discourse_markdown/scanner.rb +199 -0
  106. data/lib/markbridge/processors/discourse_markdown.rb +16 -0
  107. data/lib/markbridge/processors.rb +8 -0
  108. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +83 -0
  109. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +468 -0
  110. data/lib/markbridge/renderers/discourse/render_context.rb +80 -0
  111. data/lib/markbridge/renderers/discourse/renderer.rb +63 -0
  112. data/lib/markbridge/renderers/discourse/rendering_interface.rb +86 -0
  113. data/lib/markbridge/renderers/discourse/tag.rb +29 -0
  114. data/lib/markbridge/renderers/discourse/tag_library.rb +67 -0
  115. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +24 -0
  116. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +46 -0
  117. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +18 -0
  118. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +54 -0
  119. data/lib/markbridge/renderers/discourse/tags/color_tag.rb +27 -0
  120. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +24 -0
  121. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +49 -0
  122. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +21 -0
  123. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +16 -0
  124. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +29 -0
  125. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +18 -0
  126. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +16 -0
  127. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +87 -0
  128. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +39 -0
  129. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +34 -0
  130. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +21 -0
  131. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +51 -0
  132. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +32 -0
  133. data/lib/markbridge/renderers/discourse/tags/size_tag.rb +27 -0
  134. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +24 -0
  135. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +18 -0
  136. data/lib/markbridge/renderers/discourse/tags/subscript_tag.rb +19 -0
  137. data/lib/markbridge/renderers/discourse/tags/superscript_tag.rb +19 -0
  138. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +19 -0
  139. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +80 -0
  140. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +24 -0
  141. data/lib/markbridge/renderers/discourse.rb +50 -0
  142. data/lib/markbridge/version.rb +5 -0
  143. data/lib/markbridge.rb +201 -0
  144. metadata +186 -0
@@ -0,0 +1,231 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module BBCode
      # Cursor-based BBCode tokenizer.
      #
      # Tokens are produced lazily, one per call to {#next_token}. Text between
      # tags becomes a TextToken; a well-formed "[tag ...]" or "[/tag]" becomes
      # a TagStartToken / TagEndToken. When a "[" does not open a well-formed
      # tag, the cursor is rewound to that "[" and the bracket is emitted as a
      # one-character TextToken, so scanning always makes progress.
      class Scanner
        # @param input [String] the BBCode source to tokenize
        def initialize(input)
          @input = input
          @length = input.length
          @current_pos = 0
        end

        # Produces the next token and advances the cursor past it.
        # @return [TextToken, TagStartToken, TagEndToken, nil] nil once the input is exhausted
        def next_token
          return nil if end_of_input?

          token_pos = @current_pos
          next_bracket = @input.index("[", token_pos)

          # Anything in front of the next "[" (or the whole remainder when there
          # is no "[" left) is plain text.
          unless next_bracket == token_pos
            text = next_bracket ? @input[token_pos...next_bracket] : @input[token_pos..]
            @current_pos = next_bracket || @length
            return TextToken.new(text:, pos: token_pos)
          end

          parse_tag_at_cursor || literal_bracket(token_pos)
        end

        private

        TAG_INITIAL_CHAR = /[a-z*]/i
        TAG_NAME_CHAR = /[a-z0-9]/i
        UID_HEX_CHAR = /[0-9a-f]/i
        ATTR_NAME_CHAR = /\w/
        WHITESPACE_CHAR = /\s/
        UNQUOTED_VALUE_STOP = /[\[\]\s]/

        private_constant :TAG_INITIAL_CHAR,
                         :TAG_NAME_CHAR,
                         :UID_HEX_CHAR,
                         :ATTR_NAME_CHAR,
                         :WHITESPACE_CHAR,
                         :UNQUOTED_VALUE_STOP

        # Emits the "[" under the cursor as literal text and steps over it.
        # @param pos [Integer] position of the bracket
        # @return [TextToken]
        def literal_bracket(pos)
          @current_pos += 1
          TextToken.new(text: "[", pos:)
        end

        # Tries to read a complete tag starting at the cursor.
        # On failure the cursor is restored to the "[" and nil is returned.
        # @return [TagStartToken, TagEndToken, nil]
        def parse_tag_at_cursor
          return nil unless current_char == "["

          opening_bracket = @current_pos
          @current_pos += 1 # step over '['

          is_closing = current_char == "/"
          @current_pos += 1 if is_closing

          name = scan_tag_name
          return rollback(opening_bracket) if name.nil?

          # Closing tags carry no attributes.
          attributes = is_closing ? {} : scan_attributes
          return rollback(opening_bracket) unless current_char == "]"

          @current_pos += 1 # step over ']'

          # The exact text of the tag as written, and the case-folded name.
          raw = @input[opening_bracket...@current_pos]
          tag = name.downcase

          return TagEndToken.new(tag:, pos: opening_bracket, source: raw) if is_closing

          TagStartToken.new(tag:, attrs: attributes, pos: opening_bracket, source: raw)
        end

        # Rewinds the cursor after a failed tag attempt.
        # @param pos [Integer] position to restore
        # @return [nil]
        def rollback(pos)
          @current_pos = pos
          nil
        end

        # Reads a tag name: one letter or "*", then letters/digits, then an
        # optional ":" followed by hex digits (e.g. the "quote:abc123" in
        # "[quote:abc123]"). The ":uid" part is included in the returned name.
        # @return [String, nil] nil when the cursor is not on a valid first character
        def scan_tag_name
          name_start = @current_pos
          return nil unless current_char&.match?(TAG_INITIAL_CHAR)

          @current_pos += 1
          scan_while(TAG_NAME_CHAR)

          if current_char == ":"
            @current_pos += 1
            scan_while(UID_HEX_CHAR)
          end

          @input[name_start...@current_pos]
        end

        # Reads the attribute section of an opening tag.
        # A leading "=value" is stored under :option; every following
        # "key=value" pair is stored under the lower-cased key as a Symbol.
        # Scanning stops at "]", at end of input, or at the first thing that is
        # not a "key=" sequence.
        # @return [Hash]
        def scan_attributes
          collected = {}
          skip_whitespace

          # [tag=value] form
          if current_char == "="
            @current_pos += 1
            skip_whitespace
            option = scan_attribute_value
            collected[:option] = option if option
            skip_whitespace
          end

          # [tag key=value key=value ...] form
          until current_char.nil? || current_char == "]"
            key = scan_while(ATTR_NAME_CHAR)
            break unless key

            skip_whitespace
            break unless current_char == "="

            @current_pos += 1
            skip_whitespace

            value = scan_attribute_value
            collected[key.downcase.to_sym] = value if value
            skip_whitespace
          end

          collected
        end

        # Reads a quoted or unquoted value, depending on the character under the cursor.
        # @return [String, nil]
        def scan_attribute_value
          case current_char
          when '"', "'" then scan_quoted_string
          else scan_unquoted_value
          end
        end

        # Reads a single- or double-quoted value.
        #
        # Escape sequences (\" or \\) are NOT supported: the value ends at the
        # first occurrence of the opening quote character. To embed double
        # quotes, wrap the value in single quotes (and vice versa):
        #   [url="http://example.com"]  -> "http://example.com"
        #   [url='has "quotes" inside'] -> "has \"quotes\" inside"
        #
        # When the closing quote is missing, the rest of the input is consumed
        # and returned.
        #
        # @return [String] the text between the quotes
        def scan_quoted_string
          quote = current_char
          @current_pos += 1 # step over the opening quote
          value_start = @current_pos
          quote_end = @input.index(quote, value_start)

          if quote_end.nil?
            @current_pos = @length
            return @input[value_start..] || ""
          end

          @current_pos = quote_end + 1 # just past the closing quote
          @input[value_start...quote_end]
        end

        # Reads an unquoted value, which ends at "[", "]" or whitespace.
        # @return [String, nil]
        def scan_unquoted_value
          scan_until(UNQUOTED_VALUE_STOP)
        end

        # Advances over consecutive characters matching +pattern+.
        # @return [String, nil] the consumed text, or nil when nothing matched
        def scan_while(pattern)
          from = @current_pos
          @current_pos += 1 while current_char&.match?(pattern)
          from == @current_pos ? nil : @input[from...@current_pos]
        end

        # Advances up to (not including) the first match of +pattern+, or to the
        # end of the input when there is none.
        # @return [String, nil] the consumed text, or nil when nothing was consumed
        def scan_until(pattern)
          from = @current_pos
          upto = @input.index(pattern, from) || @length
          return nil if upto == from

          @current_pos = upto
          @input[from...upto]
        end

        # @return [String, nil] the character under the cursor, nil past the end
        def current_char
          @input[@current_pos]
        end

        def skip_whitespace
          scan_while(WHITESPACE_CHAR)
        end

        def end_of_input?
          @current_pos >= @length
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module BBCode
      # A closing BBCode tag such as [/b].
      class TagEndToken < Token
        # @return [String] the tag name (frozen)
        attr_reader :tag

        # @param tag [String] tag name; the object passed in is frozen
        # @param pos [Integer] forwarded to Token
        # @param source [String, nil] forwarded to Token
        def initialize(tag:, pos: 0, source: nil)
          super(pos:, source:)
          @tag = tag.freeze
        end

        # Short debugging representation, e.g. #<TagEndToken [/b]>
        def inspect = "#<TagEndToken [/#{tag}]>"
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module BBCode
      # An opening BBCode tag such as [b] or [url=...], with its attributes.
      class TagStartToken < Token
        # @return [String] the tag name (frozen)
        attr_reader :tag
        # @return [Hash] the tag's attributes (frozen)
        attr_reader :attrs

        # @param tag [String] tag name; the object passed in is frozen
        # @param attrs [Hash] attributes; the object passed in is frozen
        # @param pos [Integer] forwarded to Token
        # @param source [String, nil] forwarded to Token
        def initialize(tag:, attrs: {}, pos: 0, source: nil)
          super(pos:, source:)
          @tag = tag.freeze
          @attrs = attrs.freeze
        end

        # Short debugging representation; attributes are shown only when present.
        def inspect
          pieces = ["#<TagStartToken [#{tag}]"]
          pieces << " #{attrs.inspect}" unless attrs.empty?
          pieces.push(">").join
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module BBCode
      # A run of literal text.
      class TextToken < Token
        # @return [String] the text (frozen)
        attr_reader :text

        # @param text [String] the literal text; the object passed in is frozen
        #   and is also handed to Token as the source
        # @param pos [Integer] forwarded to Token
        def initialize(text:, pos: 0)
          super(pos:, source: text)
          @text = text.freeze
        end

        # For text, the source is the text itself.
        alias source text

        # Short debugging representation, e.g. #<TextToken "hello">
        def inspect = "#<TextToken #{text.inspect}>"
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module BBCode
      # Common base of the BBCode tokens: stores a position and the source text.
      class Token
        # @return [Integer] the position given at construction (0 by default)
        attr_reader :pos
        # @return [String, nil] the source text given at construction
        attr_reader :source

        # @param pos [Integer]
        # @param source [String, nil]
        def initialize(pos: 0, source: nil)
          @source = source
          @pos = pos
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ # AST Nodes
4
+ require_relative "../ast"
5
+
6
+ # Errors
7
+ require_relative "bbcode/errors/max_depth_exceeded_error"
8
+
9
+ # Tokens
10
+ require_relative "bbcode/tokens/token"
11
+ require_relative "bbcode/tokens/text_token"
12
+ require_relative "bbcode/tokens/tag_start_token"
13
+ require_relative "bbcode/tokens/tag_end_token"
14
+
15
+ # Closing Strategies
16
+ require_relative "bbcode/closing_strategies/tag_reconciler"
17
+ require_relative "bbcode/closing_strategies/base"
18
+ require_relative "bbcode/closing_strategies/strict"
19
+ require_relative "bbcode/closing_strategies/reordering"
20
+
21
+ # Base Handlers
22
+ require_relative "bbcode/handlers/base_handler"
23
+ require_relative "bbcode/handlers/raw_handler"
24
+
25
+ # Handlers
26
+ require_relative "bbcode/handlers/align_handler"
27
+ require_relative "bbcode/handlers/attachment_handler"
28
+ require_relative "bbcode/handlers/color_handler"
29
+ require_relative "bbcode/handlers/email_handler"
30
+ require_relative "bbcode/handlers/image_handler"
31
+ require_relative "bbcode/handlers/list_handler"
32
+ require_relative "bbcode/handlers/list_item_handler"
33
+ require_relative "bbcode/handlers/quote_handler"
34
+ require_relative "bbcode/handlers/self_closing_handler"
35
+ require_relative "bbcode/handlers/simple_handler"
36
+ require_relative "bbcode/handlers/size_handler"
37
+ require_relative "bbcode/handlers/spoiler_handler"
38
+ require_relative "bbcode/handlers/url_handler"
39
+
40
+ # Parser components
41
+ require_relative "bbcode/handler_registry"
42
+ require_relative "bbcode/parser_state"
43
+ require_relative "bbcode/peekable_enumerator"
44
+ require_relative "bbcode/raw_content_result"
45
+ require_relative "bbcode/raw_content_collector"
46
+ require_relative "bbcode/scanner"
47
+
48
+ # Parser
49
+ require_relative "bbcode/parser"
50
+
51
module Markbridge
  module Parsers
    # Namespace for the BBCode parser. It is intentionally empty here: its
    # members are defined by the files loaded through the require_relative
    # calls earlier in this file.
    module BBCode
    end
  end
end
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      # Lookup table from HTML tag name to the handler registered for it.
      # Tag names are compared case-insensitively (keys are stored lower-cased).
      class HandlerRegistry
        def initialize
          @handlers = {}
        end

        # Registers +handler+ under one or several tag names, replacing any
        # handler previously stored for those names.
        # @param tag_names [String, Array<String>] tag name(s) to register
        # @param handler [BaseHandler, Proc] the handler instance or proc
        # @return [HandlerRegistry] self, so calls can be chained
        def register(tag_names, handler)
          Array(tag_names).each do |tag_name|
            @handlers[key_for(tag_name)] = handler
          end
          self
        end

        # Looks up the handler for a tag name.
        # @param tag_name [String]
        # @return [BaseHandler, Proc, nil] nil when nothing is registered
        def [](tag_name)
          @handlers[key_for(tag_name)]
        end

        # Builds a fresh registry covering the common HTML tags.
        # @return [HandlerRegistry]
        def self.default
          new.tap do |registry|
            # Inline formatting: one SimpleHandler per AST class
            {
              %w[b strong] => AST::Bold,
              %w[i em] => AST::Italic,
              %w[s strike del] => AST::Strikethrough,
              "u" => AST::Underline,
              "sup" => AST::Superscript,
              "sub" => AST::Subscript,
            }.each do |tags, ast_class|
              registry.register(tags, Handlers::SimpleHandler.new(ast_class))
            end

            # Code-like tags keep their content verbatim
            registry.register(%w[code pre tt], Handlers::RawHandler.new(AST::Code))

            # Links and images
            registry.register("a", Handlers::UrlHandler.new)
            registry.register("img", Handlers::ImageHandler.new)

            # Block quotes
            registry.register("blockquote", Handlers::QuoteHandler.new)

            # Void elements: append the node and return nil because there are
            # no children to descend into
            registry.register(
              "br",
              lambda do |element:, parent:|
                parent << AST::LineBreak.new
                nil
              end,
            )
            registry.register(
              "hr",
              lambda do |element:, parent:|
                parent << AST::HorizontalRule.new
                nil
              end,
            )

            # Lists
            registry.register(%w[ul ol], Handlers::ListHandler.new)
            registry.register("li", Handlers::ListItemHandler.new)

            # Paragraphs (ParagraphHandler is a SimpleHandler for AST::Paragraph)
            registry.register("p", Handlers::ParagraphHandler.new)
          end
        end

        # Builds the default registry and lets the caller adjust it.
        # @yield [HandlerRegistry] the registry to customize
        # @return [HandlerRegistry]
        def self.build_from_default
          default.tap { |registry| yield(registry) if block_given? }
        end

        private

        # Canonical hash key for a tag name: its string form, lower-cased.
        def key_for(tag_name)
          tag_name.to_s.downcase
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Base class of the HTML tag handlers.
        #
        # A handler receives an HTML element together with the AST node it
        # should attach its result to. This base implementation is a no-op;
        # subclasses override #process.
        class BaseHandler
          # The AST class created by this handler. Subclasses set
          # @element_class; on a bare BaseHandler it is nil.
          # @return [Class, nil]
          attr_reader :element_class

          # Processes an HTML element and adds the result to the parent AST node.
          # @param element [Nokogiri::XML::Element] the HTML element
          # @param parent [AST::Element] the parent AST node
          # @return [AST::Element, nil] the created element if children should
          #   be processed into it, nil otherwise (always nil here)
          def process(element:, parent:)
            nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Handler for <img> tags
        class ImageHandler < BaseHandler
          # A pixel dimension: digits, an optional fractional part (ignored)
          # and an optional "px" suffix. Percentages and other units are
          # deliberately excluded - they are not pixel counts.
          PIXEL_DIMENSION = /\A\s*(\d+)(?:\.\d+)?(?:px)?\s*\z/i
          private_constant :PIXEL_DIMENSION

          attr_reader :element_class

          def initialize
            @element_class = AST::Image
          end

          # Appends an AST::Image built from the element's src, width and
          # height attributes to +parent+.
          # @param element [Nokogiri::XML::Element] the <img> element
          # @param parent [AST::Element] the parent AST node
          # @return [nil] always nil: <img> is a void element, there are no
          #   children to process
          def process(element:, parent:)
            src = element["src"]
            width = sanitize_dimension(element["width"])
            height = sanitize_dimension(element["height"])

            parent << AST::Image.new(src:, width:, height:)
            nil
          end

          private

          # Converts a width/height attribute to a positive integer pixel count.
          #
          # Only values that really are pixel counts are accepted ("120",
          # "120px", "120.5"). Anything else - a missing attribute, "50%",
          # "auto", "12em", zero or negative numbers - yields nil, so that a
          # relative size is never mistaken for a pixel size.
          # @param value [String, nil] raw attribute value
          # @return [Integer, nil]
          def sanitize_dimension(value)
            match = PIXEL_DIMENSION.match(value.to_s)
            return nil unless match

            pixels = match[1].to_i
            pixels.positive? ? pixels : nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Handler for list tags (<ul>, <ol>)
        class ListHandler < BaseHandler
          attr_reader :element_class

          def initialize
            @element_class = AST::List
          end

          # Appends an AST::List to +parent+; the list is ordered exactly when
          # the element's name is "ol" (compared case-insensitively).
          # @param element [Nokogiri::XML::Element] the <ul>/<ol> element
          # @param parent [AST::Element] the parent AST node
          # @return [AST::List] the new list, into which children are processed
          def process(element:, parent:)
            is_ordered = element.name.downcase == "ol"
            AST::List.new(ordered: is_ordered).tap { |list| parent << list }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Handler for list item tags (<li>)
        class ListItemHandler < BaseHandler
          attr_reader :element_class

          def initialize
            @element_class = AST::ListItem
          end

          # Appends a new AST::ListItem to +parent+.
          # @param element [Nokogiri::XML::Element] the <li> element (not inspected)
          # @param parent [AST::Element] the parent AST node
          # @return [AST::ListItem] the new item, into which children are processed
          def process(element:, parent:)
            AST::ListItem.new.tap { |item| parent << item }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Handler for <p> tags: a SimpleHandler preconfigured with
        # AST::Paragraph, so paragraph boundaries are kept in the AST.
        class ParagraphHandler < SimpleHandler
          def initialize = super(AST::Paragraph)
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Handler for <blockquote> tags
        class QuoteHandler < BaseHandler
          attr_reader :element_class

          def initialize
            @element_class = AST::Quote
          end

          # Appends an AST::Quote to +parent+, passing the element's "cite"
          # attribute (nil when absent) as the quote's author.
          # @param element [Nokogiri::XML::Element] the <blockquote> element
          # @param parent [AST::Element] the parent AST node
          # @return [AST::Quote] the new quote, into which children are processed
          def process(element:, parent:)
            AST::Quote.new(author: element["cite"]).tap { |quote| parent << quote }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Handler for raw/preformatted tags that preserve content as-is
        class RawHandler < BaseHandler
          # Matches a class name of the form "language-xxx" / "lang-xxx" and
          # captures the "xxx" part.
          LANGUAGE_CLASS = /\A(?:language|lang)-(.+)\z/i
          private_constant :LANGUAGE_CLASS

          attr_reader :element_class

          # @param element_class [Class] AST class to instantiate; it must
          #   accept a +language:+ keyword
          def initialize(element_class)
            @element_class = element_class
          end

          # Appends a new element holding the tag's text content to +parent+.
          # @param element [Nokogiri::XML::Element] the raw/preformatted element
          # @param parent [AST::Element] the parent AST node
          # @return [nil] always nil: the content is taken from inner_text, so
          #   the children must not be processed again
          def process(element:, parent:)
            content = element.inner_text

            ast_element = @element_class.new(language: detect_language(element))
            ast_element << AST::Text.new(content) unless content.empty?
            parent << ast_element

            nil
          end

          private

          # Works out the code language from the element's attributes.
          #
          # Order of preference:
          # 1. a class name prefixed with "language-" or "lang-" (the prefix is
          #    dropped, other class names are ignored);
          # 2. the whole class attribute, when it is not blank;
          # 3. the lang attribute, when it is not blank.
          # @return [String, nil] nil when no language can be determined
          def detect_language(element)
            class_attr = element["class"].to_s.strip

            prefixed = class_attr.split.filter_map { |name| name[LANGUAGE_CLASS, 1] }.first
            return prefixed if prefixed
            return class_attr unless class_attr.empty?

            lang_attr = element["lang"].to_s.strip
            lang_attr.empty? ? nil : lang_attr
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Handler for plain formatting tags: creates one element of the
        # configured class and lets the parser fill it with the tag's children.
        class SimpleHandler < BaseHandler
          attr_reader :element_class

          # @param element_class [Class] AST class to instantiate (no arguments)
          def initialize(element_class)
            @element_class = element_class
          end

          # Appends a new instance of the configured class to +parent+.
          # @param element [Nokogiri::XML::Element] the HTML element (not inspected)
          # @param parent [AST::Element] the parent AST node
          # @return [AST::Element] the new element, into which children are processed
          def process(element:, parent:)
            @element_class.new.tap { |node| parent << node }
          end
        end
      end
    end
  end
end