markbridge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/lib/markbridge/all.rb +9 -0
- data/lib/markbridge/ast/align.rb +24 -0
- data/lib/markbridge/ast/attachment.rb +42 -0
- data/lib/markbridge/ast/bold.rb +13 -0
- data/lib/markbridge/ast/code.rb +27 -0
- data/lib/markbridge/ast/color.rb +25 -0
- data/lib/markbridge/ast/document.rb +27 -0
- data/lib/markbridge/ast/element.rb +47 -0
- data/lib/markbridge/ast/email.rb +27 -0
- data/lib/markbridge/ast/event.rb +59 -0
- data/lib/markbridge/ast/heading.rb +23 -0
- data/lib/markbridge/ast/horizontal_rule.rb +12 -0
- data/lib/markbridge/ast/image.rb +35 -0
- data/lib/markbridge/ast/italic.rb +13 -0
- data/lib/markbridge/ast/line_break.rb +12 -0
- data/lib/markbridge/ast/list.rb +52 -0
- data/lib/markbridge/ast/list_item.rb +13 -0
- data/lib/markbridge/ast/markdown_text.rb +37 -0
- data/lib/markbridge/ast/mention.rb +29 -0
- data/lib/markbridge/ast/node.rb +19 -0
- data/lib/markbridge/ast/paragraph.rb +13 -0
- data/lib/markbridge/ast/poll.rb +74 -0
- data/lib/markbridge/ast/quote.rb +46 -0
- data/lib/markbridge/ast/size.rb +25 -0
- data/lib/markbridge/ast/spoiler.rb +27 -0
- data/lib/markbridge/ast/strikethrough.rb +13 -0
- data/lib/markbridge/ast/subscript.rb +13 -0
- data/lib/markbridge/ast/superscript.rb +13 -0
- data/lib/markbridge/ast/text.rb +38 -0
- data/lib/markbridge/ast/underline.rb +13 -0
- data/lib/markbridge/ast/upload.rb +74 -0
- data/lib/markbridge/ast/url.rb +27 -0
- data/lib/markbridge/ast.rb +42 -0
- data/lib/markbridge/configuration.rb +11 -0
- data/lib/markbridge/gem_loader.rb +23 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +37 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/strict.rb +12 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +121 -0
- data/lib/markbridge/parsers/bbcode/errors/max_depth_exceeded_error.rb +13 -0
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +160 -0
- data/lib/markbridge/parsers/bbcode/handlers/align_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +104 -0
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +44 -0
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/color_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/email_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +51 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +36 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +64 -0
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +48 -0
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/simple_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/size_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/spoiler_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/url_handler.rb +24 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +123 -0
- data/lib/markbridge/parsers/bbcode/parser_state.rb +93 -0
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +126 -0
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +35 -0
- data/lib/markbridge/parsers/bbcode/raw_content_result.rb +25 -0
- data/lib/markbridge/parsers/bbcode/scanner.rb +231 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +21 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +16 -0
- data/lib/markbridge/parsers/bbcode.rb +56 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +87 -0
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +27 -0
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +40 -0
- data/lib/markbridge/parsers/html/handlers/list_handler.rb +29 -0
- data/lib/markbridge/parsers/html/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/paragraph_handler.rb +17 -0
- data/lib/markbridge/parsers/html/handlers/quote_handler.rb +28 -0
- data/lib/markbridge/parsers/html/handlers/raw_handler.rb +33 -0
- data/lib/markbridge/parsers/html/handlers/simple_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/url_handler.rb +27 -0
- data/lib/markbridge/parsers/html/parser.rb +113 -0
- data/lib/markbridge/parsers/html.rb +30 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +332 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +279 -0
- data/lib/markbridge/parsers/media_wiki.rb +15 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +130 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +40 -0
- data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +45 -0
- data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +28 -0
- data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +27 -0
- data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +32 -0
- data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +31 -0
- data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +37 -0
- data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +29 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +132 -0
- data/lib/markbridge/parsers/text_formatter.rb +31 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +73 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/mention.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +90 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +123 -0
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown.rb +16 -0
- data/lib/markbridge/processors.rb +8 -0
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +83 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +468 -0
- data/lib/markbridge/renderers/discourse/render_context.rb +80 -0
- data/lib/markbridge/renderers/discourse/renderer.rb +63 -0
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +86 -0
- data/lib/markbridge/renderers/discourse/tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tag_library.rb +67 -0
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +46 -0
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +54 -0
- data/lib/markbridge/renderers/discourse/tags/color_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +49 -0
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +87 -0
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +39 -0
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +34 -0
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +51 -0
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +32 -0
- data/lib/markbridge/renderers/discourse/tags/size_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/subscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/superscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +80 -0
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse.rb +50 -0
- data/lib/markbridge/version.rb +5 -0
- data/lib/markbridge.rb +201 -0
- metadata +186 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# High-performance character-by-character BBCode scanner
|
|
7
|
+
# Tokenizes BBCode in O(n) time with minimal allocations and bounded backtracking
|
|
8
|
+
class Scanner
|
|
9
|
+
def initialize(input)
|
|
10
|
+
@input = input
|
|
11
|
+
@length = input.length
|
|
12
|
+
@current_pos = 0
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def next_token
|
|
16
|
+
return nil if end_of_input?
|
|
17
|
+
start_pos = @current_pos
|
|
18
|
+
bracket_index = @input.index("[", @current_pos)
|
|
19
|
+
|
|
20
|
+
if bracket_index.nil?
|
|
21
|
+
text = @input[@current_pos..]
|
|
22
|
+
@current_pos = @length
|
|
23
|
+
return TextToken.new(text:, pos: start_pos)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
if bracket_index > @current_pos
|
|
27
|
+
text = @input[@current_pos...bracket_index]
|
|
28
|
+
@current_pos = bracket_index
|
|
29
|
+
return TextToken.new(text:, pos: start_pos)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
if (tag_token = parse_tag_at_cursor)
|
|
33
|
+
tag_token
|
|
34
|
+
else
|
|
35
|
+
@current_pos += 1
|
|
36
|
+
TextToken.new(text: "[", pos: start_pos)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
TAG_INITIAL_CHAR = /[a-z*]/i
|
|
43
|
+
TAG_NAME_CHAR = /[a-z0-9]/i
|
|
44
|
+
UID_HEX_CHAR = /[0-9a-f]/i
|
|
45
|
+
ATTR_NAME_CHAR = /\w/
|
|
46
|
+
WHITESPACE_CHAR = /\s/
|
|
47
|
+
UNQUOTED_VALUE_STOP = /[\[\]\s]/
|
|
48
|
+
|
|
49
|
+
private_constant :TAG_INITIAL_CHAR,
|
|
50
|
+
:TAG_NAME_CHAR,
|
|
51
|
+
:UID_HEX_CHAR,
|
|
52
|
+
:ATTR_NAME_CHAR,
|
|
53
|
+
:WHITESPACE_CHAR,
|
|
54
|
+
:UNQUOTED_VALUE_STOP
|
|
55
|
+
|
|
56
|
+
def parse_tag_at_cursor
|
|
57
|
+
return nil if current_char != "["
|
|
58
|
+
|
|
59
|
+
tag_start_pos = @current_pos
|
|
60
|
+
@current_pos += 1 # skip '['
|
|
61
|
+
|
|
62
|
+
# Check for closing tag
|
|
63
|
+
closing = current_char == "/"
|
|
64
|
+
@current_pos += 1 if closing
|
|
65
|
+
|
|
66
|
+
# Parse tag name
|
|
67
|
+
tag_name = scan_tag_name
|
|
68
|
+
return rollback(tag_start_pos) unless tag_name
|
|
69
|
+
|
|
70
|
+
# Parse attributes (only for opening tags)
|
|
71
|
+
attrs = closing ? {} : scan_attributes
|
|
72
|
+
return rollback(tag_start_pos) if current_char != "]"
|
|
73
|
+
|
|
74
|
+
@current_pos += 1 # skip ']'
|
|
75
|
+
|
|
76
|
+
# Capture original source text
|
|
77
|
+
source = @input[tag_start_pos...@current_pos]
|
|
78
|
+
|
|
79
|
+
normalized_tag_name = tag_name.downcase
|
|
80
|
+
|
|
81
|
+
if closing
|
|
82
|
+
TagEndToken.new(tag: normalized_tag_name, pos: tag_start_pos, source:)
|
|
83
|
+
else
|
|
84
|
+
TagStartToken.new(tag: normalized_tag_name, attrs:, pos: tag_start_pos, source:)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def rollback(pos)
|
|
89
|
+
@current_pos = pos
|
|
90
|
+
nil
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Scan a tag name: [a-z*.][a-z0-9]*(:uid)?
|
|
94
|
+
# @return [String, nil]
|
|
95
|
+
def scan_tag_name
|
|
96
|
+
start = @current_pos
|
|
97
|
+
|
|
98
|
+
# First character: letter, *, or .
|
|
99
|
+
return nil unless current_char&.match?(TAG_INITIAL_CHAR)
|
|
100
|
+
@current_pos += 1
|
|
101
|
+
|
|
102
|
+
# Remaining characters: letters or digits
|
|
103
|
+
@current_pos += 1 while current_char&.match?(TAG_NAME_CHAR)
|
|
104
|
+
|
|
105
|
+
# Optional :uid suffix (e.g., [quote:abc123])
|
|
106
|
+
if current_char == ":"
|
|
107
|
+
@current_pos += 1
|
|
108
|
+
@current_pos += 1 while current_char&.match?(UID_HEX_CHAR)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
@input[start...@current_pos]
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Scan tag attributes
|
|
115
|
+
# The first `=value` (if present) becomes the `:option` attribute
|
|
116
|
+
# Additional `key=value` pairs become named attributes
|
|
117
|
+
# @return [Hash]
|
|
118
|
+
def scan_attributes
|
|
119
|
+
attrs = {}
|
|
120
|
+
skip_whitespace
|
|
121
|
+
|
|
122
|
+
# First attribute might be option: [tag=value]
|
|
123
|
+
if current_char == "="
|
|
124
|
+
@current_pos += 1
|
|
125
|
+
skip_whitespace
|
|
126
|
+
if (val = scan_attribute_value)
|
|
127
|
+
attrs[:option] = val
|
|
128
|
+
end
|
|
129
|
+
skip_whitespace
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Named attributes: [tag key=value key=value ...]
|
|
133
|
+
while (char = current_char) && char != "]"
|
|
134
|
+
name = scan_while(ATTR_NAME_CHAR)
|
|
135
|
+
break if name.nil?
|
|
136
|
+
|
|
137
|
+
skip_whitespace
|
|
138
|
+
break if current_char != "="
|
|
139
|
+
|
|
140
|
+
@current_pos += 1
|
|
141
|
+
skip_whitespace
|
|
142
|
+
|
|
143
|
+
value = scan_attribute_value
|
|
144
|
+
attrs[name.downcase.to_sym] = value if value
|
|
145
|
+
skip_whitespace
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
attrs
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
def scan_attribute_value
|
|
152
|
+
char = current_char
|
|
153
|
+
if char == '"' || char == "'"
|
|
154
|
+
scan_quoted_string
|
|
155
|
+
else
|
|
156
|
+
scan_unquoted_value
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Scans a quoted attribute value (double or single quoted)
|
|
161
|
+
#
|
|
162
|
+
# IMPORTANT: This method does NOT support escape sequences (e.g., \" or \\).
|
|
163
|
+
# This is intentional - standard BBCode does not define escape syntax.
|
|
164
|
+
# The scanner stops at the first matching quote character.
|
|
165
|
+
#
|
|
166
|
+
# Examples:
|
|
167
|
+
# [url="http://example.com"] → option: "http://example.com" ✓
|
|
168
|
+
# [url='single quotes'] → option: "single quotes" ✓
|
|
169
|
+
# [url="has \"quotes\" inside"] → FAILS (stops at first inner quote) ✗
|
|
170
|
+
#
|
|
171
|
+
# Workaround: Use single quotes if you need double quotes in the value:
|
|
172
|
+
# [url='has "quotes" inside'] → option: "has \"quotes\" inside" ✓
|
|
173
|
+
#
|
|
174
|
+
# @return [String] the unescaped attribute value
|
|
175
|
+
def scan_quoted_string
|
|
176
|
+
quote_char = current_char
|
|
177
|
+
start = (@current_pos += 1) # skip opening quote
|
|
178
|
+
|
|
179
|
+
closing_index = @input.index(quote_char, start)
|
|
180
|
+
|
|
181
|
+
if closing_index
|
|
182
|
+
value = @input[start...closing_index]
|
|
183
|
+
@current_pos = closing_index + 1 # position after closing quote
|
|
184
|
+
else
|
|
185
|
+
value = @input[start..] || ""
|
|
186
|
+
@current_pos = @length
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
value
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def scan_unquoted_value
|
|
193
|
+
scan_until(UNQUOTED_VALUE_STOP)
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
# Consumes characters matching +pattern+; returns substring or nil if empty
|
|
197
|
+
def scan_while(pattern)
|
|
198
|
+
start = @current_pos
|
|
199
|
+
while (char = current_char) && char.match?(pattern)
|
|
200
|
+
@current_pos += 1
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
return nil if @current_pos == start
|
|
204
|
+
@input[start...@current_pos]
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
# Consumes characters until +pattern+ matches; returns substring or nil if empty
|
|
208
|
+
def scan_until(pattern)
|
|
209
|
+
stop_index = @input.index(pattern, @current_pos) || @length
|
|
210
|
+
return nil if stop_index == @current_pos
|
|
211
|
+
|
|
212
|
+
value = @input[@current_pos...stop_index]
|
|
213
|
+
@current_pos = stop_index
|
|
214
|
+
value
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
def current_char
|
|
218
|
+
@input[@current_pos]
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
def skip_whitespace
|
|
222
|
+
@current_pos += 1 while current_char&.match?(WHITESPACE_CHAR)
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def end_of_input?
|
|
226
|
+
@current_pos >= @length
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# Token for a closing BBCode tag such as [/b].
class TagEndToken < Token
  # @return [String] the tag name this token closes (frozen)
  attr_reader :tag

  # @param tag [String] tag name; it is frozen in place
  # @param pos [Integer] forwarded to Token#initialize
  # @param source [String, nil] forwarded to Token#initialize
  def initialize(tag:, pos: 0, source: nil)
    super(pos: pos, source: source)
    @tag = tag.freeze
  end

  # @return [String] short debugging representation
  def inspect = "#<TagEndToken [/#{tag}]>"
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# Token for an opening BBCode tag such as [b] or [url=...].
class TagStartToken < Token
  # @return [String] the tag name (frozen)
  # @return [Hash] the tag's attributes (frozen)
  attr_reader :tag, :attrs

  # @param tag [String] tag name; it is frozen in place
  # @param attrs [Hash] attributes; the hash is frozen in place
  # @param pos [Integer] forwarded to Token#initialize
  # @param source [String, nil] forwarded to Token#initialize
  def initialize(tag:, attrs: {}, pos: 0, source: nil)
    super(pos: pos, source: source)
    @tag = tag.freeze
    @attrs = attrs.freeze
  end

  # @return [String] short debugging representation; the attribute hash
  #   is appended only when it is non-empty
  def inspect
    return "#<TagStartToken [#{tag}]>" if attrs.empty?

    "#<TagStartToken [#{tag}] #{attrs.inspect}>"
  end
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# Token for a run of plain text.
class TextToken < Token
  # @return [String] the text content (frozen)
  attr_reader :text

  # For a text token the source IS the text, so #source reads the same value.
  alias_method :source, :text

  # @param text [String] the text content; it is frozen in place
  # @param pos [Integer] forwarded to Token#initialize
  def initialize(text:, pos: 0)
    super(pos: pos, source: text)
    @text = text.freeze
  end

  # @return [String] short debugging representation
  def inspect = "#<TextToken #{text.inspect}>"
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# AST Nodes
|
|
4
|
+
require_relative "../ast"
|
|
5
|
+
|
|
6
|
+
# Errors
|
|
7
|
+
require_relative "bbcode/errors/max_depth_exceeded_error"
|
|
8
|
+
|
|
9
|
+
# Tokens
|
|
10
|
+
require_relative "bbcode/tokens/token"
|
|
11
|
+
require_relative "bbcode/tokens/text_token"
|
|
12
|
+
require_relative "bbcode/tokens/tag_start_token"
|
|
13
|
+
require_relative "bbcode/tokens/tag_end_token"
|
|
14
|
+
|
|
15
|
+
# Closing Strategies
|
|
16
|
+
require_relative "bbcode/closing_strategies/tag_reconciler"
|
|
17
|
+
require_relative "bbcode/closing_strategies/base"
|
|
18
|
+
require_relative "bbcode/closing_strategies/strict"
|
|
19
|
+
require_relative "bbcode/closing_strategies/reordering"
|
|
20
|
+
|
|
21
|
+
# Base Handlers
|
|
22
|
+
require_relative "bbcode/handlers/base_handler"
|
|
23
|
+
require_relative "bbcode/handlers/raw_handler"
|
|
24
|
+
|
|
25
|
+
# Handlers
|
|
26
|
+
require_relative "bbcode/handlers/align_handler"
|
|
27
|
+
require_relative "bbcode/handlers/attachment_handler"
|
|
28
|
+
require_relative "bbcode/handlers/color_handler"
|
|
29
|
+
require_relative "bbcode/handlers/email_handler"
|
|
30
|
+
require_relative "bbcode/handlers/image_handler"
|
|
31
|
+
require_relative "bbcode/handlers/list_handler"
|
|
32
|
+
require_relative "bbcode/handlers/list_item_handler"
|
|
33
|
+
require_relative "bbcode/handlers/quote_handler"
|
|
34
|
+
require_relative "bbcode/handlers/self_closing_handler"
|
|
35
|
+
require_relative "bbcode/handlers/simple_handler"
|
|
36
|
+
require_relative "bbcode/handlers/size_handler"
|
|
37
|
+
require_relative "bbcode/handlers/spoiler_handler"
|
|
38
|
+
require_relative "bbcode/handlers/url_handler"
|
|
39
|
+
|
|
40
|
+
# Parser components
|
|
41
|
+
require_relative "bbcode/handler_registry"
|
|
42
|
+
require_relative "bbcode/parser_state"
|
|
43
|
+
require_relative "bbcode/peekable_enumerator"
|
|
44
|
+
require_relative "bbcode/raw_content_result"
|
|
45
|
+
require_relative "bbcode/raw_content_collector"
|
|
46
|
+
require_relative "bbcode/scanner"
|
|
47
|
+
|
|
48
|
+
# Parser
|
|
49
|
+
require_relative "bbcode/parser"
|
|
50
|
+
|
|
51
|
+
# Declares the Markbridge::Parsers::BBCode namespace. The body is empty on
# purpose: the namespace's contents come from the files pulled in by the
# require_relative calls earlier in this file.
module Markbridge
  module Parsers
    module BBCode
    end
  end
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
# Lookup table from HTML tag name to the handler responsible for it.
# Tag names are stored and looked up in lowercase.
class HandlerRegistry
  # Build a registry pre-populated with handlers for common HTML tags.
  # @return [HandlerRegistry]
  def self.default
    new.tap do |registry|
      # Inline formatting: each group of alias tags shares one handler
      [
        [%w[b strong], AST::Bold],
        [%w[i em], AST::Italic],
        [%w[s strike del], AST::Strikethrough],
        ["u", AST::Underline],
        ["sup", AST::Superscript],
        ["sub", AST::Subscript],
      ].each do |tags, node_class|
        registry.register(tags, Handlers::SimpleHandler.new(node_class))
      end

      # Code-like tags keep their content as raw text
      registry.register(%w[code pre tt], Handlers::RawHandler.new(AST::Code))

      # Links, images and block quotes
      registry.register("a", Handlers::UrlHandler.new)
      registry.register("img", Handlers::ImageHandler.new)
      registry.register("blockquote", Handlers::QuoteHandler.new)

      # Void elements are handled by lambdas: append the node and return
      # nil because there are no children to descend into.
      registry.register("br", ->(element:, parent:) do
        parent << AST::LineBreak.new
        nil
      end)
      registry.register("hr", ->(element:, parent:) do
        parent << AST::HorizontalRule.new
        nil
      end)

      # Lists
      registry.register(%w[ul ol], Handlers::ListHandler.new)
      registry.register("li", Handlers::ListItemHandler.new)

      # Paragraphs
      registry.register("p", Handlers::ParagraphHandler.new)
    end
  end

  # Build the default registry and optionally let the caller adjust it.
  # @yield [HandlerRegistry] the registry to customize
  # @return [HandlerRegistry]
  def self.build_from_default
    default.tap { |registry| yield(registry) if block_given? }
  end

  def initialize
    @handlers = {}
  end

  # Associate +handler+ with one or more tag names. A later registration
  # for the same tag replaces the earlier one.
  # @param tag_names [String, Array<String>] tag name(s) to register
  # @param handler [BaseHandler, Proc] the handler instance or proc
  # @return [HandlerRegistry] self, so calls can be chained
  def register(tag_names, handler)
    Array(tag_names).each do |name|
      @handlers[normalize_tag(name)] = handler
    end
    self
  end

  # Look up the handler for a tag name (case-insensitive).
  # @param tag_name [String]
  # @return [BaseHandler, Proc, nil] nil when no handler is registered
  def [](tag_name) = @handlers[normalize_tag(tag_name)]

  private

  # Registry keys are lowercase strings.
  def normalize_tag(tag_name) = tag_name.to_s.downcase
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Base class for HTML tag handlers. On its own it does nothing: #process
# is a no-op that subclasses override.
class BaseHandler
  # The element class created by this handler
  # Subclasses must expose this via attr_reader :element_class
  # @return [Class, nil] nil unless @element_class has been assigned
  attr_reader :element_class

  # Process an HTML element and add it to the parent AST node
  # Subclasses should override this method
  # @param element [Nokogiri::XML::Element] the HTML element
  # @param parent [AST::Element] the parent AST node
  # @return [AST::Element, nil] the created element if children should be processed, nil otherwise
  def process(element:, parent:)
    # Default: do nothing, subclasses override
    nil
  end
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for <img> tags
class ImageHandler < BaseHandler
  attr_reader :element_class

  def initialize
    @element_class = AST::Image
  end

  # Append an AST::Image built from the element's src, width and height
  # attributes to +parent+.
  # @param element [#[]] the <img> element; attributes are read with element["name"]
  # @param parent [#<<] the AST node receiving the image
  # @return [nil] always nil: <img> is a void element, so there are no
  #   children to process
  def process(element:, parent:)
    parent << AST::Image.new(
      src: element["src"],
      width: sanitize_dimension(element["width"]),
      height: sanitize_dimension(element["height"]),
    )

    nil
  end

  private

  # Convert a dimension attribute to a positive integer, or nil when the
  # attribute is absent or does not yield a positive number.
  # NOTE(review): the value is converted with #to_i; for a String that reads
  # only the leading digits, so "50%" becomes 50 — confirm that is intended.
  def sanitize_dimension(value)
    number = value&.to_i
    number if number&.positive?
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for list tags (<ul>, <ol>)
class ListHandler < BaseHandler
  attr_reader :element_class

  def initialize
    @element_class = AST::List
  end

  # Append a new AST::List to +parent+; the list is ordered when the
  # element's tag name is "ol" (compared case-insensitively via downcase).
  # @param element [#name] the list element
  # @param parent [#<<] the AST node receiving the list
  # @return [AST::List] the new list, so children are processed into it
  def process(element:, parent:)
    AST::List.new(ordered: element.name.downcase == "ol").tap { |list| parent << list }
  end
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for list item tags (<li>)
class ListItemHandler < BaseHandler
  attr_reader :element_class

  def initialize
    @element_class = AST::ListItem
  end

  # Append a new AST::ListItem to +parent+.
  # @param element [Object] the <li> element (not inspected)
  # @param parent [#<<] the AST node receiving the item
  # @return [AST::ListItem] the new item, so children are processed into it
  def process(element:, parent:)
    AST::ListItem.new.tap { |item| parent << item }
  end
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for <p> tags.
# A SimpleHandler preconfigured with AST::Paragraph, so paragraph
# boundaries are preserved as AST::Paragraph nodes.
class ParagraphHandler < SimpleHandler
  def initialize = super(AST::Paragraph)
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for <blockquote> tags
class QuoteHandler < BaseHandler
  attr_reader :element_class

  def initialize
    @element_class = AST::Quote
  end

  # Append a new AST::Quote to +parent+, passing the element's `cite`
  # attribute (nil when absent) as the quote's author.
  # @param element [#[]] the <blockquote> element
  # @param parent [#<<] the AST node receiving the quote
  # @return [AST::Quote] the new quote, so children are processed into it
  def process(element:, parent:)
    AST::Quote.new(author: element["cite"]).tap { |quote| parent << quote }
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for raw/preformatted tags that preserve content as-is
class RawHandler < BaseHandler
  # Matches a class token that names a language by the common
  # "language-xxx" / "lang-xxx" convention; group 1 is the language.
  LANGUAGE_CLASS_PATTERN = /\A(?:language|lang)-(.+)\z/
  private_constant :LANGUAGE_CLASS_PATTERN

  attr_reader :element_class

  # @param element_class [Class] AST class to instantiate; it must accept
  #   a `language:` keyword
  def initialize(element_class)
    @element_class = element_class
  end

  # Build the configured element from the HTML element's text content and
  # append it to +parent+.
  # @param element [#inner_text, #[]] the HTML element
  # @param parent [#<<] the AST node receiving the new element
  # @return [nil] always nil: the content was consumed here, so children
  #   must not be processed again
  def process(element:, parent:)
    content = element.inner_text

    ast_element = @element_class.new(language: extract_language(element))
    ast_element << AST::Text.new(content) unless content.empty?
    parent << ast_element

    nil
  end

  private

  # Determine the code language from the `class` attribute, falling back
  # to `lang` when there is no `class` attribute.
  #
  # A class token of the form "language-ruby" or "lang-ruby" yields
  # "ruby", even among other classes ("hljs language-ruby"). Without such
  # a token the class attribute is returned unchanged, as before.
  #
  # @return [String, nil]
  def extract_language(element)
    css_class = element["class"]
    return element["lang"] unless css_class

    css_class.split.each do |token|
      named = token[LANGUAGE_CLASS_PATTERN, 1]
      return named if named
    end

    css_class
  end
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for simple formatting tags: creates an element of the
# configured class and lets the caller process children into it.
class SimpleHandler < BaseHandler
  attr_reader :element_class

  # @param element_class [Class] AST class instantiated (with no
  #   arguments) for every processed element
  def initialize(element_class)
    @element_class = element_class
  end

  # Append a new instance of the configured class to +parent+.
  # @param element [Object] the HTML element (not inspected)
  # @param parent [#<<] the AST node receiving the new element
  # @return [Object] the new element, so children are processed into it
  def process(element:, parent:)
    @element_class.new.tap { |node| parent << node }
  end
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|