markbridge 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markbridge/all.rb +4 -7
- data/lib/markbridge/ast/document.rb +1 -1
- data/lib/markbridge/ast/element.rb +2 -2
- data/lib/markbridge/ast/list.rb +2 -2
- data/lib/markbridge/ast/table.rb +61 -0
- data/lib/markbridge/ast/text.rb +5 -1
- data/lib/markbridge/ast.rb +1 -0
- data/lib/markbridge/bbcode.rb +4 -0
- data/lib/markbridge/gem_loader.rb +2 -3
- data/lib/markbridge/html.rb +4 -0
- data/lib/markbridge/mediawiki.rb +4 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
- data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
- data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
- data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
- data/lib/markbridge/parsers/bbcode.rb +4 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
- data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
- data/lib/markbridge/parsers/html/parser.rb +16 -15
- data/lib/markbridge/parsers/html.rb +3 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
- data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
- data/lib/markbridge/parsers/media_wiki.rb +1 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
- data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
- data/lib/markbridge/parsers/text_formatter.rb +1 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
- data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
- data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
- data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
- data/lib/markbridge/renderers/discourse/tag.rb +14 -1
- data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse.rb +4 -0
- data/lib/markbridge/textformatter.rb +4 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +27 -62
- metadata +19 -2
|
@@ -11,7 +11,6 @@ module Markbridge
|
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def on_open(token:, context:, registry:, tokens: nil)
|
|
14
|
-
# Check if ordered: explicit ol/olist tag, or type=1, or option=1
|
|
15
14
|
ordered =
|
|
16
15
|
%w[ol olist].include?(token.tag) || token.attrs[:type] == "1" ||
|
|
17
16
|
token.attrs[:option] == "1"
|
|
@@ -21,10 +20,7 @@ module Markbridge
|
|
|
21
20
|
end
|
|
22
21
|
|
|
23
22
|
def on_close(token:, context:, registry:, tokens: nil)
|
|
24
|
-
|
|
25
|
-
context.pop if context.current.is_a?(AST::ListItem)
|
|
26
|
-
|
|
27
|
-
# Then use default closing behavior
|
|
23
|
+
context.pop if context.current.instance_of?(AST::ListItem)
|
|
28
24
|
super
|
|
29
25
|
end
|
|
30
26
|
|
|
@@ -11,8 +11,7 @@ module Markbridge
|
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def on_open(token:, context:, registry:, tokens: nil)
|
|
14
|
-
|
|
15
|
-
context.pop if context.current.is_a?(AST::ListItem)
|
|
14
|
+
context.pop if context.current.instance_of?(AST::ListItem)
|
|
16
15
|
|
|
17
16
|
element = AST::ListItem.new
|
|
18
17
|
context.push(element, token:)
|
|
@@ -17,46 +17,41 @@ module Markbridge
|
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
def on_open(token:, context:, registry:, tokens: nil)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
username = nil
|
|
20
|
+
attrs = extract_quote_attrs(token)
|
|
21
|
+
element = AST::Quote.new(**attrs)
|
|
22
|
+
context.push(element, token:)
|
|
23
|
+
end
|
|
25
24
|
|
|
26
|
-
|
|
27
|
-
if token.attrs[:author]
|
|
28
|
-
author = token.attrs[:author]
|
|
29
|
-
elsif token.attrs[:option]
|
|
30
|
-
# Parse Discourse-style quote: "username, post:123, topic:456"
|
|
31
|
-
option = token.attrs[:option]
|
|
32
|
-
if option.match?(/,\s*post:\d+/)
|
|
33
|
-
# Discourse format with post/topic
|
|
34
|
-
parts = option.split(",").map(&:strip)
|
|
35
|
-
username = parts[0]
|
|
36
|
-
parts[1..].each do |part|
|
|
37
|
-
if part =~ /^post:(\d+)$/
|
|
38
|
-
post = ::Regexp.last_match(1)
|
|
39
|
-
elsif part =~ /^topic:(\d+)$/
|
|
40
|
-
topic = ::Regexp.last_match(1)
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
author = username
|
|
44
|
-
else
|
|
45
|
-
# Simple author attribution
|
|
46
|
-
author = option
|
|
47
|
-
end
|
|
48
|
-
end
|
|
25
|
+
attr_reader :element_class
|
|
49
26
|
|
|
50
|
-
|
|
51
|
-
username = token.attrs[:username] if token.attrs[:username]
|
|
52
|
-
post = token.attrs[:post] if token.attrs[:post]
|
|
53
|
-
topic = token.attrs[:topic] if token.attrs[:topic]
|
|
27
|
+
private
|
|
54
28
|
|
|
55
|
-
|
|
56
|
-
|
|
29
|
+
def extract_quote_attrs(token)
|
|
30
|
+
author, post, topic, username = extract_from_option(token)
|
|
31
|
+
author ||= token.attrs[:author]
|
|
32
|
+
|
|
33
|
+
{
|
|
34
|
+
author:,
|
|
35
|
+
post: token.attrs[:post] || post,
|
|
36
|
+
topic: token.attrs[:topic] || topic,
|
|
37
|
+
username: token.attrs[:username] || username,
|
|
38
|
+
}
|
|
57
39
|
end
|
|
58
40
|
|
|
59
|
-
|
|
41
|
+
def extract_from_option(token)
|
|
42
|
+
option = token.attrs[:option]
|
|
43
|
+
return nil, nil, nil, nil unless option
|
|
44
|
+
|
|
45
|
+
post = option[/,\s*post:(\d+)/, 1]
|
|
46
|
+
return option, nil, nil, nil unless post
|
|
47
|
+
|
|
48
|
+
# Discourse format: "username, post:123, topic:456" (topic optional,
|
|
49
|
+
# order irrelevant between post: and topic:).
|
|
50
|
+
username = option.split(",").first.strip
|
|
51
|
+
topic = option[/,\s*topic:(\d+)/, 1]
|
|
52
|
+
|
|
53
|
+
[username, post, topic, username]
|
|
54
|
+
end
|
|
60
55
|
end
|
|
61
56
|
end
|
|
62
57
|
end
|
|
@@ -15,19 +15,15 @@ module Markbridge
|
|
|
15
15
|
|
|
16
16
|
def on_open(token:, context:, registry:, tokens:)
|
|
17
17
|
result = @collector.collect(token.tag, tokens)
|
|
18
|
-
|
|
19
|
-
# Track unclosed raw tags for diagnostics
|
|
20
18
|
context.mark_unclosed_raw!(token.tag) if result.unclosed?
|
|
21
19
|
|
|
22
20
|
element = create_element(token:, content: result.content)
|
|
23
21
|
context.add_child(element)
|
|
24
22
|
end
|
|
25
23
|
|
|
26
|
-
#
|
|
24
|
+
# The collector consumes the closing tag, so this fires only when a
|
|
25
|
+
# `[/raw]` token leaks past the collector — treat it as literal text.
|
|
27
26
|
def on_close(token:, context:, registry:, tokens: nil)
|
|
28
|
-
# Raw content was already consumed by collector
|
|
29
|
-
# Closing tag was consumed by collector, so this shouldn't be called
|
|
30
|
-
# If it is called, treat as text
|
|
31
27
|
context.add_child(AST::Text.new(token.source))
|
|
32
28
|
end
|
|
33
29
|
|
|
@@ -15,10 +15,10 @@ module Markbridge
|
|
|
15
15
|
context.add_child(element)
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
# on_close is inherited from BaseHandler. SelfClosing elements are
|
|
19
|
+
# never pushed onto the stack, so the registry's closing strategy
|
|
20
|
+
# always falls through to adding the closing-tag source as text -
|
|
21
|
+
# the same result as a dedicated override.
|
|
22
22
|
|
|
23
23
|
attr_reader :element_class
|
|
24
24
|
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table cell tags (td, th)
|
|
8
|
+
class TableCellHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableCell
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def on_open(token:, context:, registry:, tokens: nil)
|
|
14
|
+
# Auto-close previous cell if still open
|
|
15
|
+
context.pop if context.current.instance_of?(AST::TableCell)
|
|
16
|
+
|
|
17
|
+
element = AST::TableCell.new(header: token.tag == "th")
|
|
18
|
+
context.push(element, token:)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
attr_reader :element_class
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table tags
|
|
8
|
+
class TableHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::Table
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def on_open(token:, context:, registry:, tokens: nil)
|
|
14
|
+
element = AST::Table.new
|
|
15
|
+
context.push(element, token:)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def on_close(token:, context:, registry:, tokens: nil)
|
|
19
|
+
# Auto-close open cell before closing row
|
|
20
|
+
context.pop if context.current.instance_of?(AST::TableCell)
|
|
21
|
+
# Auto-close open row before closing table
|
|
22
|
+
context.pop if context.current.instance_of?(AST::TableRow)
|
|
23
|
+
|
|
24
|
+
super
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
attr_reader :element_class
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table row tags (tr)
|
|
8
|
+
class TableRowHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableRow
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def on_open(token:, context:, registry:, tokens: nil)
|
|
14
|
+
# Auto-close open cell before starting new row
|
|
15
|
+
context.pop if context.current.instance_of?(AST::TableCell)
|
|
16
|
+
# Auto-close previous row if still open
|
|
17
|
+
context.pop if context.current.instance_of?(AST::TableRow)
|
|
18
|
+
|
|
19
|
+
element = AST::TableRow.new
|
|
20
|
+
context.push(element, token:)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def on_close(token:, context:, registry:, tokens: nil)
|
|
24
|
+
# Auto-close open cell before closing row
|
|
25
|
+
context.pop if context.current.instance_of?(AST::TableCell)
|
|
26
|
+
|
|
27
|
+
super
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
attr_reader :element_class
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -66,8 +66,7 @@ module Markbridge
|
|
|
66
66
|
def parse_tokens(scanner, context)
|
|
67
67
|
tokens = PeekableEnumerator.new(scanner)
|
|
68
68
|
|
|
69
|
-
while tokens.
|
|
70
|
-
token = tokens.next
|
|
69
|
+
while (token = tokens.next)
|
|
71
70
|
case token
|
|
72
71
|
when TextToken
|
|
73
72
|
process_text(token, context)
|
|
@@ -94,7 +93,7 @@ module Markbridge
|
|
|
94
93
|
if (handler = @handlers[token.tag])
|
|
95
94
|
handler.on_open(token:, context:, registry: @handlers, tokens:)
|
|
96
95
|
else
|
|
97
|
-
|
|
96
|
+
track_unknown_tag(token)
|
|
98
97
|
end
|
|
99
98
|
end
|
|
100
99
|
|
|
@@ -106,15 +105,13 @@ module Markbridge
|
|
|
106
105
|
if (handler = @handlers[token.tag])
|
|
107
106
|
handler.on_close(token:, context:, registry: @handlers, tokens:)
|
|
108
107
|
else
|
|
109
|
-
|
|
108
|
+
track_unknown_tag(token)
|
|
110
109
|
end
|
|
111
110
|
end
|
|
112
111
|
|
|
113
|
-
#
|
|
114
|
-
# while still processing its children
|
|
112
|
+
# Track unknown tag by name; the wrapper is ignored, children pass through.
|
|
115
113
|
# @param token [Token]
|
|
116
|
-
|
|
117
|
-
def handle_unknown_tag(token, context)
|
|
114
|
+
def track_unknown_tag(token)
|
|
118
115
|
@unknown_tags[token.tag] += 1
|
|
119
116
|
end
|
|
120
117
|
end
|
|
@@ -28,16 +28,13 @@ module Markbridge
|
|
|
28
28
|
# @return [Boolean] true if pushed successfully, false if depth exceeded
|
|
29
29
|
# @raise [MaxDepthExceededError] when pushing would exceed MAX_DEPTH and no token provided
|
|
30
30
|
def push(element, token: nil)
|
|
31
|
-
if @depth
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# Legacy behavior: raise error
|
|
39
|
-
raise MaxDepthExceededError, MAX_DEPTH
|
|
40
|
-
end
|
|
31
|
+
if @depth == MAX_DEPTH
|
|
32
|
+
raise MaxDepthExceededError, MAX_DEPTH unless token
|
|
33
|
+
|
|
34
|
+
# Graceful degradation: treat as text
|
|
35
|
+
@current << AST::Text.new(token.source)
|
|
36
|
+
@depth_exceeded_count += 1
|
|
37
|
+
return false
|
|
41
38
|
end
|
|
42
39
|
|
|
43
40
|
@current << element
|
|
@@ -50,10 +47,10 @@ module Markbridge
|
|
|
50
47
|
# Pop current element and return to parent
|
|
51
48
|
# @return [AST::Element] the parent node
|
|
52
49
|
def pop
|
|
53
|
-
return @root if @node_stack.size
|
|
50
|
+
return @root if @node_stack.size == 1
|
|
54
51
|
|
|
55
52
|
@node_stack.pop
|
|
56
|
-
@current = @node_stack.
|
|
53
|
+
@current = @node_stack.fetch(-1)
|
|
57
54
|
@depth -= 1
|
|
58
55
|
@current
|
|
59
56
|
end
|
|
@@ -80,12 +77,9 @@ module Markbridge
|
|
|
80
77
|
# @param limit [Integer, nil] number of elements to include from the top
|
|
81
78
|
# @return [Array<AST::Node>]
|
|
82
79
|
def elements_from_current(limit = nil)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
limit
|
|
86
|
-
limit = [limit, @node_stack.size - 1].min
|
|
87
|
-
|
|
88
|
-
(0..limit).map { |offset| @node_stack[@node_stack.size - 1 - offset] }
|
|
80
|
+
max_offset = @node_stack.size - 1
|
|
81
|
+
limit = [limit || max_offset, max_offset].min
|
|
82
|
+
(0..limit).map { |offset| @node_stack.fetch(max_offset - offset) }
|
|
89
83
|
end
|
|
90
84
|
end
|
|
91
85
|
end
|
|
@@ -10,7 +10,7 @@ module Markbridge
|
|
|
10
10
|
# `next_token`) so callers can:
|
|
11
11
|
# - inspect the next token with {#peek} without advancing the scanner
|
|
12
12
|
# - inspect several upcoming tokens with {#peek_ahead}
|
|
13
|
-
# - consume tokens with {#next}
|
|
13
|
+
# - consume tokens with {#next} (returns `nil` when exhausted)
|
|
14
14
|
#
|
|
15
15
|
# The enumerator is lazy: tokens are only requested from the scanner
|
|
16
16
|
# when needed. Once the underlying scanner returns `nil`, the enumerator
|
|
@@ -32,71 +32,29 @@ module Markbridge
|
|
|
32
32
|
def initialize(scanner)
|
|
33
33
|
@scanner = scanner
|
|
34
34
|
@peeked = []
|
|
35
|
-
@finished = false
|
|
36
35
|
end
|
|
37
36
|
|
|
38
37
|
# Consume and return the next token.
|
|
39
|
-
#
|
|
40
|
-
# If there are tokens in the internal buffer (from prior peeks) the
|
|
41
|
-
# buffered token is returned. Otherwise, the next token is requested
|
|
42
|
-
# from the underlying scanner via `next_token`.
|
|
43
|
-
#
|
|
44
38
|
# @return [Object, nil] next token or `nil` when exhausted
|
|
45
39
|
def next
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
value = @scanner.next_token
|
|
50
|
-
@finished = true if value.nil?
|
|
51
|
-
value
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# Return whether more tokens are available.
|
|
55
|
-
#
|
|
56
|
-
# This will attempt to fetch one token from the scanner if necessary
|
|
57
|
-
# to determine whether more tokens remain.
|
|
58
|
-
#
|
|
59
|
-
# @return [Boolean] `true` if at least one token is available
|
|
60
|
-
def has_next?
|
|
61
|
-
return true if @peeked.any?
|
|
62
|
-
return false if @finished
|
|
63
|
-
|
|
64
|
-
value = @scanner.next_token
|
|
65
|
-
if value.nil?
|
|
66
|
-
@finished = true
|
|
67
|
-
false
|
|
68
|
-
else
|
|
69
|
-
@peeked << value
|
|
70
|
-
true
|
|
71
|
-
end
|
|
40
|
+
ensure_peeked(1)
|
|
41
|
+
@peeked.shift
|
|
72
42
|
end
|
|
73
43
|
|
|
74
44
|
# Peek at the next single token without consuming it.
|
|
75
|
-
#
|
|
76
|
-
# If the enumerator has been exhausted this returns `nil`.
|
|
77
|
-
#
|
|
78
45
|
# @return [Object, nil] the next token or `nil` when exhausted
|
|
79
46
|
def peek
|
|
80
|
-
return @peeked.first if @peeked.any?
|
|
81
|
-
return nil if @finished
|
|
82
|
-
|
|
83
47
|
ensure_peeked(1)
|
|
84
48
|
@peeked.first
|
|
85
49
|
end
|
|
86
50
|
|
|
87
51
|
# Peek ahead at up to `count` upcoming tokens without consuming them.
|
|
88
|
-
#
|
|
89
|
-
# The method will return an array with at most `count` elements.
|
|
90
|
-
# If fewer tokens remain, a shorter array is returned. When the
|
|
91
|
-
# enumerator is exhausted an empty array is returned.
|
|
92
|
-
#
|
|
93
|
-
# @param count [Integer] number of tokens to peek ahead (non\-negative)
|
|
52
|
+
# @param count [Integer] number of tokens to peek ahead (clamped to 0..)
|
|
94
53
|
# @return [Array<Object>] array of upcoming tokens (possibly empty)
|
|
95
54
|
def peek_ahead(count)
|
|
96
|
-
|
|
97
|
-
|
|
55
|
+
count = [count, 0].max
|
|
98
56
|
ensure_peeked(count)
|
|
99
|
-
@peeked.
|
|
57
|
+
@peeked.first(count)
|
|
100
58
|
end
|
|
101
59
|
|
|
102
60
|
alias next_token next
|
|
@@ -104,19 +62,11 @@ module Markbridge
|
|
|
104
62
|
private
|
|
105
63
|
|
|
106
64
|
# Ensure at least `count` items are present in the peek buffer.
|
|
107
|
-
#
|
|
108
|
-
# This will repeatedly call `next_token` on the scanner until the
|
|
109
|
-
# buffer contains `count` items or the scanner returns `nil`.
|
|
110
|
-
#
|
|
111
|
-
# @param count [Integer] desired buffer size
|
|
112
|
-
# @return [void]
|
|
113
65
|
def ensure_peeked(count)
|
|
114
|
-
while
|
|
66
|
+
while @peeked.size < count
|
|
115
67
|
value = @scanner.next_token
|
|
116
|
-
if value.nil?
|
|
117
|
-
|
|
118
|
-
break
|
|
119
|
-
end
|
|
68
|
+
break if value.nil?
|
|
69
|
+
|
|
120
70
|
@peeked << value
|
|
121
71
|
end
|
|
122
72
|
end
|
|
@@ -15,9 +15,9 @@ module Markbridge
|
|
|
15
15
|
closed = false
|
|
16
16
|
|
|
17
17
|
while (token = scanner.next_token)
|
|
18
|
-
if token.
|
|
18
|
+
if token.instance_of?(TagStartToken) && token.tag == tag_name
|
|
19
19
|
depth += 1
|
|
20
|
-
elsif token.
|
|
20
|
+
elsif token.instance_of?(TagEndToken) && token.tag == tag_name
|
|
21
21
|
if (depth -= 1) == 0
|
|
22
22
|
closed = true
|
|
23
23
|
break
|
|
@@ -20,16 +20,12 @@ module Markbridge
|
|
|
20
20
|
if bracket_index.nil?
|
|
21
21
|
text = @input[@current_pos..]
|
|
22
22
|
@current_pos = @length
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
if bracket_index > @current_pos
|
|
23
|
+
TextToken.new(text:, pos: start_pos)
|
|
24
|
+
elsif bracket_index > @current_pos
|
|
27
25
|
text = @input[@current_pos...bracket_index]
|
|
28
26
|
@current_pos = bracket_index
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if (tag_token = parse_tag_at_cursor)
|
|
27
|
+
TextToken.new(text:, pos: start_pos)
|
|
28
|
+
elsif (tag_token = parse_tag_at_cursor)
|
|
33
29
|
tag_token
|
|
34
30
|
else
|
|
35
31
|
@current_pos += 1
|
|
@@ -53,35 +49,25 @@ module Markbridge
|
|
|
53
49
|
:WHITESPACE_CHAR,
|
|
54
50
|
:UNQUOTED_VALUE_STOP
|
|
55
51
|
|
|
52
|
+
# @return [Token, nil] tag token or nil if not a valid tag (caller rolls back)
|
|
53
|
+
# Precondition: caller has verified current_char == "[".
|
|
56
54
|
def parse_tag_at_cursor
|
|
57
|
-
return nil if current_char != "["
|
|
58
|
-
|
|
59
55
|
tag_start_pos = @current_pos
|
|
60
56
|
@current_pos += 1 # skip '['
|
|
61
|
-
|
|
62
|
-
# Check for closing tag
|
|
63
|
-
closing = current_char == "/"
|
|
64
|
-
@current_pos += 1 if closing
|
|
65
|
-
|
|
66
|
-
# Parse tag name
|
|
57
|
+
closing = consume("/")
|
|
67
58
|
tag_name = scan_tag_name
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
# Parse attributes (only for opening tags)
|
|
71
|
-
attrs = closing ? {} : scan_attributes
|
|
72
|
-
return rollback(tag_start_pos) if current_char != "]"
|
|
59
|
+
attrs = (closing || tag_name.nil?) ? {} : scan_attributes
|
|
60
|
+
return rollback(tag_start_pos) unless tag_name && consume("]")
|
|
73
61
|
|
|
74
|
-
@current_pos += 1 # skip ']'
|
|
75
|
-
|
|
76
|
-
# Capture original source text
|
|
77
62
|
source = @input[tag_start_pos...@current_pos]
|
|
63
|
+
build_token(closing:, tag: tag_name.downcase, attrs:, pos: tag_start_pos, source:)
|
|
64
|
+
end
|
|
78
65
|
|
|
79
|
-
|
|
80
|
-
|
|
66
|
+
def build_token(closing:, tag:, attrs:, pos:, source:)
|
|
81
67
|
if closing
|
|
82
|
-
TagEndToken.new(tag
|
|
68
|
+
TagEndToken.new(tag:, pos:, source:)
|
|
83
69
|
else
|
|
84
|
-
TagStartToken.new(tag
|
|
70
|
+
TagStartToken.new(tag:, attrs:, pos:, source:)
|
|
85
71
|
end
|
|
86
72
|
end
|
|
87
73
|
|
|
@@ -90,19 +76,21 @@ module Markbridge
|
|
|
90
76
|
nil
|
|
91
77
|
end
|
|
92
78
|
|
|
93
|
-
# Scan a tag name: [a-z
|
|
79
|
+
# Scan a tag name: [a-z*][a-z0-9]*(:hex*)?
|
|
80
|
+
#
|
|
81
|
+
# Char-by-char rather than a single regex over `@input[pos..]`
|
|
82
|
+
# because the regex form allocates a substring for every tag,
|
|
83
|
+
# which is a dominant cost on tag-heavy input. The char-based
|
|
84
|
+
# loop is ~3x faster under YJIT.
|
|
94
85
|
# @return [String, nil]
|
|
95
86
|
def scan_tag_name
|
|
96
87
|
start = @current_pos
|
|
97
88
|
|
|
98
|
-
# First character: letter, *, or .
|
|
99
89
|
return nil unless current_char&.match?(TAG_INITIAL_CHAR)
|
|
100
90
|
@current_pos += 1
|
|
101
91
|
|
|
102
|
-
# Remaining characters: letters or digits
|
|
103
92
|
@current_pos += 1 while current_char&.match?(TAG_NAME_CHAR)
|
|
104
93
|
|
|
105
|
-
# Optional :uid suffix (e.g., [quote:abc123])
|
|
106
94
|
if current_char == ":"
|
|
107
95
|
@current_pos += 1
|
|
108
96
|
@current_pos += 1 while current_char&.match?(UID_HEX_CHAR)
|
|
@@ -119,7 +107,6 @@ module Markbridge
|
|
|
119
107
|
attrs = {}
|
|
120
108
|
skip_whitespace
|
|
121
109
|
|
|
122
|
-
# First attribute might be option: [tag=value]
|
|
123
110
|
if current_char == "="
|
|
124
111
|
@current_pos += 1
|
|
125
112
|
skip_whitespace
|
|
@@ -129,17 +116,11 @@ module Markbridge
|
|
|
129
116
|
skip_whitespace
|
|
130
117
|
end
|
|
131
118
|
|
|
132
|
-
|
|
133
|
-
while (char = current_char) && char != "]"
|
|
134
|
-
name = scan_while(ATTR_NAME_CHAR)
|
|
135
|
-
break if name.nil?
|
|
136
|
-
|
|
119
|
+
while (name = scan_while(ATTR_NAME_CHAR))
|
|
137
120
|
skip_whitespace
|
|
138
|
-
break
|
|
121
|
+
break unless consume("=")
|
|
139
122
|
|
|
140
|
-
@current_pos += 1
|
|
141
123
|
skip_whitespace
|
|
142
|
-
|
|
143
124
|
value = scan_attribute_value
|
|
144
125
|
attrs[name.downcase.to_sym] = value if value
|
|
145
126
|
skip_whitespace
|
|
@@ -148,6 +129,13 @@ module Markbridge
|
|
|
148
129
|
attrs
|
|
149
130
|
end
|
|
150
131
|
|
|
132
|
+
def consume(char)
|
|
133
|
+
return false if current_char != char
|
|
134
|
+
|
|
135
|
+
@current_pos += 1
|
|
136
|
+
true
|
|
137
|
+
end
|
|
138
|
+
|
|
151
139
|
def scan_attribute_value
|
|
152
140
|
char = current_char
|
|
153
141
|
if char == '"' || char == "'"
|
|
@@ -171,22 +159,15 @@ module Markbridge
|
|
|
171
159
|
# Workaround: Use single quotes if you need double quotes in the value:
|
|
172
160
|
# [url='has "quotes" inside'] → option: "has \"quotes\" inside" ✓
|
|
173
161
|
#
|
|
174
|
-
# @return [String] the unescaped attribute value
|
|
162
|
+
# @return [String, nil] the unescaped attribute value, or nil if unterminated
|
|
175
163
|
def scan_quoted_string
|
|
176
164
|
quote_char = current_char
|
|
177
165
|
start = (@current_pos += 1) # skip opening quote
|
|
178
|
-
|
|
179
166
|
closing_index = @input.index(quote_char, start)
|
|
167
|
+
return nil unless closing_index
|
|
180
168
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
@current_pos = closing_index + 1 # position after closing quote
|
|
184
|
-
else
|
|
185
|
-
value = @input[start..] || ""
|
|
186
|
-
@current_pos = @length
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
value
|
|
169
|
+
@current_pos = closing_index + 1
|
|
170
|
+
@input[start...closing_index]
|
|
190
171
|
end
|
|
191
172
|
|
|
192
173
|
def scan_unquoted_value
|
|
@@ -195,18 +176,18 @@ module Markbridge
|
|
|
195
176
|
|
|
196
177
|
# Consumes characters matching +pattern+; returns substring or nil if empty
|
|
197
178
|
def scan_while(pattern)
|
|
198
|
-
|
|
199
|
-
while
|
|
200
|
-
|
|
201
|
-
end
|
|
202
|
-
|
|
203
|
-
return nil if @current_pos == start
|
|
204
|
-
@input[start...@current_pos]
|
|
179
|
+
stop_index = @current_pos
|
|
180
|
+
stop_index += 1 while stop_index < @length && @input[stop_index].match?(pattern)
|
|
181
|
+
consume_range(stop_index)
|
|
205
182
|
end
|
|
206
183
|
|
|
207
|
-
# Consumes characters until +pattern+ matches; returns substring or nil if empty
|
|
184
|
+
# Consumes characters until +pattern+ matches (or end of input); returns substring or nil if empty
|
|
208
185
|
def scan_until(pattern)
|
|
209
|
-
|
|
186
|
+
consume_range(@input.index(pattern, @current_pos) || @length)
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# Slice [@current_pos, stop_index), advance the cursor, or return nil for empty.
|
|
190
|
+
def consume_range(stop_index)
|
|
210
191
|
return nil if stop_index == @current_pos
|
|
211
192
|
|
|
212
193
|
value = @input[@current_pos...stop_index]
|
|
@@ -219,11 +200,16 @@ module Markbridge
|
|
|
219
200
|
end
|
|
220
201
|
|
|
221
202
|
def skip_whitespace
|
|
222
|
-
@current_pos += 1 while
|
|
203
|
+
@current_pos += 1 while @current_pos < @length &&
|
|
204
|
+
@input[@current_pos].match?(WHITESPACE_CHAR)
|
|
223
205
|
end
|
|
224
206
|
|
|
225
207
|
def end_of_input?
|
|
226
|
-
@current_pos
|
|
208
|
+
# All callers maintain @current_pos <= @length (scan_while
|
|
209
|
+
# bounds on @length; scan_until uses `index || @length`;
|
|
210
|
+
# consume is a no-op at EOF); `==` and `>=` are observably
|
|
211
|
+
# identical here.
|
|
212
|
+
@current_pos == @length
|
|
227
213
|
end
|
|
228
214
|
end
|
|
229
215
|
end
|