markbridge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/lib/markbridge/all.rb +9 -0
- data/lib/markbridge/ast/align.rb +24 -0
- data/lib/markbridge/ast/attachment.rb +42 -0
- data/lib/markbridge/ast/bold.rb +13 -0
- data/lib/markbridge/ast/code.rb +27 -0
- data/lib/markbridge/ast/color.rb +25 -0
- data/lib/markbridge/ast/document.rb +27 -0
- data/lib/markbridge/ast/element.rb +47 -0
- data/lib/markbridge/ast/email.rb +27 -0
- data/lib/markbridge/ast/event.rb +59 -0
- data/lib/markbridge/ast/heading.rb +23 -0
- data/lib/markbridge/ast/horizontal_rule.rb +12 -0
- data/lib/markbridge/ast/image.rb +35 -0
- data/lib/markbridge/ast/italic.rb +13 -0
- data/lib/markbridge/ast/line_break.rb +12 -0
- data/lib/markbridge/ast/list.rb +52 -0
- data/lib/markbridge/ast/list_item.rb +13 -0
- data/lib/markbridge/ast/markdown_text.rb +37 -0
- data/lib/markbridge/ast/mention.rb +29 -0
- data/lib/markbridge/ast/node.rb +19 -0
- data/lib/markbridge/ast/paragraph.rb +13 -0
- data/lib/markbridge/ast/poll.rb +74 -0
- data/lib/markbridge/ast/quote.rb +46 -0
- data/lib/markbridge/ast/size.rb +25 -0
- data/lib/markbridge/ast/spoiler.rb +27 -0
- data/lib/markbridge/ast/strikethrough.rb +13 -0
- data/lib/markbridge/ast/subscript.rb +13 -0
- data/lib/markbridge/ast/superscript.rb +13 -0
- data/lib/markbridge/ast/text.rb +38 -0
- data/lib/markbridge/ast/underline.rb +13 -0
- data/lib/markbridge/ast/upload.rb +74 -0
- data/lib/markbridge/ast/url.rb +27 -0
- data/lib/markbridge/ast.rb +42 -0
- data/lib/markbridge/configuration.rb +11 -0
- data/lib/markbridge/gem_loader.rb +23 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +37 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/strict.rb +12 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +121 -0
- data/lib/markbridge/parsers/bbcode/errors/max_depth_exceeded_error.rb +13 -0
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +160 -0
- data/lib/markbridge/parsers/bbcode/handlers/align_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +104 -0
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +44 -0
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/color_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/email_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +51 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +36 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +64 -0
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +48 -0
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/simple_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/size_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/spoiler_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/url_handler.rb +24 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +123 -0
- data/lib/markbridge/parsers/bbcode/parser_state.rb +93 -0
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +126 -0
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +35 -0
- data/lib/markbridge/parsers/bbcode/raw_content_result.rb +25 -0
- data/lib/markbridge/parsers/bbcode/scanner.rb +231 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +21 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +16 -0
- data/lib/markbridge/parsers/bbcode.rb +56 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +87 -0
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +27 -0
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +40 -0
- data/lib/markbridge/parsers/html/handlers/list_handler.rb +29 -0
- data/lib/markbridge/parsers/html/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/paragraph_handler.rb +17 -0
- data/lib/markbridge/parsers/html/handlers/quote_handler.rb +28 -0
- data/lib/markbridge/parsers/html/handlers/raw_handler.rb +33 -0
- data/lib/markbridge/parsers/html/handlers/simple_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/url_handler.rb +27 -0
- data/lib/markbridge/parsers/html/parser.rb +113 -0
- data/lib/markbridge/parsers/html.rb +30 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +332 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +279 -0
- data/lib/markbridge/parsers/media_wiki.rb +15 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +130 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +40 -0
- data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +45 -0
- data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +28 -0
- data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +27 -0
- data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +32 -0
- data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +31 -0
- data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +37 -0
- data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +29 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +132 -0
- data/lib/markbridge/parsers/text_formatter.rb +31 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +73 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/mention.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +90 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +123 -0
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown.rb +16 -0
- data/lib/markbridge/processors.rb +8 -0
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +83 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +468 -0
- data/lib/markbridge/renderers/discourse/render_context.rb +80 -0
- data/lib/markbridge/renderers/discourse/renderer.rb +63 -0
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +86 -0
- data/lib/markbridge/renderers/discourse/tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tag_library.rb +67 -0
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +46 -0
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +54 -0
- data/lib/markbridge/renderers/discourse/tags/color_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +49 -0
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +87 -0
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +39 -0
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +34 -0
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +51 -0
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +32 -0
- data/lib/markbridge/renderers/discourse/tags/size_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/subscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/superscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +80 -0
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse.rb +50 -0
- data/lib/markbridge/version.rb +5 -0
- data/lib/markbridge.rb +201 -0
- metadata +186 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Simple formatting handlers that just push an element
|
|
8
|
+
class SimpleHandler < BaseHandler
  # @return [Class] the class instantiated for every opening tag
  attr_reader :element_class

  # @param element_class [Class] class whose argument-less `new` builds the element
  # @param auto_closeable [Boolean] value later reported by {#auto_closeable?}
  def initialize(element_class, auto_closeable: false)
    @auto_closeable = auto_closeable
    @element_class = element_class
  end

  # Instantiate the configured element class and push the new element onto
  # the parsing context. `registry` and `tokens` are accepted but unused here.
  # @param token [Object] the opening tag token, forwarded to `context.push`
  # @param context [Object] parsing context receiving the element
  # @return [Object] whatever `context.push` returns
  def on_open(token:, context:, registry:, tokens: nil)
    context.push(@element_class.new, token:)
  end

  # @return [Boolean] the `auto_closeable` value given to the constructor
  def auto_closeable?
    @auto_closeable
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for SIZE tags
|
|
8
|
+
# Supports:
|
|
9
|
+
# - [size=20]text[/size]
|
|
10
|
+
# - [size=large]text[/size]
|
|
11
|
+
class SizeHandler < BaseHandler
  # @return [Class] always AST::Size
  attr_reader :element_class

  def initialize
    @element_class = AST::Size
  end

  # Build an AST::Size from the tag attributes and push it onto the context.
  # The `:size` attribute wins; `:option` is the fallback.
  # `registry` and `tokens` are accepted but unused here.
  # @param token [Object] opening tag token exposing `attrs`
  # @param context [Object] parsing context receiving the element
  # @return [Object] whatever `context.push` returns
  def on_open(token:, context:, registry:, tokens: nil)
    requested = token.attrs[:size] || token.attrs[:option]
    context.push(AST::Size.new(size: requested), token:)
  end

  # SIZE tags always report themselves as auto-closeable.
  # @return [Boolean] always true
  def auto_closeable?
    true
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for SPOILER tags
|
|
8
|
+
# Supports:
|
|
9
|
+
# - [spoiler]text[/spoiler]
|
|
10
|
+
# - [spoiler=title]text[/spoiler]
|
|
11
|
+
# - [hide]text[/hide] (alias for spoiler)
|
|
12
|
+
class SpoilerHandler < BaseHandler
  # @return [Class] always AST::Spoiler
  attr_reader :element_class

  def initialize
    @element_class = AST::Spoiler
  end

  # Build an AST::Spoiler from the tag attributes and push it onto the context.
  # The `:title` attribute wins; `:option` is the fallback.
  # `registry` and `tokens` are accepted but unused here.
  # @param token [Object] opening tag token exposing `attrs`
  # @param context [Object] parsing context receiving the element
  # @return [Object] whatever `context.push` returns
  def on_open(token:, context:, registry:, tokens: nil)
    heading = token.attrs[:title] || token.attrs[:option]
    context.push(AST::Spoiler.new(title: heading), token:)
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for URL tags
|
|
8
|
+
class UrlHandler < BaseHandler
  # @return [Class] always AST::Url
  attr_reader :element_class

  def initialize
    @element_class = AST::Url
  end

  # Build an AST::Url from the tag attributes and push it onto the context.
  # The target is taken from the first present attribute among
  # `:href`, `:url` and `:option`, in that order.
  # `registry` and `tokens` are accepted but unused here.
  # @param token [Object] opening tag token exposing `attrs`
  # @param context [Object] parsing context receiving the element
  # @return [Object] whatever `context.push` returns
  def on_open(token:, context:, registry:, tokens: nil)
    target = token.attrs[:href] || token.attrs[:url] || token.attrs[:option]
    context.push(AST::Url.new(href: target), token:)
  end
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# Parses BBCode into an AST using handlers from HandlerRegistry
|
|
7
|
+
class Parser
  # Diagnostics describing the most recent {#parse} call. Before the first
  # parse the counters are 0 and the hashes are empty.
  #
  # - unknown_tags: Hash (default 0) counting start/end tag tokens that had
  #   no registered handler, keyed by tag name
  # - auto_closed_tags_count, depth_exceeded_count, unclosed_raw_tags:
  #   copied from the ParserState used for that parse
  attr_reader :unknown_tags,
              :auto_closed_tags_count,
              :depth_exceeded_count,
              :unclosed_raw_tags

  # Create a new parser with optional custom handlers
  # @param handlers [HandlerRegistry, nil] custom handler registry, defaults to HandlerRegistry.default;
  #   ignored when a block is given
  # @yield [HandlerRegistry] optional block to customize the default registry
  # @example Using default handlers
  #   parser = Parser.new
  # @example Using custom handlers
  #   parser = Parser.new(handlers: my_registry)
  # @example Customizing default handlers
  #   parser = Parser.new do |registry|
  #     registry.register("quote", QuoteHandler.new)
  #   end
  def initialize(handlers: nil, &block)
    @handlers =
      if block_given?
        HandlerRegistry.build_from_default(&block)
      else
        handlers || HandlerRegistry.default
      end
    reset_diagnostics
  end

  # Parse BBCode string into an AST
  # @param input [String] BBCode source
  # @return [AST::Document]
  def parse(input)
    # Fresh objects on every call: a Hash obtained from #unknown_tags after
    # an earlier parse must not be mutated by this one, and a parse that
    # raises must not leave the previous run's numbers behind.
    reset_diagnostics

    document = AST::Document.new
    context = ParserState.new(document)

    parse_tokens(Scanner.new(normalize_line_endings(input)), context)

    @auto_closed_tags_count = context.auto_closed_count
    @depth_exceeded_count = context.depth_exceeded_count
    @unclosed_raw_tags = context.unclosed_raw_tags
    document
  end

  private

  # Put every diagnostic reader back to its "nothing parsed" value.
  # @return [void]
  def reset_diagnostics
    @unknown_tags = Hash.new(0)
    @auto_closed_tags_count = 0
    @depth_exceeded_count = 0
    @unclosed_raw_tags = Hash.new(0)
  end

  # Normalize line endings (CR, CRLF, and Unicode separators) to "\n".
  # A run of consecutive U+2028/U+2029 characters becomes a single "\n".
  # @param input [String]
  # @return [String]
  def normalize_line_endings(input)
    input.gsub(/\r\n?|[\u2028\u2029]+/, "\n")
  end

  # Drain the scanner and dispatch every token by its class.
  # Tokens of any other class are skipped.
  # @param scanner [Scanner]
  # @param context [ParserState]
  def parse_tokens(scanner, context)
    tokens = PeekableEnumerator.new(scanner)

    while tokens.has_next?
      token = tokens.next
      case token
      when TextToken
        process_text(token, context)
      when TagStartToken
        process_tag_start(token, context, tokens)
      when TagEndToken
        process_tag_end(token, context, tokens)
      end
    end
  end

  # Append the token's text to the current node as an AST::Text
  # @param token [TextToken]
  # @param context [ParserState]
  def process_text(token, context)
    context.add_child(AST::Text.new(token.text))
  end

  # Process opening tag: delegate to its handler, or record it as unknown
  # @param token [TagStartToken]
  # @param context [ParserState]
  # @param tokens [PeekableEnumerator] passed on so handlers may read ahead
  def process_tag_start(token, context, tokens)
    if (handler = @handlers[token.tag])
      handler.on_open(token:, context:, registry: @handlers, tokens:)
    else
      handle_unknown_tag(token, context)
    end
  end

  # Process closing tag: delegate to its handler, or record it as unknown
  # @param token [TagEndToken]
  # @param context [ParserState]
  # @param tokens [PeekableEnumerator]
  def process_tag_end(token, context, tokens)
    if (handler = @handlers[token.tag])
      handler.on_close(token:, context:, registry: @handlers, tokens:)
    else
      handle_unknown_tag(token, context)
    end
  end

  # Handle unknown tag by counting it. Nothing is added to the tree for the
  # tag itself; the tokens that follow it are still processed as usual.
  # @param token [Token]
  # @param context [ParserState] currently unused
  def handle_unknown_tag(token, context)
    @unknown_tags[token.tag] += 1
  end
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# Manages parsing state
|
|
7
|
+
class ParserState
  # Nesting level at which #push stops accepting further elements.
  MAX_DEPTH = 100

  attr_reader :current, :depth, :auto_closed_count, :depth_exceeded_count, :unclosed_raw_tags

  # @param root [AST::Element] node that starts out as the current node and
  #   always stays at the bottom of the internal stack
  def initialize(root)
    @root = root
    @current = root
    @node_stack = [root]
    @depth = 0
    @auto_closed_count = 0
    @depth_exceeded_count = 0
    @unclosed_raw_tags = Hash.new(0)
  end

  # Append element to the current node and make it the new current node.
  # Once depth has reached MAX_DEPTH nothing is pushed any more: with a
  # token the tag's source is added as text and false is returned, without
  # a token MaxDepthExceededError is raised.
  # @param element [AST::Element]
  # @param token [Token, nil] the token that produced the element
  # @return [Boolean] true if pushed, false if the depth limit was hit
  # @raise [MaxDepthExceededError] at the depth limit when no token is given
  def push(element, token: nil)
    return refuse_push(token) if @depth >= MAX_DEPTH

    @current << element
    @node_stack << element
    @current = element
    @depth += 1
    true
  end

  # Leave the current element and go back to its parent.
  # At the root nothing changes and the root is returned.
  # @return [AST::Element] the node that is current afterwards
  def pop
    if @node_stack.size > 1
      @node_stack.pop
      @depth -= 1
      @current = @node_stack.last
    else
      @root
    end
  end

  # Append a node to the current node; the current node stays the same.
  # @param node [AST::Node]
  def add_child(node)
    @current << node
  end

  # Add to the running total of auto-closed tags.
  # @param count [Integer]
  def auto_close!(count = 1)
    @auto_closed_count += count
  end

  # Count one more unclosed occurrence of a raw tag.
  # @param tag_name [String]
  def mark_unclosed_raw!(tag_name)
    @unclosed_raw_tags[tag_name] += 1
  end

  # List stack entries starting with the current node and walking towards
  # the root.
  # @param limit [Integer, nil] highest offset from the current node to
  #   include, so up to limit + 1 nodes are returned; nil or a too-large
  #   value reaches the root
  # @return [Array<AST::Node>]
  def elements_from_current(limit = nil)
    return [] if @node_stack.empty?

    deepest = @node_stack.size - 1
    span = limit.nil? ? deepest : [limit, deepest].min

    0.upto(span).map { |offset| @node_stack[deepest - offset] }
  end

  private

  # Depth-limit branch of #push: degrade to text when a token is available,
  # otherwise raise.
  def refuse_push(token)
    raise MaxDepthExceededError, MAX_DEPTH unless token

    @current << AST::Text.new(token.source)
    @depth_exceeded_count += 1
    false
  end
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# Wrapper around a scanner that allows peeking at upcoming tokens
|
|
7
|
+
# without consuming them.
|
|
8
|
+
#
|
|
9
|
+
# This class buffers tokens pulled from a scanner (which must implement
|
|
10
|
+
# `next_token`) so callers can:
|
|
11
|
+
# - inspect the next token with {#peek} without advancing the scanner
|
|
12
|
+
# - inspect several upcoming tokens with {#peek_ahead}
|
|
13
|
+
# - consume tokens with {#next}
|
|
14
|
+
#
|
|
15
|
+
# The enumerator is lazy: tokens are only requested from the scanner
|
|
16
|
+
# when needed. Once the underlying scanner returns `nil`, the enumerator
|
|
17
|
+
# is marked finished and further peeks return `nil` (for single peeks)
|
|
18
|
+
# or an empty array (for multi-peeks).
|
|
19
|
+
#
|
|
20
|
+
# @example Basic usage
|
|
21
|
+
# scanner = YourScanner.new("...") # responds to `next_token`
|
|
22
|
+
# enum = PeekableEnumerator.new(scanner)
|
|
23
|
+
# enum.peek # => next token (no consume)
|
|
24
|
+
# enum.peek_ahead(3) # => array of up to 3 upcoming tokens
|
|
25
|
+
# enum.next # => consumes and returns next token
|
|
26
|
+
#
|
|
27
|
+
# @see Markbridge::Parsers::BBCode::Scanner
|
|
28
|
+
class PeekableEnumerator
  # Wrap a token source.
  #
  # @param scanner [Object] the scanner object that responds to `next_token`
  def initialize(scanner)
    @scanner = scanner
    @finished = false
    @peeked = []
  end

  # Consume and return the next token.
  #
  # Tokens buffered by earlier peeks are handed out first; only when the
  # buffer has nothing to offer is the scanner asked via `next_token`.
  # A `nil` from the scanner marks the enumerator as finished.
  #
  # @return [Object, nil] next token or `nil` when exhausted
  def next
    if @peeked.any?
      @peeked.shift
    elsif !@finished
      @scanner.next_token.tap { |token| @finished = true if token.nil? }
    end
  end

  alias next_token next

  # Return whether more tokens are available.
  #
  # May pull one token from the scanner (keeping it in the buffer) to
  # find out.
  #
  # @return [Boolean] `true` if at least one token is available
  def has_next?
    return true if @peeked.any?
    return false if @finished

    upcoming = @scanner.next_token
    @finished = upcoming.nil?
    @peeked << upcoming unless @finished
    !@finished
  end

  # Peek at the next single token without consuming it.
  #
  # @return [Object, nil] the next token or `nil` when exhausted
  def peek
    unless @peeked.any?
      return nil if @finished

      ensure_peeked(1)
    end
    @peeked.first
  end

  # Peek ahead at up to `count` upcoming tokens without consuming them.
  #
  # Fewer tokens are returned when the scanner runs dry first; a
  # non-positive `count` or an exhausted enumerator yields an empty array.
  #
  # @param count [Integer] number of tokens to peek ahead
  # @return [Array<Object>] array of upcoming tokens (possibly empty)
  def peek_ahead(count)
    return [] if count <= 0

    ensure_peeked(count)
    @peeked.first(count)
  end

  private

  # Fill the peek buffer until it holds `count` tokens or the scanner
  # returns `nil`, whichever comes first.
  #
  # @param count [Integer] desired buffer size
  # @return [void]
  def ensure_peeked(count)
    until @finished || @peeked.size >= count
      token = @scanner.next_token
      if token.nil?
        @finished = true
      else
        @peeked << token
      end
    end
  end
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# Strategy for collecting raw unparsed content between BBCode tags
|
|
7
|
+
class RawContentCollector
  # Gather the source text of every token up to the closing tag that
  # balances the already-opened `tag_name`. Nested start/end tags with the
  # same name are counted and kept in the content; the balancing end tag
  # itself is consumed but left out of the content.
  # @param tag_name [String] the tag to match
  # @param scanner [Scanner] the token source
  # @return [RawContentResult] collected content, with `closed` false when
  #   the token source ran out before the balancing end tag
  def collect(tag_name, scanner)
    buffer = +""
    open_count = 1 # the opening tag was consumed by the caller

    while (tok = scanner.next_token)
      if tok.is_a?(TagStartToken) && tok.tag == tag_name
        open_count += 1
      elsif tok.is_a?(TagEndToken) && tok.tag == tag_name
        open_count -= 1
        return RawContentResult.new(content: buffer, closed: true) if open_count.zero?
      end

      buffer << tok.source
    end

    RawContentResult.new(content: buffer, closed: false)
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
# Result object for raw content collection
|
|
7
|
+
class RawContentResult
  # content: the collected text; closed: the flag passed to the constructor
  attr_reader :content, :closed

  # @param content [String] collected text
  # @param closed [Boolean] whether collection ended on a closing tag
  def initialize(content:, closed:)
    @closed = closed
    @content = content
  end

  # Same value as #closed, under a predicate name.
  alias closed? closed

  # @return [Boolean] logical negation of #closed
  def unclosed?
    !closed
  end
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|