markbridge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/lib/markbridge/all.rb +9 -0
- data/lib/markbridge/ast/align.rb +24 -0
- data/lib/markbridge/ast/attachment.rb +42 -0
- data/lib/markbridge/ast/bold.rb +13 -0
- data/lib/markbridge/ast/code.rb +27 -0
- data/lib/markbridge/ast/color.rb +25 -0
- data/lib/markbridge/ast/document.rb +27 -0
- data/lib/markbridge/ast/element.rb +47 -0
- data/lib/markbridge/ast/email.rb +27 -0
- data/lib/markbridge/ast/event.rb +59 -0
- data/lib/markbridge/ast/heading.rb +23 -0
- data/lib/markbridge/ast/horizontal_rule.rb +12 -0
- data/lib/markbridge/ast/image.rb +35 -0
- data/lib/markbridge/ast/italic.rb +13 -0
- data/lib/markbridge/ast/line_break.rb +12 -0
- data/lib/markbridge/ast/list.rb +52 -0
- data/lib/markbridge/ast/list_item.rb +13 -0
- data/lib/markbridge/ast/markdown_text.rb +37 -0
- data/lib/markbridge/ast/mention.rb +29 -0
- data/lib/markbridge/ast/node.rb +19 -0
- data/lib/markbridge/ast/paragraph.rb +13 -0
- data/lib/markbridge/ast/poll.rb +74 -0
- data/lib/markbridge/ast/quote.rb +46 -0
- data/lib/markbridge/ast/size.rb +25 -0
- data/lib/markbridge/ast/spoiler.rb +27 -0
- data/lib/markbridge/ast/strikethrough.rb +13 -0
- data/lib/markbridge/ast/subscript.rb +13 -0
- data/lib/markbridge/ast/superscript.rb +13 -0
- data/lib/markbridge/ast/text.rb +38 -0
- data/lib/markbridge/ast/underline.rb +13 -0
- data/lib/markbridge/ast/upload.rb +74 -0
- data/lib/markbridge/ast/url.rb +27 -0
- data/lib/markbridge/ast.rb +42 -0
- data/lib/markbridge/configuration.rb +11 -0
- data/lib/markbridge/gem_loader.rb +23 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +37 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/strict.rb +12 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +121 -0
- data/lib/markbridge/parsers/bbcode/errors/max_depth_exceeded_error.rb +13 -0
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +160 -0
- data/lib/markbridge/parsers/bbcode/handlers/align_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +104 -0
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +44 -0
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/color_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/email_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +51 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +36 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +64 -0
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +48 -0
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/simple_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/size_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/spoiler_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/url_handler.rb +24 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +123 -0
- data/lib/markbridge/parsers/bbcode/parser_state.rb +93 -0
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +126 -0
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +35 -0
- data/lib/markbridge/parsers/bbcode/raw_content_result.rb +25 -0
- data/lib/markbridge/parsers/bbcode/scanner.rb +231 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +21 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +16 -0
- data/lib/markbridge/parsers/bbcode.rb +56 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +87 -0
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +27 -0
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +40 -0
- data/lib/markbridge/parsers/html/handlers/list_handler.rb +29 -0
- data/lib/markbridge/parsers/html/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/paragraph_handler.rb +17 -0
- data/lib/markbridge/parsers/html/handlers/quote_handler.rb +28 -0
- data/lib/markbridge/parsers/html/handlers/raw_handler.rb +33 -0
- data/lib/markbridge/parsers/html/handlers/simple_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/url_handler.rb +27 -0
- data/lib/markbridge/parsers/html/parser.rb +113 -0
- data/lib/markbridge/parsers/html.rb +30 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +332 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +279 -0
- data/lib/markbridge/parsers/media_wiki.rb +15 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +130 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +40 -0
- data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +45 -0
- data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +28 -0
- data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +27 -0
- data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +32 -0
- data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +31 -0
- data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +37 -0
- data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +29 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +132 -0
- data/lib/markbridge/parsers/text_formatter.rb +31 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +73 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/mention.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +90 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +123 -0
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown.rb +16 -0
- data/lib/markbridge/processors.rb +8 -0
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +83 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +468 -0
- data/lib/markbridge/renderers/discourse/render_context.rb +80 -0
- data/lib/markbridge/renderers/discourse/renderer.rb +63 -0
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +86 -0
- data/lib/markbridge/renderers/discourse/tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tag_library.rb +67 -0
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +46 -0
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +54 -0
- data/lib/markbridge/renderers/discourse/tags/color_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +49 -0
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +87 -0
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +39 -0
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +34 -0
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +51 -0
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +32 -0
- data/lib/markbridge/renderers/discourse/tags/size_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/subscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/superscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +80 -0
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse.rb +50 -0
- data/lib/markbridge/version.rb +5 -0
- data/lib/markbridge.rb +201 -0
- metadata +186 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Parsers
    module HTML
      module Handlers
        # Turns an HTML <a> element into an AST::Url node.
        class UrlHandler < BaseHandler
          # @return [Class] the AST node class this handler produces (AST::Url)
          attr_reader :element_class

          def initialize
            @element_class = AST::Url
          end

          # Build a Url node from the element's "href" attribute and attach it
          # to the parent.
          #
          # @param element [#[]] the <a> element; read via element["href"]
          #   (presumably a Nokogiri element - confirm against the caller)
          # @param parent [#<<] AST node that receives the new Url node
          # @return [AST::Url] the new node; returning it signals the caller to
          #   process the element's children (the link text) into it
          def process(element:, parent:)
            AST::Url.new(href: element["href"]).tap { |link| parent << link }
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Parsers
    module HTML
      # Parses HTML into an AST using Nokogiri.
      #
      # Every element is dispatched to the handler registered for its
      # lower-cased tag name. Elements without a handler are counted in
      # #unknown_tags and dropped, but their children are still processed into
      # the surrounding AST node.
      class Parser
        # HTML void (self-closing) element names. Kept as a frozen constant so
        # the list is not rebuilt on every #void_element? call.
        VOID_ELEMENTS = %w[area base br col embed hr img input link meta param source track wbr].freeze
        private_constant :VOID_ELEMENTS

        # @return [Hash{String => Integer}] number of occurrences of each tag
        #   that had no handler during the most recent #parse call
        attr_reader :unknown_tags

        # Create a new parser with optional custom handlers.
        #
        # When a block is given it takes precedence: the registry is built via
        # HandlerRegistry.build_from_default and +handlers+ is ignored.
        #
        # @param handlers [HandlerRegistry, nil] custom handler registry, defaults to HandlerRegistry.default
        # @yield [HandlerRegistry] optional block to customize the default registry
        def initialize(handlers: nil, &block)
          @handlers =
            if block
              HandlerRegistry.build_from_default(&block)
            else
              handlers || HandlerRegistry.default
            end
          @unknown_tags = Hash.new(0)
        end

        # Parse an HTML string into an AST.
        #
        # Resets #unknown_tags before parsing.
        #
        # @param input [String] HTML source
        # @return [AST::Document]
        def parse(input)
          @unknown_tags.clear

          fragment = Nokogiri::HTML5.fragment(input)
          document = AST::Document.new
          process_children(fragment, document)
          document
        end

        # Process the child nodes of an element (used by handlers).
        #
        # @param node [Nokogiri::XML::Element]
        # @param parent [AST::Element]
        def process_children(node, parent)
          node.children.each { |child| process_node(child, parent) }
        end

        private

        # Dispatch a Nokogiri node by type. Nodes that are neither text nor
        # elements are ignored.
        #
        # @param node [Nokogiri::XML::Node]
        # @param parent [AST::Element]
        def process_node(node, parent)
          case node
          when Nokogiri::XML::Text then process_text_node(node, parent)
          when Nokogiri::XML::Element then process_element_node(node, parent)
          end
        end

        # Append a Text node for non-empty text content.
        #
        # @param node [Nokogiri::XML::Text]
        # @param parent [AST::Element]
        def process_text_node(node, parent)
          text = node.text
          parent << AST::Text.new(text) unless text.empty?
        end

        # Hand an element to its registered handler, or treat it as unknown.
        #
        # A handler may expose either #process or #call. It returns the AST
        # element that should receive the children, or nil when the children
        # must not be processed.
        #
        # @param node [Nokogiri::XML::Element]
        # @param parent [AST::Element]
        def process_element_node(node, parent)
          handler = @handlers[node.name.downcase]
          return handle_unknown_tag(node, parent) unless handler

          ast_element =
            if handler.respond_to?(:process)
              handler.process(element: node, parent: parent)
            else
              handler.call(element: node, parent: parent)
            end

          # Automatically descend when the handler returned an element.
          process_children(node, ast_element) if ast_element
        end

        # Count an unknown tag and ignore the wrapper while still processing
        # its children into the current parent.
        #
        # @param node [Nokogiri::XML::Element]
        # @param parent [AST::Element]
        def handle_unknown_tag(node, parent)
          @unknown_tags[node.name.downcase] += 1
          process_children(node, parent)
        end

        # Check whether a tag name is a void (self-closing) element.
        #
        # @param tag_name [String]
        # @return [Boolean]
        def void_element?(tag_name)
          VOID_ELEMENTS.include?(tag_name.downcase)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Dependencies
|
|
4
|
+
require_relative "../gem_loader"
|
|
5
|
+
Markbridge::GemLoader.require_gem(:nokogiri, feature: "HTML parsing")
|
|
6
|
+
|
|
7
|
+
# AST Nodes
|
|
8
|
+
require_relative "../ast"
|
|
9
|
+
|
|
10
|
+
# Handlers
|
|
11
|
+
require_relative "html/handlers/base_handler"
|
|
12
|
+
require_relative "html/handlers/simple_handler"
|
|
13
|
+
require_relative "html/handlers/raw_handler"
|
|
14
|
+
require_relative "html/handlers/url_handler"
|
|
15
|
+
require_relative "html/handlers/image_handler"
|
|
16
|
+
require_relative "html/handlers/list_handler"
|
|
17
|
+
require_relative "html/handlers/list_item_handler"
|
|
18
|
+
require_relative "html/handlers/quote_handler"
|
|
19
|
+
require_relative "html/handlers/paragraph_handler"
|
|
20
|
+
|
|
21
|
+
# Parser components
|
|
22
|
+
require_relative "html/handler_registry"
|
|
23
|
+
require_relative "html/parser"
|
|
24
|
+
|
|
25
|
+
# Namespace for the HTML parser. The module body is intentionally empty: the
# handlers, handler registry and parser are loaded by the require_relative
# lines earlier in this file.
module Markbridge
  module Parsers
    module HTML; end
  end
end
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Parsers
    module MediaWiki
      # Parses inline MediaWiki markup within a piece of text and appends the
      # resulting AST nodes to a parent element.
      #
      # Recognised constructs: bold ('''), italic (''), bold italic ('''''),
      # internal links ([[target|label]]), external links ([url label]) and the
      # HTML-style tags <nowiki>, <code>, <pre>, <br>, <s>/<del>, <u>/<ins>,
      # <sup> and <sub>. Everything else is emitted as plain text.
      #
      # Scan state lives in instance variables, so nested content is always
      # parsed by a fresh InlineParser instance.
      class InlineParser
        # Matches an HTML-style tag that starts exactly at the offset passed to
        # String#match (\G). Captures: 1 = "/" for a closing tag, 2 = tag name,
        # 3 = "/" directly before ">" (self-closing form such as <br/>).
        HTML_TAG_PATTERN = %r{\G<(/?)([a-z]+)(?: [^>]*)?\s*(/?)>}i
        private_constant :HTML_TAG_PATTERN

        # Parse inline markup and append resulting AST nodes to the parent element.
        #
        # @param text [String] the text to parse for inline markup
        # @param parent [AST::Element] the element to append children to
        def parse(text, parent:)
          @input = text
          @pos = 0
          @length = text.length
          @parent = parent
          @text_buffer = +""

          while @pos < @length
            char = @input[@pos]
            following = @input[@pos + 1] # nil at end of input

            if char == "'" && following == "'"
              parse_bold_italic
            elsif char == "["
              flush_text
              following == "[" ? parse_internal_link : parse_external_link
            elsif char == "<"
              flush_text
              parse_html_tag
            else
              @text_buffer << char
              @pos += 1
            end
          end

          flush_text
        end

        private

        # Count consecutive apostrophes and dispatch to bold/italic parsing.
        # Runs longer than five apostrophes are treated as five; four are
        # treated as a bold marker followed by a literal apostrophe.
        def parse_bold_italic
          start = @pos
          run = [consecutive_apostrophes_at(@pos), 5].min
          marker = [5, 3, 2].find { |size| run >= size }

          unless marker
            # Single apostrophe: plain text.
            @text_buffer << @input[@pos]
            @pos += 1
            return
          end

          flush_text
          @pos += marker
          case marker
          when 5 then parse_bold_italic_combo(start)
          when 3 then parse_bold_content(start)
          else parse_italic_content(start)
          end
        end

        # Parse '''''bold italic''''' content into Bold > Italic.
        #
        # @param start [Integer] offset of the opening marker
        def parse_bold_italic_combo(start)
          parse_emphasis(start, 5) do |content|
            bold = AST::Bold.new
            bold << parse_nested(content, AST::Italic.new)
            bold
          end
        end

        # Parse '''bold''' content.
        #
        # @param start [Integer] offset of the opening marker
        def parse_bold_content(start)
          parse_emphasis(start, 3) { |content| parse_nested(content, AST::Bold.new) }
        end

        # Parse ''italic'' content.
        #
        # @param start [Integer] offset of the opening marker
        def parse_italic_content(start)
          parse_emphasis(start, 2) { |content| parse_nested(content, AST::Italic.new) }
        end

        # Shared body of the emphasis parsers. Expects @pos to be just past the
        # opening marker. When a closing marker exists, the block receives the
        # enclosed text and returns the node to append. Otherwise the opening
        # marker is emitted as literal text and scanning resumes right after it.
        #
        # @param start [Integer] offset of the opening marker
        # @param marker [Integer] number of apostrophes in the marker
        # @yieldparam content [String] text between the markers
        # @yieldreturn [AST::Element] node to append to the parent
        def parse_emphasis(start, marker)
          content = collect_until_apostrophes(marker)

          if content
            @parent << yield(content)
          else
            @text_buffer << ("'" * marker)
            @pos = start + marker
          end
        end

        # Parse +content+ with a fresh parser into +element+.
        #
        # @param content [String]
        # @param element [AST::Element]
        # @return [AST::Element] the same element, now populated
        def parse_nested(content, element)
          InlineParser.new.parse(content, parent: element)
          element
        end

        # Collect text until we find n consecutive apostrophes.
        # On success @pos is left just past the closing marker; on failure it
        # is left at the end of input and the caller must reset it.
        #
        # @param count [Integer] number of consecutive apostrophes to match
        # @return [String, nil] the collected content, or nil if not found
        def collect_until_apostrophes(count)
          start = @pos

          while @pos < @length
            if @input[@pos] == "'" && consecutive_apostrophes_at(@pos) >= count
              content = @input[start...@pos]
              @pos += count
              return content
            end
            @pos += 1
          end

          nil
        end

        # Count consecutive apostrophes starting at position.
        #
        # @param pos [Integer]
        # @return [Integer]
        def consecutive_apostrophes_at(pos)
          count = 0
          count += 1 while pos + count < @length && @input[pos + count] == "'"
          count
        end

        # Parse [[internal link]] or [[target|display text]].
        #
        # A link without a target ("[[]]", "[[ |x]]") is emitted as literal
        # text; previously an empty link raised NoMethodError. An empty label
        # ("[[Page|]]") falls back to the target so that no empty Text node is
        # produced.
        def parse_internal_link
          @pos += 2 # skip [[

          close_pos = @input.index("]]", @pos)
          unless close_pos
            @text_buffer << "[["
            return
          end

          content = @input[@pos...close_pos]
          @pos = close_pos + 2

          target, display = content.split("|", 2)
          target = target.to_s.strip # split returns [] for empty content
          if target.empty?
            @text_buffer << "[[" << content << "]]"
            return
          end

          display = display.to_s.strip
          display = target if display.empty?

          url = AST::Url.new(href: target)
          url << AST::Text.new(display)
          @parent << url
        end

        # Parse [url display text] external link.
        #
        # Brackets with no content ("[]", "[  ]") are emitted as literal text
        # instead of producing a link with a nil href. An empty label falls
        # back to the URL.
        def parse_external_link
          @pos += 1 # skip [

          close_pos = @input.index("]", @pos)
          unless close_pos
            @text_buffer << "["
            return
          end

          content = @input[@pos...close_pos]
          @pos = close_pos + 1

          # Split on first whitespace: URL followed by optional display text.
          href, display = content.split(" ", 2)
          unless href
            @text_buffer << "[" << content << "]"
            return
          end

          display = href if display.nil? || display.empty?

          url = AST::Url.new(href: href)
          url << AST::Text.new(display)
          @parent << url
        end

        # Parse an HTML tag (<code>, <nowiki>, <pre>, <br>, <s>, <del>, <u>, <ins>, <sup>, <sub>).
        # Unknown tags, and a "<" that does not start a tag, are kept as text.
        def parse_html_tag
          # Match in place at @pos rather than copying the rest of the input.
          tag_match = @input.match(HTML_TAG_PATTERN, @pos)
          unless tag_match
            @text_buffer << "<"
            @pos += 1
            return
          end

          closing = !tag_match[1].empty?
          tag_name = tag_match[2].downcase
          self_closing = !tag_match[3].empty?
          full_match = tag_match[0]

          case tag_name
          when "nowiki"
            handle_nowiki_tag(closing, full_match)
          when "code", "pre"
            handle_paired_raw_tag(tag_name, closing, full_match, AST::Code)
          when "br"
            @pos += full_match.length
            @parent << AST::LineBreak.new
          when "s", "del"
            handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Strikethrough)
          when "u", "ins"
            handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Underline)
          when "sup"
            handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Superscript)
          when "sub"
            handle_paired_tag(tag_name, closing, self_closing, full_match, AST::Subscript)
          else
            emit_literal_tag(full_match)
          end
        end

        # Handle <nowiki>...</nowiki> - preserves content as literal text.
        # A stray closing tag is kept as text.
        def handle_nowiki_tag(closing, full_match)
          return emit_literal_tag(full_match) if closing

          with_enclosed_content("nowiki", full_match) { |raw| @text_buffer << raw }
        end

        # Handle paired raw tags like <code>...</code> and <pre>...</pre>.
        # Content inside is not parsed for wiki markup.
        def handle_paired_raw_tag(tag_name, closing, full_match, element_class)
          return emit_literal_tag(full_match) if closing

          with_enclosed_content(tag_name, full_match) do |raw|
            element = element_class.new
            element << AST::Text.new(raw)
            @parent << element
          end
        end

        # Handle paired formatting tags like <s>, <u>, <sup>, <sub>.
        # Content inside IS parsed for wiki markup. Closing and self-closing
        # forms on their own are kept as text.
        def handle_paired_tag(tag_name, closing, self_closing, full_match, element_class)
          return emit_literal_tag(full_match) if closing || self_closing

          with_enclosed_content(tag_name, full_match) do |inner|
            @parent << parse_nested(inner, element_class.new)
          end
        end

        # Skip the opening tag, find the matching closing tag and yield the
        # text between them, leaving @pos just past the closing tag. The
        # closing tag is matched case-insensitively because opening tags are
        # too (so <CODE>...</CODE> pairs up). When no closing tag exists the
        # opening tag is emitted as text and scanning continues after it.
        #
        # @param tag_name [String] lower-cased tag name
        # @param full_match [String] the opening tag as written in the input
        # @yieldparam inner [String] text between the opening and closing tag
        def with_enclosed_content(tag_name, full_match)
          @pos += full_match.length
          close_pos = @input.index(%r{</#{Regexp.escape(tag_name)}>}i, @pos)

          unless close_pos
            @text_buffer << full_match
            return
          end

          inner = @input[@pos...close_pos]
          @pos = close_pos + tag_name.length + 3 # "</" + name + ">"
          yield inner
        end

        # Emit a tag verbatim as text and move past it.
        #
        # @param full_match [String] the tag as written in the input
        def emit_literal_tag(full_match)
          @text_buffer << full_match
          @pos += full_match.length
        end

        # Flush accumulated text buffer to the parent as a Text node.
        def flush_text
          return if @text_buffer.empty?

          @parent << AST::Text.new(@text_buffer)
          @text_buffer = +""
        end
      end
    end
  end
end
|