markbridge 0.1.0 → 0.1.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markbridge/all.rb +4 -7
- data/lib/markbridge/ast/document.rb +1 -1
- data/lib/markbridge/ast/element.rb +2 -2
- data/lib/markbridge/ast/list.rb +2 -2
- data/lib/markbridge/ast/table.rb +61 -0
- data/lib/markbridge/ast/text.rb +5 -1
- data/lib/markbridge/ast.rb +1 -0
- data/lib/markbridge/bbcode.rb +4 -0
- data/lib/markbridge/gem_loader.rb +2 -3
- data/lib/markbridge/html.rb +4 -0
- data/lib/markbridge/mediawiki.rb +4 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
- data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
- data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
- data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
- data/lib/markbridge/parsers/bbcode.rb +4 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
- data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
- data/lib/markbridge/parsers/html/parser.rb +16 -15
- data/lib/markbridge/parsers/html.rb +3 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
- data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
- data/lib/markbridge/parsers/media_wiki.rb +1 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
- data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
- data/lib/markbridge/parsers/text_formatter.rb +1 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
- data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
- data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
- data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
- data/lib/markbridge/renderers/discourse/tag.rb +14 -1
- data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
- data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
- data/lib/markbridge/renderers/discourse.rb +4 -0
- data/lib/markbridge/textformatter.rb +4 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +27 -62
- metadata +19 -2
|
@@ -7,14 +7,10 @@ module Markbridge
|
|
|
7
7
|
class TagEndToken < Token
|
|
8
8
|
attr_reader :tag
|
|
9
9
|
|
|
10
|
-
def initialize(tag:, pos
|
|
10
|
+
def initialize(tag:, pos:, source:)
|
|
11
11
|
super(pos:, source:)
|
|
12
12
|
@tag = tag.freeze
|
|
13
13
|
end
|
|
14
|
-
|
|
15
|
-
def inspect
|
|
16
|
-
"#<TagEndToken [/#{tag}]>"
|
|
17
|
-
end
|
|
18
14
|
end
|
|
19
15
|
end
|
|
20
16
|
end
|
|
@@ -7,16 +7,11 @@ module Markbridge
|
|
|
7
7
|
class TagStartToken < Token
|
|
8
8
|
attr_reader :tag, :attrs
|
|
9
9
|
|
|
10
|
-
def initialize(tag:, attrs
|
|
10
|
+
def initialize(tag:, attrs:, pos:, source:)
|
|
11
11
|
super(pos:, source:)
|
|
12
12
|
@tag = tag.freeze
|
|
13
13
|
@attrs = attrs.freeze
|
|
14
14
|
end
|
|
15
|
-
|
|
16
|
-
def inspect
|
|
17
|
-
attrs_str = attrs.empty? ? "" : " #{attrs.inspect}"
|
|
18
|
-
"#<TagStartToken [#{tag}]#{attrs_str}>"
|
|
19
|
-
end
|
|
20
15
|
end
|
|
21
16
|
end
|
|
22
17
|
end
|
|
@@ -7,16 +7,10 @@ module Markbridge
|
|
|
7
7
|
class TextToken < Token
|
|
8
8
|
attr_reader :text
|
|
9
9
|
|
|
10
|
-
def initialize(text:, pos:
|
|
10
|
+
def initialize(text:, pos:)
|
|
11
11
|
super(pos:, source: text)
|
|
12
12
|
@text = text.freeze
|
|
13
13
|
end
|
|
14
|
-
|
|
15
|
-
alias source text
|
|
16
|
-
|
|
17
|
-
def inspect
|
|
18
|
-
"#<TextToken #{text.inspect}>"
|
|
19
|
-
end
|
|
20
14
|
end
|
|
21
15
|
end
|
|
22
16
|
end
|
|
@@ -25,6 +25,7 @@ require_relative "bbcode/handlers/raw_handler"
|
|
|
25
25
|
# Handlers
|
|
26
26
|
require_relative "bbcode/handlers/align_handler"
|
|
27
27
|
require_relative "bbcode/handlers/attachment_handler"
|
|
28
|
+
require_relative "bbcode/handlers/code_handler"
|
|
28
29
|
require_relative "bbcode/handlers/color_handler"
|
|
29
30
|
require_relative "bbcode/handlers/email_handler"
|
|
30
31
|
require_relative "bbcode/handlers/image_handler"
|
|
@@ -35,6 +36,9 @@ require_relative "bbcode/handlers/self_closing_handler"
|
|
|
35
36
|
require_relative "bbcode/handlers/simple_handler"
|
|
36
37
|
require_relative "bbcode/handlers/size_handler"
|
|
37
38
|
require_relative "bbcode/handlers/spoiler_handler"
|
|
39
|
+
require_relative "bbcode/handlers/table_handler"
|
|
40
|
+
require_relative "bbcode/handlers/table_row_handler"
|
|
41
|
+
require_relative "bbcode/handlers/table_cell_handler"
|
|
38
42
|
require_relative "bbcode/handlers/url_handler"
|
|
39
43
|
|
|
40
44
|
# Parser components
|
|
@@ -27,50 +27,38 @@ module Markbridge
|
|
|
27
27
|
# Create the default handler registry with common HTML tags
|
|
28
28
|
# @return [HandlerRegistry]
|
|
29
29
|
def self.default
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
nil # Return nil - void element, no children
|
|
63
|
-
end,
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
# List handlers
|
|
67
|
-
registry.register(%w[ul ol], Handlers::ListHandler.new)
|
|
68
|
-
registry.register("li", Handlers::ListItemHandler.new)
|
|
69
|
-
|
|
70
|
-
# Paragraph handler (transparent - doesn't create AST node)
|
|
71
|
-
registry.register("p", Handlers::ParagraphHandler.new)
|
|
72
|
-
|
|
73
|
-
registry
|
|
30
|
+
new.tap do |registry|
|
|
31
|
+
registry.register(%w[b strong], Handlers::SimpleHandler.new(AST::Bold))
|
|
32
|
+
registry.register(%w[i em], Handlers::SimpleHandler.new(AST::Italic))
|
|
33
|
+
registry.register(%w[s strike del], Handlers::SimpleHandler.new(AST::Strikethrough))
|
|
34
|
+
registry.register("u", Handlers::SimpleHandler.new(AST::Underline))
|
|
35
|
+
registry.register("sup", Handlers::SimpleHandler.new(AST::Superscript))
|
|
36
|
+
registry.register("sub", Handlers::SimpleHandler.new(AST::Subscript))
|
|
37
|
+
registry.register(%w[code pre tt], Handlers::RawHandler.new(AST::Code))
|
|
38
|
+
registry.register("a", Handlers::UrlHandler.new)
|
|
39
|
+
registry.register("img", Handlers::ImageHandler.new)
|
|
40
|
+
registry.register("blockquote", Handlers::QuoteHandler.new)
|
|
41
|
+
registry.register(
|
|
42
|
+
"br",
|
|
43
|
+
lambda do |element:, parent:|
|
|
44
|
+
parent << AST::LineBreak.new
|
|
45
|
+
nil
|
|
46
|
+
end,
|
|
47
|
+
)
|
|
48
|
+
registry.register(
|
|
49
|
+
"hr",
|
|
50
|
+
lambda do |element:, parent:|
|
|
51
|
+
parent << AST::HorizontalRule.new
|
|
52
|
+
nil
|
|
53
|
+
end,
|
|
54
|
+
)
|
|
55
|
+
registry.register(%w[ul ol], Handlers::ListHandler.new)
|
|
56
|
+
registry.register("li", Handlers::ListItemHandler.new)
|
|
57
|
+
registry.register("table", Handlers::TableHandler.new)
|
|
58
|
+
registry.register("tr", Handlers::TableRowHandler.new)
|
|
59
|
+
registry.register(%w[td th], Handlers::TableCellHandler.new)
|
|
60
|
+
registry.register("p", Handlers::ParagraphHandler.new)
|
|
61
|
+
end
|
|
74
62
|
end
|
|
75
63
|
|
|
76
64
|
# Build a registry from the default configuration with optional customization
|
|
@@ -11,9 +11,6 @@ module Markbridge
|
|
|
11
11
|
# @param parent [AST::Element] the parent AST node
|
|
12
12
|
# @return [AST::Element, nil] the created element if children should be processed, nil otherwise
|
|
13
13
|
def process(element:, parent:)
|
|
14
|
-
node = element # Alias for compatibility
|
|
15
|
-
# Default: do nothing, subclasses override
|
|
16
|
-
nil
|
|
17
14
|
end
|
|
18
15
|
|
|
19
16
|
# The element class created by this handler
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table cell tags (<td>, <th>)
|
|
8
|
+
class TableCellHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableCell
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def process(element:, parent:)
|
|
14
|
+
ast_element = AST::TableCell.new(header: element.name.downcase == "th")
|
|
15
|
+
parent << ast_element
|
|
16
|
+
ast_element
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
attr_reader :element_class
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table tags (<table>)
|
|
8
|
+
class TableHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::Table
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def process(element:, parent:)
|
|
14
|
+
ast_element = AST::Table.new
|
|
15
|
+
parent << ast_element
|
|
16
|
+
ast_element
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
attr_reader :element_class
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table row tags (<tr>)
|
|
8
|
+
class TableRowHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableRow
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def process(element:, parent:)
|
|
14
|
+
ast_element = AST::TableRow.new
|
|
15
|
+
parent << ast_element
|
|
16
|
+
ast_element
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
attr_reader :element_class
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -5,6 +5,11 @@ module Markbridge
|
|
|
5
5
|
module HTML
|
|
6
6
|
# Parses HTML into an AST using Nokogiri
|
|
7
7
|
class Parser
|
|
8
|
+
# Tags whose contents should be dropped entirely (not emitted as text).
|
|
9
|
+
# These are raw-text/metadata elements whose children are either CSS,
|
|
10
|
+
# JavaScript, or document metadata that shouldn't appear in output.
|
|
11
|
+
IGNORED_TAGS = %w[style script head title noscript template].freeze
|
|
12
|
+
|
|
8
13
|
attr_reader :unknown_tags
|
|
9
14
|
|
|
10
15
|
# Create a new parser with optional custom handlers
|
|
@@ -26,8 +31,12 @@ module Markbridge
|
|
|
26
31
|
def parse(input)
|
|
27
32
|
@unknown_tags.clear
|
|
28
33
|
|
|
29
|
-
# Parse HTML with Nokogiri
|
|
30
|
-
|
|
34
|
+
# Parse HTML with Nokogiri. Using the generic HTML (HTML4) parser rather
|
|
35
|
+
# than HTML5 because Nokogiri::HTML5 is not available on JRuby
|
|
36
|
+
# (see sparklemotion/nokogiri#2227). Table support treats thead/tbody/tfoot
|
|
37
|
+
# as transparent, so the parse-tree difference (HTML5 auto-inserts tbody,
|
|
38
|
+
# HTML4 does not) has no effect on the AST.
|
|
39
|
+
doc = Nokogiri::HTML.fragment(input)
|
|
31
40
|
|
|
32
41
|
# Create root AST document
|
|
33
42
|
document = AST::Document.new
|
|
@@ -63,15 +72,16 @@ module Markbridge
|
|
|
63
72
|
# @param node [Nokogiri::XML::Text]
|
|
64
73
|
# @param parent [AST::Element]
|
|
65
74
|
def process_text_node(node, parent)
|
|
66
|
-
|
|
67
|
-
parent << AST::Text.new(text) unless text.empty?
|
|
75
|
+
parent << AST::Text.new(node.text)
|
|
68
76
|
end
|
|
69
77
|
|
|
70
78
|
# Process an element node
|
|
71
79
|
# @param node [Nokogiri::XML::Element]
|
|
72
80
|
# @param parent [AST::Element]
|
|
73
81
|
def process_element_node(node, parent)
|
|
74
|
-
tag_name = node.name
|
|
82
|
+
tag_name = node.name
|
|
83
|
+
return if IGNORED_TAGS.include?(tag_name)
|
|
84
|
+
|
|
75
85
|
handler = @handlers[tag_name]
|
|
76
86
|
|
|
77
87
|
if handler
|
|
@@ -95,18 +105,9 @@ module Markbridge
|
|
|
95
105
|
# @param node [Nokogiri::XML::Element]
|
|
96
106
|
# @param parent [AST::Element]
|
|
97
107
|
def handle_unknown_tag(node, parent)
|
|
98
|
-
@unknown_tags[node.name
|
|
108
|
+
@unknown_tags[node.name] += 1
|
|
99
109
|
process_children(node, parent)
|
|
100
110
|
end
|
|
101
|
-
|
|
102
|
-
# Check if an element is a void element (self-closing)
|
|
103
|
-
# @param tag_name [String]
|
|
104
|
-
# @return [Boolean]
|
|
105
|
-
def void_element?(tag_name)
|
|
106
|
-
%w[area base br col embed hr img input link meta param source track wbr].include?(
|
|
107
|
-
tag_name.downcase,
|
|
108
|
-
)
|
|
109
|
-
end
|
|
110
111
|
end
|
|
111
112
|
end
|
|
112
113
|
end
|
|
@@ -17,6 +17,9 @@ require_relative "html/handlers/list_handler"
|
|
|
17
17
|
require_relative "html/handlers/list_item_handler"
|
|
18
18
|
require_relative "html/handlers/quote_handler"
|
|
19
19
|
require_relative "html/handlers/paragraph_handler"
|
|
20
|
+
require_relative "html/handlers/table_handler"
|
|
21
|
+
require_relative "html/handlers/table_row_handler"
|
|
22
|
+
require_relative "html/handlers/table_cell_handler"
|
|
20
23
|
|
|
21
24
|
# Parser components
|
|
22
25
|
require_relative "html/handler_registry"
|