markbridge 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/markbridge/ast/table.rb +67 -0
- data/lib/markbridge/ast.rb +1 -0
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +5 -0
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -11
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +40 -33
- data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
- data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +1 -1
- data/lib/markbridge/parsers/bbcode.rb +3 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +5 -0
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -1
- data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
- data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
- data/lib/markbridge/parsers/html/parser.rb +13 -2
- data/lib/markbridge/parsers/html.rb +3 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +105 -130
- data/lib/markbridge/parsers/media_wiki/parser.rb +128 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +6 -0
- data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
- data/lib/markbridge/parsers/text_formatter.rb +1 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +96 -84
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +12 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +0 -10
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +0 -10
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +19 -16
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +237 -180
- data/lib/markbridge/renderers/discourse/renderer.rb +1 -0
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +2 -1
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -5
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +3 -5
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +15 -11
- data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/table_tag.rb +124 -0
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +1 -1
- data/lib/markbridge/renderers/discourse.rb +3 -0
- data/lib/markbridge/version.rb +1 -1
- data/lib/markbridge.rb +20 -55
- metadata +12 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 395eaa44d32b7fc497e7153b1648250d6abd675ef3fcafa7939c1282725683e2
|
|
4
|
+
data.tar.gz: bd1c7215feeeb2a6fd6e0ebb213122c3652037eec277eec396191321e3e050e0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ee252b9c8fdd75437e99e5cf2aa446c71c58f9091a1c1d33a0724feaa3e8239f1267e31154957b60a7d1b72c951ec0cf11367ecf8b13a15a5147e93f702a1d1f
|
|
7
|
+
data.tar.gz: da07e85f2ba664a1239578c2fead05e2bbbf43a563f2e2da5c6d2f409192d7777cfe5875148bb2058fd1750838ff9f56556938ee8ca000ac918e1a608276d5df
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module AST
|
|
5
|
+
# Represents a table element containing rows.
|
|
6
|
+
#
|
|
7
|
+
# @example
|
|
8
|
+
# table = AST::Table.new
|
|
9
|
+
# table << AST::TableRow.new
|
|
10
|
+
class Table < Element
|
|
11
|
+
# Add a child node to the table.
|
|
12
|
+
# Whitespace-only Text nodes are ignored.
|
|
13
|
+
#
|
|
14
|
+
# @param child [Node] the node to add
|
|
15
|
+
# @return [Table] self for chaining
|
|
16
|
+
def <<(child)
|
|
17
|
+
return self if child.is_a?(Text) && child.text.strip.empty?
|
|
18
|
+
|
|
19
|
+
super
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Represents a table row containing cells.
|
|
24
|
+
#
|
|
25
|
+
# @example
|
|
26
|
+
# row = AST::TableRow.new
|
|
27
|
+
# row << AST::TableCell.new
|
|
28
|
+
class TableRow < Element
|
|
29
|
+
# Add a child node to the row.
|
|
30
|
+
# Whitespace-only Text nodes are ignored.
|
|
31
|
+
#
|
|
32
|
+
# @param child [Node] the node to add
|
|
33
|
+
# @return [TableRow] self for chaining
|
|
34
|
+
def <<(child)
|
|
35
|
+
return self if child.is_a?(Text) && child.text.strip.empty?
|
|
36
|
+
|
|
37
|
+
super
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Represents a table cell (td or th).
|
|
42
|
+
#
|
|
43
|
+
# @example Data cell
|
|
44
|
+
# cell = AST::TableCell.new
|
|
45
|
+
# cell << AST::Text.new("data")
|
|
46
|
+
#
|
|
47
|
+
# @example Header cell
|
|
48
|
+
# cell = AST::TableCell.new(header: true)
|
|
49
|
+
# cell << AST::Text.new("header")
|
|
50
|
+
class TableCell < Element
|
|
51
|
+
# Create a new table cell.
|
|
52
|
+
#
|
|
53
|
+
# @param header [Boolean] whether this is a header cell (th)
|
|
54
|
+
def initialize(header: false)
|
|
55
|
+
super()
|
|
56
|
+
@header = header
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Check if this is a header cell.
|
|
60
|
+
#
|
|
61
|
+
# @return [Boolean] true if this is a header cell
|
|
62
|
+
def header?
|
|
63
|
+
@header
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
data/lib/markbridge/ast.rb
CHANGED
|
@@ -18,6 +18,7 @@ require_relative "ast/italic"
|
|
|
18
18
|
require_relative "ast/line_break"
|
|
19
19
|
require_relative "ast/list"
|
|
20
20
|
require_relative "ast/list_item"
|
|
21
|
+
require_relative "ast/table"
|
|
21
22
|
require_relative "ast/paragraph"
|
|
22
23
|
require_relative "ast/quote"
|
|
23
24
|
require_relative "ast/size"
|
|
@@ -132,6 +132,11 @@ module Markbridge
|
|
|
132
132
|
registry.register(%w[list ul ol ulist olist], Handlers::ListHandler.new)
|
|
133
133
|
registry.register(%w[* li .], Handlers::ListItemHandler.new)
|
|
134
134
|
|
|
135
|
+
# Table handlers
|
|
136
|
+
registry.register("table", Handlers::TableHandler.new)
|
|
137
|
+
registry.register("tr", Handlers::TableRowHandler.new)
|
|
138
|
+
registry.register(%w[td th], Handlers::TableCellHandler.new)
|
|
139
|
+
|
|
135
140
|
# Set the closing strategy
|
|
136
141
|
registry.closing_strategy = closing_strategy || default_closing_strategy(registry)
|
|
137
142
|
|
|
@@ -18,22 +18,24 @@ module Markbridge
|
|
|
18
18
|
private
|
|
19
19
|
|
|
20
20
|
def create_element(token:, content:)
|
|
21
|
-
|
|
21
|
+
width, height = extract_dimensions(token)
|
|
22
|
+
AST::Image.new(src: content, width:, height:)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def extract_dimensions(token)
|
|
22
26
|
width = sanitize_dimension(token.attrs[:width])
|
|
23
27
|
height = sanitize_dimension(token.attrs[:height])
|
|
24
28
|
|
|
25
|
-
|
|
26
|
-
if
|
|
27
|
-
|
|
28
|
-
width = sanitize_dimension(
|
|
29
|
-
height = sanitize_dimension(
|
|
30
|
-
elsif
|
|
31
|
-
|
|
32
|
-
width = sanitize_dimension(token.attrs[:option])
|
|
29
|
+
option = token.attrs[:option]
|
|
30
|
+
if option&.match?(/^\d+x\d+$/i)
|
|
31
|
+
parts = option.split("x", 2)
|
|
32
|
+
width = sanitize_dimension(parts[0])
|
|
33
|
+
height = sanitize_dimension(parts[1])
|
|
34
|
+
elsif option&.match?(/^\d+$/)
|
|
35
|
+
width = sanitize_dimension(option)
|
|
33
36
|
end
|
|
34
37
|
|
|
35
|
-
|
|
36
|
-
AST::Image.new(src: content, width:, height:)
|
|
38
|
+
[width, height]
|
|
37
39
|
end
|
|
38
40
|
|
|
39
41
|
# Convert dimension to positive integer or nil
|
|
@@ -17,46 +17,53 @@ module Markbridge
|
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
def on_open(token:, context:, registry:, tokens: nil)
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
attrs = extract_quote_attrs(token)
|
|
21
|
+
element = AST::Quote.new(**attrs)
|
|
22
|
+
context.push(element, token:)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
attr_reader :element_class
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
def extract_quote_attrs(token)
|
|
30
|
+
author, post, topic, username = extract_from_option(token)
|
|
31
|
+
author ||= token.attrs[:author]
|
|
32
|
+
|
|
33
|
+
# Explicit attributes override option-parsed values
|
|
34
|
+
{
|
|
35
|
+
author:,
|
|
36
|
+
post: token.attrs[:post] || post,
|
|
37
|
+
topic: token.attrs[:topic] || topic,
|
|
38
|
+
username: token.attrs[:username] || username,
|
|
39
|
+
}
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def extract_from_option(token)
|
|
43
|
+
option = token.attrs[:option]
|
|
44
|
+
return nil, nil, nil, nil unless option
|
|
45
|
+
|
|
46
|
+
unless option.match?(/,\s*post:\d+/)
|
|
47
|
+
# Simple author attribution
|
|
48
|
+
return option, nil, nil, nil
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Discourse format: "username, post:123, topic:456"
|
|
52
|
+
parts = option.split(",").map(&:strip)
|
|
53
|
+
username = parts[0]
|
|
22
54
|
post = nil
|
|
23
55
|
topic = nil
|
|
24
|
-
username = nil
|
|
25
56
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
option = token.attrs[:option]
|
|
32
|
-
if option.match?(/,\s*post:\d+/)
|
|
33
|
-
# Discourse format with post/topic
|
|
34
|
-
parts = option.split(",").map(&:strip)
|
|
35
|
-
username = parts[0]
|
|
36
|
-
parts[1..].each do |part|
|
|
37
|
-
if part =~ /^post:(\d+)$/
|
|
38
|
-
post = ::Regexp.last_match(1)
|
|
39
|
-
elsif part =~ /^topic:(\d+)$/
|
|
40
|
-
topic = ::Regexp.last_match(1)
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
author = username
|
|
44
|
-
else
|
|
45
|
-
# Simple author attribution
|
|
46
|
-
author = option
|
|
57
|
+
parts[1..].each do |part|
|
|
58
|
+
if part =~ /^post:(\d+)$/
|
|
59
|
+
post = ::Regexp.last_match(1)
|
|
60
|
+
elsif part =~ /^topic:(\d+)$/
|
|
61
|
+
topic = ::Regexp.last_match(1)
|
|
47
62
|
end
|
|
48
63
|
end
|
|
49
64
|
|
|
50
|
-
|
|
51
|
-
username = token.attrs[:username] if token.attrs[:username]
|
|
52
|
-
post = token.attrs[:post] if token.attrs[:post]
|
|
53
|
-
topic = token.attrs[:topic] if token.attrs[:topic]
|
|
54
|
-
|
|
55
|
-
element = AST::Quote.new(author:, post:, topic:, username:)
|
|
56
|
-
context.push(element, token:)
|
|
65
|
+
[username, post, topic, username]
|
|
57
66
|
end
|
|
58
|
-
|
|
59
|
-
attr_reader :element_class
|
|
60
67
|
end
|
|
61
68
|
end
|
|
62
69
|
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table cell tags (td, th)
|
|
8
|
+
class TableCellHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableCell
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def on_open(token:, context:, registry:, tokens: nil)
|
|
14
|
+
# Auto-close previous cell if still open
|
|
15
|
+
context.pop if context.current.is_a?(AST::TableCell)
|
|
16
|
+
|
|
17
|
+
element = AST::TableCell.new(header: token.tag == "th")
|
|
18
|
+
context.push(element, token:)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
attr_reader :element_class
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table tags
|
|
8
|
+
class TableHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::Table
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def on_open(token:, context:, registry:, tokens: nil)
|
|
14
|
+
element = AST::Table.new
|
|
15
|
+
context.push(element, token:)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def on_close(token:, context:, registry:, tokens: nil)
|
|
19
|
+
# Auto-close open cell before closing row
|
|
20
|
+
context.pop if context.current.is_a?(AST::TableCell)
|
|
21
|
+
# Auto-close open row before closing table
|
|
22
|
+
context.pop if context.current.is_a?(AST::TableRow)
|
|
23
|
+
|
|
24
|
+
super
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
attr_reader :element_class
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module BBCode
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table row tags (tr)
|
|
8
|
+
class TableRowHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableRow
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def on_open(token:, context:, registry:, tokens: nil)
|
|
14
|
+
# Auto-close open cell before starting new row
|
|
15
|
+
context.pop if context.current.is_a?(AST::TableCell)
|
|
16
|
+
# Auto-close previous row if still open
|
|
17
|
+
context.pop if context.current.is_a?(AST::TableRow)
|
|
18
|
+
|
|
19
|
+
element = AST::TableRow.new
|
|
20
|
+
context.push(element, token:)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def on_close(token:, context:, registry:, tokens: nil)
|
|
24
|
+
# Auto-close open cell before closing row
|
|
25
|
+
context.pop if context.current.is_a?(AST::TableCell)
|
|
26
|
+
|
|
27
|
+
super
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
attr_reader :element_class
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -114,7 +114,7 @@ module Markbridge
|
|
|
114
114
|
# while still processing its children
|
|
115
115
|
# @param token [Token]
|
|
116
116
|
# @param context [ParserState]
|
|
117
|
-
def handle_unknown_tag(token,
|
|
117
|
+
def handle_unknown_tag(token, _context)
|
|
118
118
|
@unknown_tags[token.tag] += 1
|
|
119
119
|
end
|
|
120
120
|
end
|
|
@@ -35,6 +35,9 @@ require_relative "bbcode/handlers/self_closing_handler"
|
|
|
35
35
|
require_relative "bbcode/handlers/simple_handler"
|
|
36
36
|
require_relative "bbcode/handlers/size_handler"
|
|
37
37
|
require_relative "bbcode/handlers/spoiler_handler"
|
|
38
|
+
require_relative "bbcode/handlers/table_handler"
|
|
39
|
+
require_relative "bbcode/handlers/table_row_handler"
|
|
40
|
+
require_relative "bbcode/handlers/table_cell_handler"
|
|
38
41
|
require_relative "bbcode/handlers/url_handler"
|
|
39
42
|
|
|
40
43
|
# Parser components
|
|
@@ -67,6 +67,11 @@ module Markbridge
|
|
|
67
67
|
registry.register(%w[ul ol], Handlers::ListHandler.new)
|
|
68
68
|
registry.register("li", Handlers::ListItemHandler.new)
|
|
69
69
|
|
|
70
|
+
# Table handlers (thead/tbody/tfoot are transparent - unregistered tags pass through)
|
|
71
|
+
registry.register("table", Handlers::TableHandler.new)
|
|
72
|
+
registry.register("tr", Handlers::TableRowHandler.new)
|
|
73
|
+
registry.register(%w[td th], Handlers::TableCellHandler.new)
|
|
74
|
+
|
|
70
75
|
# Paragraph handler (transparent - doesn't create AST node)
|
|
71
76
|
registry.register("p", Handlers::ParagraphHandler.new)
|
|
72
77
|
|
|
@@ -11,7 +11,6 @@ module Markbridge
|
|
|
11
11
|
# @param parent [AST::Element] the parent AST node
|
|
12
12
|
# @return [AST::Element, nil] the created element if children should be processed, nil otherwise
|
|
13
13
|
def process(element:, parent:)
|
|
14
|
-
node = element # Alias for compatibility
|
|
15
14
|
# Default: do nothing, subclasses override
|
|
16
15
|
nil
|
|
17
16
|
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table cell tags (<td>, <th>)
|
|
8
|
+
class TableCellHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableCell
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def process(element:, parent:)
|
|
14
|
+
ast_element = AST::TableCell.new(header: element.name.downcase == "th")
|
|
15
|
+
parent << ast_element
|
|
16
|
+
ast_element
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
attr_reader :element_class
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table tags (<table>)
|
|
8
|
+
class TableHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::Table
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def process(element:, parent:)
|
|
14
|
+
ast_element = AST::Table.new
|
|
15
|
+
parent << ast_element
|
|
16
|
+
ast_element
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
attr_reader :element_class
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
|
|
4
|
+
module Parsers
|
|
5
|
+
module HTML
|
|
6
|
+
module Handlers
|
|
7
|
+
# Handler for table row tags (<tr>)
|
|
8
|
+
class TableRowHandler < BaseHandler
|
|
9
|
+
def initialize
|
|
10
|
+
@element_class = AST::TableRow
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def process(element:, parent:)
|
|
14
|
+
ast_element = AST::TableRow.new
|
|
15
|
+
parent << ast_element
|
|
16
|
+
ast_element
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
attr_reader :element_class
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -5,6 +5,11 @@ module Markbridge
|
|
|
5
5
|
module HTML
|
|
6
6
|
# Parses HTML into an AST using Nokogiri
|
|
7
7
|
class Parser
|
|
8
|
+
# Tags whose contents should be dropped entirely (not emitted as text).
|
|
9
|
+
# These are raw-text/metadata elements whose children are either CSS,
|
|
10
|
+
# JavaScript, or document metadata that shouldn't appear in output.
|
|
11
|
+
IGNORED_TAGS = %w[style script head title noscript template].freeze
|
|
12
|
+
|
|
8
13
|
attr_reader :unknown_tags
|
|
9
14
|
|
|
10
15
|
# Create a new parser with optional custom handlers
|
|
@@ -26,8 +31,12 @@ module Markbridge
|
|
|
26
31
|
def parse(input)
|
|
27
32
|
@unknown_tags.clear
|
|
28
33
|
|
|
29
|
-
# Parse HTML with Nokogiri
|
|
30
|
-
|
|
34
|
+
# Parse HTML with Nokogiri. Using the generic HTML (HTML4) parser rather
|
|
35
|
+
# than HTML5 because Nokogiri::HTML5 is not available on JRuby
|
|
36
|
+
# (see sparklemotion/nokogiri#2227). Table support treats thead/tbody/tfoot
|
|
37
|
+
# as transparent, so the parse-tree difference (HTML5 auto-inserts tbody,
|
|
38
|
+
# HTML4 does not) has no effect on the AST.
|
|
39
|
+
doc = Nokogiri::HTML.fragment(input)
|
|
31
40
|
|
|
32
41
|
# Create root AST document
|
|
33
42
|
document = AST::Document.new
|
|
@@ -72,6 +81,8 @@ module Markbridge
|
|
|
72
81
|
# @param parent [AST::Element]
|
|
73
82
|
def process_element_node(node, parent)
|
|
74
83
|
tag_name = node.name.downcase
|
|
84
|
+
return if IGNORED_TAGS.include?(tag_name)
|
|
85
|
+
|
|
75
86
|
handler = @handlers[tag_name]
|
|
76
87
|
|
|
77
88
|
if handler
|
|
@@ -17,6 +17,9 @@ require_relative "html/handlers/list_handler"
|
|
|
17
17
|
require_relative "html/handlers/list_item_handler"
|
|
18
18
|
require_relative "html/handlers/quote_handler"
|
|
19
19
|
require_relative "html/handlers/paragraph_handler"
|
|
20
|
+
require_relative "html/handlers/table_handler"
|
|
21
|
+
require_relative "html/handlers/table_row_handler"
|
|
22
|
+
require_relative "html/handlers/table_cell_handler"
|
|
20
23
|
|
|
21
24
|
# Parser components
|
|
22
25
|
require_relative "html/handler_registry"
|