markbridge 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/ast/table.rb +67 -0
  3. data/lib/markbridge/ast.rb +1 -0
  4. data/lib/markbridge/parsers/bbcode/handler_registry.rb +5 -0
  5. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -11
  6. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +40 -33
  7. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
  8. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
  9. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
  10. data/lib/markbridge/parsers/bbcode/parser.rb +1 -1
  11. data/lib/markbridge/parsers/bbcode.rb +3 -0
  12. data/lib/markbridge/parsers/html/handler_registry.rb +5 -0
  13. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -1
  14. data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
  15. data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
  16. data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
  17. data/lib/markbridge/parsers/html/parser.rb +13 -2
  18. data/lib/markbridge/parsers/html.rb +3 -0
  19. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +105 -130
  20. data/lib/markbridge/parsers/media_wiki/parser.rb +128 -0
  21. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +6 -0
  22. data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
  23. data/lib/markbridge/parsers/text_formatter.rb +1 -0
  24. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +96 -84
  25. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +12 -0
  26. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +0 -10
  27. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +0 -10
  28. data/lib/markbridge/processors/discourse_markdown/scanner.rb +19 -16
  29. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +237 -180
  30. data/lib/markbridge/renderers/discourse/renderer.rb +1 -0
  31. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +1 -1
  32. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
  33. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +2 -1
  34. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -5
  35. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +1 -1
  36. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +1 -1
  37. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +3 -5
  38. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +15 -11
  39. data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
  40. data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
  41. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +124 -0
  42. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +1 -1
  43. data/lib/markbridge/renderers/discourse.rb +3 -0
  44. data/lib/markbridge/version.rb +1 -1
  45. data/lib/markbridge.rb +20 -55
  46. metadata +12 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 43b79ac6557f41fcce9274986295812dd9cbfccf10ce19d0f9b81424ef4f9a83
4
- data.tar.gz: 63d7b2494828a885ceea8b333251d23e6400c2d84cb0a50e30c3112bb4c2a517
3
+ metadata.gz: 395eaa44d32b7fc497e7153b1648250d6abd675ef3fcafa7939c1282725683e2
4
+ data.tar.gz: bd1c7215feeeb2a6fd6e0ebb213122c3652037eec277eec396191321e3e050e0
5
5
  SHA512:
6
- metadata.gz: 387f752e9d6ae0eecc1c4e1e48cda6ac9e94390b16b75d8813a29400af10664913ca0dff8d9af2553753ed41ed5a5ed60d366f51246a3c814fb6034dfeb3660c
7
- data.tar.gz: ea951ad454a6f6bfa86d902725394bf01605bff6307c1a6a6b10918d1267af12d41fd2bd1989151a90a3eec7d0670fbfb79c69e0ae256077ed5897f64870e036
6
+ metadata.gz: ee252b9c8fdd75437e99e5cf2aa446c71c58f9091a1c1d33a0724feaa3e8239f1267e31154957b60a7d1b72c951ec0cf11367ecf8b13a15a5147e93f702a1d1f
7
+ data.tar.gz: da07e85f2ba664a1239578c2fead05e2bbbf43a563f2e2da5c6d2f409192d7777cfe5875148bb2058fd1750838ff9f56556938ee8ca000ac918e1a608276d5df
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module AST
5
+ # Represents a table element containing rows.
6
+ #
7
+ # @example
8
+ # table = AST::Table.new
9
+ # table << AST::TableRow.new
10
+ class Table < Element
11
+ # Add a child node to the table.
12
+ # Whitespace-only Text nodes are ignored.
13
+ #
14
+ # @param child [Node] the node to add
15
+ # @return [Table] self for chaining
16
+ def <<(child)
17
+ return self if child.is_a?(Text) && child.text.strip.empty?
18
+
19
+ super
20
+ end
21
+ end
22
+
23
+ # Represents a table row containing cells.
24
+ #
25
+ # @example
26
+ # row = AST::TableRow.new
27
+ # row << AST::TableCell.new
28
+ class TableRow < Element
29
+ # Add a child node to the row.
30
+ # Whitespace-only Text nodes are ignored.
31
+ #
32
+ # @param child [Node] the node to add
33
+ # @return [TableRow] self for chaining
34
+ def <<(child)
35
+ return self if child.is_a?(Text) && child.text.strip.empty?
36
+
37
+ super
38
+ end
39
+ end
40
+
41
+ # Represents a table cell (td or th).
42
+ #
43
+ # @example Data cell
44
+ # cell = AST::TableCell.new
45
+ # cell << AST::Text.new("data")
46
+ #
47
+ # @example Header cell
48
+ # cell = AST::TableCell.new(header: true)
49
+ # cell << AST::Text.new("header")
50
+ class TableCell < Element
51
+ # Create a new table cell.
52
+ #
53
+ # @param header [Boolean] whether this is a header cell (th)
54
+ def initialize(header: false)
55
+ super()
56
+ @header = header
57
+ end
58
+
59
+ # Check if this is a header cell.
60
+ #
61
+ # @return [Boolean] true if this is a header cell
62
+ def header?
63
+ @header
64
+ end
65
+ end
66
+ end
67
+ end
@@ -18,6 +18,7 @@ require_relative "ast/italic"
18
18
  require_relative "ast/line_break"
19
19
  require_relative "ast/list"
20
20
  require_relative "ast/list_item"
21
+ require_relative "ast/table"
21
22
  require_relative "ast/paragraph"
22
23
  require_relative "ast/quote"
23
24
  require_relative "ast/size"
@@ -132,6 +132,11 @@ module Markbridge
132
132
  registry.register(%w[list ul ol ulist olist], Handlers::ListHandler.new)
133
133
  registry.register(%w[* li .], Handlers::ListItemHandler.new)
134
134
 
135
+ # Table handlers
136
+ registry.register("table", Handlers::TableHandler.new)
137
+ registry.register("tr", Handlers::TableRowHandler.new)
138
+ registry.register(%w[td th], Handlers::TableCellHandler.new)
139
+
135
140
  # Set the closing strategy
136
141
  registry.closing_strategy = closing_strategy || default_closing_strategy(registry)
137
142
 
@@ -18,22 +18,24 @@ module Markbridge
18
18
  private
19
19
 
20
20
  def create_element(token:, content:)
21
- # Extract dimensions from attributes or option
21
+ width, height = extract_dimensions(token)
22
+ AST::Image.new(src: content, width:, height:)
23
+ end
24
+
25
+ def extract_dimensions(token)
22
26
  width = sanitize_dimension(token.attrs[:width])
23
27
  height = sanitize_dimension(token.attrs[:height])
24
28
 
25
- # Parse option for WIDTHxHEIGHT format (e.g., [img=100x200])
26
- if token.attrs[:option]&.match?(/^\d+x\d+$/i)
27
- dimensions = token.attrs[:option].split("x", 2)
28
- width = sanitize_dimension(dimensions[0])
29
- height = sanitize_dimension(dimensions[1])
30
- elsif token.attrs[:option]&.match?(/^\d+$/)
31
- # Just a number means width
32
- width = sanitize_dimension(token.attrs[:option])
29
+ option = token.attrs[:option]
30
+ if option&.match?(/^\d+x\d+$/i)
31
+ parts = option.split("x", 2)
32
+ width = sanitize_dimension(parts[0])
33
+ height = sanitize_dimension(parts[1])
34
+ elsif option&.match?(/^\d+$/)
35
+ width = sanitize_dimension(option)
33
36
  end
34
37
 
35
- # Content is the URL
36
- AST::Image.new(src: content, width:, height:)
38
+ [width, height]
37
39
  end
38
40
 
39
41
  # Convert dimension to positive integer or nil
@@ -17,46 +17,53 @@ module Markbridge
17
17
  end
18
18
 
19
19
  def on_open(token:, context:, registry:, tokens: nil)
20
- # Extract quote attributes
21
- author = nil
20
+ attrs = extract_quote_attrs(token)
21
+ element = AST::Quote.new(**attrs)
22
+ context.push(element, token:)
23
+ end
24
+
25
+ attr_reader :element_class
26
+
27
+ private
28
+
29
+ def extract_quote_attrs(token)
30
+ author, post, topic, username = extract_from_option(token)
31
+ author ||= token.attrs[:author]
32
+
33
+ # Explicit attributes override option-parsed values
34
+ {
35
+ author:,
36
+ post: token.attrs[:post] || post,
37
+ topic: token.attrs[:topic] || topic,
38
+ username: token.attrs[:username] || username,
39
+ }
40
+ end
41
+
42
+ def extract_from_option(token)
43
+ option = token.attrs[:option]
44
+ return nil, nil, nil, nil unless option
45
+
46
+ unless option.match?(/,\s*post:\d+/)
47
+ # Simple author attribution
48
+ return option, nil, nil, nil
49
+ end
50
+
51
+ # Discourse format: "username, post:123, topic:456"
52
+ parts = option.split(",").map(&:strip)
53
+ username = parts[0]
22
54
  post = nil
23
55
  topic = nil
24
- username = nil
25
56
 
26
- # Check for author attribute or option
27
- if token.attrs[:author]
28
- author = token.attrs[:author]
29
- elsif token.attrs[:option]
30
- # Parse Discourse-style quote: "username, post:123, topic:456"
31
- option = token.attrs[:option]
32
- if option.match?(/,\s*post:\d+/)
33
- # Discourse format with post/topic
34
- parts = option.split(",").map(&:strip)
35
- username = parts[0]
36
- parts[1..].each do |part|
37
- if part =~ /^post:(\d+)$/
38
- post = ::Regexp.last_match(1)
39
- elsif part =~ /^topic:(\d+)$/
40
- topic = ::Regexp.last_match(1)
41
- end
42
- end
43
- author = username
44
- else
45
- # Simple author attribution
46
- author = option
57
+ parts[1..].each do |part|
58
+ if part =~ /^post:(\d+)$/
59
+ post = ::Regexp.last_match(1)
60
+ elsif part =~ /^topic:(\d+)$/
61
+ topic = ::Regexp.last_match(1)
47
62
  end
48
63
  end
49
64
 
50
- # Check for explicit username, post, topic attributes (override option if present)
51
- username = token.attrs[:username] if token.attrs[:username]
52
- post = token.attrs[:post] if token.attrs[:post]
53
- topic = token.attrs[:topic] if token.attrs[:topic]
54
-
55
- element = AST::Quote.new(author:, post:, topic:, username:)
56
- context.push(element, token:)
65
+ [username, post, topic, username]
57
66
  end
58
-
59
- attr_reader :element_class
60
67
  end
61
68
  end
62
69
  end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module BBCode
6
+ module Handlers
7
+ # Handler for table cell tags (td, th)
8
+ class TableCellHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableCell
11
+ end
12
+
13
+ def on_open(token:, context:, registry:, tokens: nil)
14
+ # Auto-close previous cell if still open
15
+ context.pop if context.current.is_a?(AST::TableCell)
16
+
17
+ element = AST::TableCell.new(header: token.tag == "th")
18
+ context.push(element, token:)
19
+ end
20
+
21
+ attr_reader :element_class
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module BBCode
6
+ module Handlers
7
+ # Handler for table tags
8
+ class TableHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::Table
11
+ end
12
+
13
+ def on_open(token:, context:, registry:, tokens: nil)
14
+ element = AST::Table.new
15
+ context.push(element, token:)
16
+ end
17
+
18
+ def on_close(token:, context:, registry:, tokens: nil)
19
+ # Auto-close open cell before closing row
20
+ context.pop if context.current.is_a?(AST::TableCell)
21
+ # Auto-close open row before closing table
22
+ context.pop if context.current.is_a?(AST::TableRow)
23
+
24
+ super
25
+ end
26
+
27
+ attr_reader :element_class
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module BBCode
6
+ module Handlers
7
+ # Handler for table row tags (tr)
8
+ class TableRowHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableRow
11
+ end
12
+
13
+ def on_open(token:, context:, registry:, tokens: nil)
14
+ # Auto-close open cell before starting new row
15
+ context.pop if context.current.is_a?(AST::TableCell)
16
+ # Auto-close previous row if still open
17
+ context.pop if context.current.is_a?(AST::TableRow)
18
+
19
+ element = AST::TableRow.new
20
+ context.push(element, token:)
21
+ end
22
+
23
+ def on_close(token:, context:, registry:, tokens: nil)
24
+ # Auto-close open cell before closing row
25
+ context.pop if context.current.is_a?(AST::TableCell)
26
+
27
+ super
28
+ end
29
+
30
+ attr_reader :element_class
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -114,7 +114,7 @@ module Markbridge
114
114
  # while still processing its children
115
115
  # @param token [Token]
116
116
  # @param context [ParserState]
117
- def handle_unknown_tag(token, context)
117
+ def handle_unknown_tag(token, _context)
118
118
  @unknown_tags[token.tag] += 1
119
119
  end
120
120
  end
@@ -35,6 +35,9 @@ require_relative "bbcode/handlers/self_closing_handler"
35
35
  require_relative "bbcode/handlers/simple_handler"
36
36
  require_relative "bbcode/handlers/size_handler"
37
37
  require_relative "bbcode/handlers/spoiler_handler"
38
+ require_relative "bbcode/handlers/table_handler"
39
+ require_relative "bbcode/handlers/table_row_handler"
40
+ require_relative "bbcode/handlers/table_cell_handler"
38
41
  require_relative "bbcode/handlers/url_handler"
39
42
 
40
43
  # Parser components
@@ -67,6 +67,11 @@ module Markbridge
67
67
  registry.register(%w[ul ol], Handlers::ListHandler.new)
68
68
  registry.register("li", Handlers::ListItemHandler.new)
69
69
 
70
+ # Table handlers (thead/tbody/tfoot are transparent - unregistered tags pass through)
71
+ registry.register("table", Handlers::TableHandler.new)
72
+ registry.register("tr", Handlers::TableRowHandler.new)
73
+ registry.register(%w[td th], Handlers::TableCellHandler.new)
74
+
70
75
  # Paragraph handler (transparent - doesn't create AST node)
71
76
  registry.register("p", Handlers::ParagraphHandler.new)
72
77
 
@@ -11,7 +11,6 @@ module Markbridge
11
11
  # @param parent [AST::Element] the parent AST node
12
12
  # @return [AST::Element, nil] the created element if children should be processed, nil otherwise
13
13
  def process(element:, parent:)
14
- node = element # Alias for compatibility
15
14
  # Default: do nothing, subclasses override
16
15
  nil
17
16
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module HTML
6
+ module Handlers
7
+ # Handler for table cell tags (<td>, <th>)
8
+ class TableCellHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableCell
11
+ end
12
+
13
+ def process(element:, parent:)
14
+ ast_element = AST::TableCell.new(header: element.name.downcase == "th")
15
+ parent << ast_element
16
+ ast_element
17
+ end
18
+
19
+ attr_reader :element_class
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module HTML
6
+ module Handlers
7
+ # Handler for table tags (<table>)
8
+ class TableHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::Table
11
+ end
12
+
13
+ def process(element:, parent:)
14
+ ast_element = AST::Table.new
15
+ parent << ast_element
16
+ ast_element
17
+ end
18
+
19
+ attr_reader :element_class
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module HTML
6
+ module Handlers
7
+ # Handler for table row tags (<tr>)
8
+ class TableRowHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableRow
11
+ end
12
+
13
+ def process(element:, parent:)
14
+ ast_element = AST::TableRow.new
15
+ parent << ast_element
16
+ ast_element
17
+ end
18
+
19
+ attr_reader :element_class
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -5,6 +5,11 @@ module Markbridge
5
5
  module HTML
6
6
  # Parses HTML into an AST using Nokogiri
7
7
  class Parser
8
+ # Tags whose contents should be dropped entirely (not emitted as text).
9
+ # These are raw-text/metadata elements whose children are either CSS,
10
+ # JavaScript, or document metadata that shouldn't appear in output.
11
+ IGNORED_TAGS = %w[style script head title noscript template].freeze
12
+
8
13
  attr_reader :unknown_tags
9
14
 
10
15
  # Create a new parser with optional custom handlers
@@ -26,8 +31,12 @@ module Markbridge
26
31
  def parse(input)
27
32
  @unknown_tags.clear
28
33
 
29
- # Parse HTML with Nokogiri
30
- doc = Nokogiri::HTML5.fragment(input)
34
+ # Parse HTML with Nokogiri. Using the generic HTML (HTML4) parser rather
35
+ # than HTML5 because Nokogiri::HTML5 is not available on JRuby
36
+ # (see sparklemotion/nokogiri#2227). Table support treats thead/tbody/tfoot
37
+ # as transparent, so the parse-tree difference (HTML5 auto-inserts tbody,
38
+ # HTML4 does not) has no effect on the AST.
39
+ doc = Nokogiri::HTML.fragment(input)
31
40
 
32
41
  # Create root AST document
33
42
  document = AST::Document.new
@@ -72,6 +81,8 @@ module Markbridge
72
81
  # @param parent [AST::Element]
73
82
  def process_element_node(node, parent)
74
83
  tag_name = node.name.downcase
84
+ return if IGNORED_TAGS.include?(tag_name)
85
+
75
86
  handler = @handlers[tag_name]
76
87
 
77
88
  if handler
@@ -17,6 +17,9 @@ require_relative "html/handlers/list_handler"
17
17
  require_relative "html/handlers/list_item_handler"
18
18
  require_relative "html/handlers/quote_handler"
19
19
  require_relative "html/handlers/paragraph_handler"
20
+ require_relative "html/handlers/table_handler"
21
+ require_relative "html/handlers/table_row_handler"
22
+ require_relative "html/handlers/table_cell_handler"
20
23
 
21
24
  # Parser components
22
25
  require_relative "html/handler_registry"