markbridge 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/all.rb +4 -7
  3. data/lib/markbridge/ast/document.rb +1 -1
  4. data/lib/markbridge/ast/element.rb +2 -2
  5. data/lib/markbridge/ast/list.rb +2 -2
  6. data/lib/markbridge/ast/table.rb +61 -0
  7. data/lib/markbridge/ast/text.rb +5 -1
  8. data/lib/markbridge/ast.rb +1 -0
  9. data/lib/markbridge/bbcode.rb +4 -0
  10. data/lib/markbridge/gem_loader.rb +2 -3
  11. data/lib/markbridge/html.rb +4 -0
  12. data/lib/markbridge/mediawiki.rb +4 -0
  13. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
  14. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
  15. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
  16. data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
  17. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
  18. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
  19. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
  20. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
  21. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
  22. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
  23. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
  24. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
  25. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
  26. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
  27. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
  28. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
  29. data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
  30. data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
  31. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
  32. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
  33. data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
  34. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
  35. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
  36. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
  37. data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
  38. data/lib/markbridge/parsers/bbcode.rb +4 -0
  39. data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
  40. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
  41. data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
  42. data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
  43. data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
  44. data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
  45. data/lib/markbridge/parsers/html/parser.rb +16 -15
  46. data/lib/markbridge/parsers/html.rb +3 -0
  47. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
  48. data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
  49. data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
  50. data/lib/markbridge/parsers/media_wiki.rb +1 -0
  51. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
  52. data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
  53. data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
  54. data/lib/markbridge/parsers/text_formatter.rb +1 -0
  55. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
  56. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
  57. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
  58. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
  59. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
  60. data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
  61. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
  62. data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
  63. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
  64. data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
  65. data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
  66. data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
  67. data/lib/markbridge/renderers/discourse/tag.rb +14 -1
  68. data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
  69. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
  70. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
  71. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
  72. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
  73. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
  74. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
  75. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
  76. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
  77. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
  78. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
  79. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
  80. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
  81. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
  82. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
  83. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
  84. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
  85. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
  86. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
  87. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
  88. data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
  89. data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
  90. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
  91. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
  92. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
  93. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
  94. data/lib/markbridge/renderers/discourse.rb +4 -0
  95. data/lib/markbridge/textformatter.rb +4 -0
  96. data/lib/markbridge/version.rb +1 -1
  97. data/lib/markbridge.rb +27 -62
  98. metadata +19 -2
@@ -7,14 +7,10 @@ module Markbridge
7
7
  class TagEndToken < Token
8
8
  attr_reader :tag
9
9
 
10
- def initialize(tag:, pos: 0, source: nil)
10
+ def initialize(tag:, pos:, source:)
11
11
  super(pos:, source:)
12
12
  @tag = tag.freeze
13
13
  end
14
-
15
- def inspect
16
- "#<TagEndToken [/#{tag}]>"
17
- end
18
14
  end
19
15
  end
20
16
  end
@@ -7,16 +7,11 @@ module Markbridge
7
7
  class TagStartToken < Token
8
8
  attr_reader :tag, :attrs
9
9
 
10
- def initialize(tag:, attrs: {}, pos: 0, source: nil)
10
+ def initialize(tag:, attrs:, pos:, source:)
11
11
  super(pos:, source:)
12
12
  @tag = tag.freeze
13
13
  @attrs = attrs.freeze
14
14
  end
15
-
16
- def inspect
17
- attrs_str = attrs.empty? ? "" : " #{attrs.inspect}"
18
- "#<TagStartToken [#{tag}]#{attrs_str}>"
19
- end
20
15
  end
21
16
  end
22
17
  end
@@ -7,16 +7,10 @@ module Markbridge
7
7
  class TextToken < Token
8
8
  attr_reader :text
9
9
 
10
- def initialize(text:, pos: 0)
10
+ def initialize(text:, pos:)
11
11
  super(pos:, source: text)
12
12
  @text = text.freeze
13
13
  end
14
-
15
- alias source text
16
-
17
- def inspect
18
- "#<TextToken #{text.inspect}>"
19
- end
20
14
  end
21
15
  end
22
16
  end
@@ -6,7 +6,7 @@ module Markbridge
6
6
  class Token
7
7
  attr_reader :pos, :source
8
8
 
9
- def initialize(pos: 0, source: nil)
9
+ def initialize(pos:, source:)
10
10
  @pos = pos
11
11
  @source = source
12
12
  end
@@ -25,6 +25,7 @@ require_relative "bbcode/handlers/raw_handler"
25
25
  # Handlers
26
26
  require_relative "bbcode/handlers/align_handler"
27
27
  require_relative "bbcode/handlers/attachment_handler"
28
+ require_relative "bbcode/handlers/code_handler"
28
29
  require_relative "bbcode/handlers/color_handler"
29
30
  require_relative "bbcode/handlers/email_handler"
30
31
  require_relative "bbcode/handlers/image_handler"
@@ -35,6 +36,9 @@ require_relative "bbcode/handlers/self_closing_handler"
35
36
  require_relative "bbcode/handlers/simple_handler"
36
37
  require_relative "bbcode/handlers/size_handler"
37
38
  require_relative "bbcode/handlers/spoiler_handler"
39
+ require_relative "bbcode/handlers/table_handler"
40
+ require_relative "bbcode/handlers/table_row_handler"
41
+ require_relative "bbcode/handlers/table_cell_handler"
38
42
  require_relative "bbcode/handlers/url_handler"
39
43
 
40
44
  # Parser components
@@ -27,50 +27,38 @@ module Markbridge
27
27
  # Create the default handler registry with common HTML tags
28
28
  # @return [HandlerRegistry]
29
29
  def self.default
30
- registry = new
31
-
32
- # Simple formatting handlers
33
- registry.register(%w[b strong], Handlers::SimpleHandler.new(AST::Bold))
34
- registry.register(%w[i em], Handlers::SimpleHandler.new(AST::Italic))
35
- registry.register(%w[s strike del], Handlers::SimpleHandler.new(AST::Strikethrough))
36
- registry.register("u", Handlers::SimpleHandler.new(AST::Underline))
37
- registry.register("sup", Handlers::SimpleHandler.new(AST::Superscript))
38
- registry.register("sub", Handlers::SimpleHandler.new(AST::Subscript))
39
-
40
- # Code handlers (raw content)
41
- registry.register(%w[code pre tt], Handlers::RawHandler.new(AST::Code))
42
-
43
- # Link and image handlers
44
- registry.register("a", Handlers::UrlHandler.new)
45
- registry.register("img", Handlers::ImageHandler.new)
46
-
47
- # Blockquote handler
48
- registry.register("blockquote", Handlers::QuoteHandler.new)
49
-
50
- # Void elements - use simple inline handlers
51
- registry.register(
52
- "br",
53
- lambda do |element:, parent:|
54
- parent << AST::LineBreak.new
55
- nil # Return nil - void element, no children
56
- end,
57
- )
58
- registry.register(
59
- "hr",
60
- lambda do |element:, parent:|
61
- parent << AST::HorizontalRule.new
62
- nil # Return nil - void element, no children
63
- end,
64
- )
65
-
66
- # List handlers
67
- registry.register(%w[ul ol], Handlers::ListHandler.new)
68
- registry.register("li", Handlers::ListItemHandler.new)
69
-
70
- # Paragraph handler (transparent - doesn't create AST node)
71
- registry.register("p", Handlers::ParagraphHandler.new)
72
-
73
- registry
30
+ new.tap do |registry|
31
+ registry.register(%w[b strong], Handlers::SimpleHandler.new(AST::Bold))
32
+ registry.register(%w[i em], Handlers::SimpleHandler.new(AST::Italic))
33
+ registry.register(%w[s strike del], Handlers::SimpleHandler.new(AST::Strikethrough))
34
+ registry.register("u", Handlers::SimpleHandler.new(AST::Underline))
35
+ registry.register("sup", Handlers::SimpleHandler.new(AST::Superscript))
36
+ registry.register("sub", Handlers::SimpleHandler.new(AST::Subscript))
37
+ registry.register(%w[code pre tt], Handlers::RawHandler.new(AST::Code))
38
+ registry.register("a", Handlers::UrlHandler.new)
39
+ registry.register("img", Handlers::ImageHandler.new)
40
+ registry.register("blockquote", Handlers::QuoteHandler.new)
41
+ registry.register(
42
+ "br",
43
+ lambda do |element:, parent:|
44
+ parent << AST::LineBreak.new
45
+ nil
46
+ end,
47
+ )
48
+ registry.register(
49
+ "hr",
50
+ lambda do |element:, parent:|
51
+ parent << AST::HorizontalRule.new
52
+ nil
53
+ end,
54
+ )
55
+ registry.register(%w[ul ol], Handlers::ListHandler.new)
56
+ registry.register("li", Handlers::ListItemHandler.new)
57
+ registry.register("table", Handlers::TableHandler.new)
58
+ registry.register("tr", Handlers::TableRowHandler.new)
59
+ registry.register(%w[td th], Handlers::TableCellHandler.new)
60
+ registry.register("p", Handlers::ParagraphHandler.new)
61
+ end
74
62
  end
75
63
 
76
64
  # Build a registry from the default configuration with optional customization
@@ -11,9 +11,6 @@ module Markbridge
11
11
  # @param parent [AST::Element] the parent AST node
12
12
  # @return [AST::Element, nil] the created element if children should be processed, nil otherwise
13
13
  def process(element:, parent:)
14
- node = element # Alias for compatibility
15
- # Default: do nothing, subclasses override
16
- nil
17
14
  end
18
15
 
19
16
  # The element class created by this handler
@@ -26,12 +26,9 @@ module Markbridge
26
26
 
27
27
  private
28
28
 
29
- # Convert dimension to positive integer or nil
30
29
  def sanitize_dimension(value)
31
- return nil if value.nil?
32
-
33
30
  dim = value.to_i
34
- dim.positive? ? dim : nil
31
+ dim if dim.positive?
35
32
  end
36
33
  end
37
34
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module HTML
6
+ module Handlers
7
+ # Handler for table cell tags (<td>, <th>)
8
+ class TableCellHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableCell
11
+ end
12
+
13
+ def process(element:, parent:)
14
+ ast_element = AST::TableCell.new(header: element.name.downcase == "th")
15
+ parent << ast_element
16
+ ast_element
17
+ end
18
+
19
+ attr_reader :element_class
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module HTML
6
+ module Handlers
7
+ # Handler for table tags (<table>)
8
+ class TableHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::Table
11
+ end
12
+
13
+ def process(element:, parent:)
14
+ ast_element = AST::Table.new
15
+ parent << ast_element
16
+ ast_element
17
+ end
18
+
19
+ attr_reader :element_class
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module HTML
6
+ module Handlers
7
+ # Handler for table row tags (<tr>)
8
+ class TableRowHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableRow
11
+ end
12
+
13
+ def process(element:, parent:)
14
+ ast_element = AST::TableRow.new
15
+ parent << ast_element
16
+ ast_element
17
+ end
18
+
19
+ attr_reader :element_class
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -5,6 +5,11 @@ module Markbridge
5
5
  module HTML
6
6
  # Parses HTML into an AST using Nokogiri
7
7
  class Parser
8
+ # Tags whose contents should be dropped entirely (not emitted as text).
9
+ # These are raw-text/metadata elements whose children are either CSS,
10
+ # JavaScript, or document metadata that shouldn't appear in output.
11
+ IGNORED_TAGS = %w[style script head title noscript template].freeze
12
+
8
13
  attr_reader :unknown_tags
9
14
 
10
15
  # Create a new parser with optional custom handlers
@@ -26,8 +31,12 @@ module Markbridge
26
31
  def parse(input)
27
32
  @unknown_tags.clear
28
33
 
29
- # Parse HTML with Nokogiri
30
- doc = Nokogiri::HTML5.fragment(input)
34
+ # Parse HTML with Nokogiri. Using the generic HTML (HTML4) parser rather
35
+ # than HTML5 because Nokogiri::HTML5 is not available on JRuby
36
+ # (see sparklemotion/nokogiri#2227). Table support treats thead/tbody/tfoot
37
+ # as transparent, so the parse-tree difference (HTML5 auto-inserts tbody,
38
+ # HTML4 does not) has no effect on the AST.
39
+ doc = Nokogiri::HTML.fragment(input)
31
40
 
32
41
  # Create root AST document
33
42
  document = AST::Document.new
@@ -63,15 +72,16 @@ module Markbridge
63
72
  # @param node [Nokogiri::XML::Text]
64
73
  # @param parent [AST::Element]
65
74
  def process_text_node(node, parent)
66
- text = node.text
67
- parent << AST::Text.new(text) unless text.empty?
75
+ parent << AST::Text.new(node.text)
68
76
  end
69
77
 
70
78
  # Process an element node
71
79
  # @param node [Nokogiri::XML::Element]
72
80
  # @param parent [AST::Element]
73
81
  def process_element_node(node, parent)
74
- tag_name = node.name.downcase
82
+ tag_name = node.name
83
+ return if IGNORED_TAGS.include?(tag_name)
84
+
75
85
  handler = @handlers[tag_name]
76
86
 
77
87
  if handler
@@ -95,18 +105,9 @@ module Markbridge
95
105
  # @param node [Nokogiri::XML::Element]
96
106
  # @param parent [AST::Element]
97
107
  def handle_unknown_tag(node, parent)
98
- @unknown_tags[node.name.downcase] += 1
108
+ @unknown_tags[node.name] += 1
99
109
  process_children(node, parent)
100
110
  end
101
-
102
- # Check if an element is a void element (self-closing)
103
- # @param tag_name [String]
104
- # @return [Boolean]
105
- def void_element?(tag_name)
106
- %w[area base br col embed hr img input link meta param source track wbr].include?(
107
- tag_name.downcase,
108
- )
109
- end
110
111
  end
111
112
  end
112
113
  end
@@ -17,6 +17,9 @@ require_relative "html/handlers/list_handler"
17
17
  require_relative "html/handlers/list_item_handler"
18
18
  require_relative "html/handlers/quote_handler"
19
19
  require_relative "html/handlers/paragraph_handler"
20
+ require_relative "html/handlers/table_handler"
21
+ require_relative "html/handlers/table_row_handler"
22
+ require_relative "html/handlers/table_cell_handler"
20
23
 
21
24
  # Parser components
22
25
  require_relative "html/handler_registry"