markbridge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/lib/markbridge/all.rb +9 -0
  4. data/lib/markbridge/ast/align.rb +24 -0
  5. data/lib/markbridge/ast/attachment.rb +42 -0
  6. data/lib/markbridge/ast/bold.rb +13 -0
  7. data/lib/markbridge/ast/code.rb +27 -0
  8. data/lib/markbridge/ast/color.rb +25 -0
  9. data/lib/markbridge/ast/document.rb +27 -0
  10. data/lib/markbridge/ast/element.rb +47 -0
  11. data/lib/markbridge/ast/email.rb +27 -0
  12. data/lib/markbridge/ast/event.rb +59 -0
  13. data/lib/markbridge/ast/heading.rb +23 -0
  14. data/lib/markbridge/ast/horizontal_rule.rb +12 -0
  15. data/lib/markbridge/ast/image.rb +35 -0
  16. data/lib/markbridge/ast/italic.rb +13 -0
  17. data/lib/markbridge/ast/line_break.rb +12 -0
  18. data/lib/markbridge/ast/list.rb +52 -0
  19. data/lib/markbridge/ast/list_item.rb +13 -0
  20. data/lib/markbridge/ast/markdown_text.rb +37 -0
  21. data/lib/markbridge/ast/mention.rb +29 -0
  22. data/lib/markbridge/ast/node.rb +19 -0
  23. data/lib/markbridge/ast/paragraph.rb +13 -0
  24. data/lib/markbridge/ast/poll.rb +74 -0
  25. data/lib/markbridge/ast/quote.rb +46 -0
  26. data/lib/markbridge/ast/size.rb +25 -0
  27. data/lib/markbridge/ast/spoiler.rb +27 -0
  28. data/lib/markbridge/ast/strikethrough.rb +13 -0
  29. data/lib/markbridge/ast/subscript.rb +13 -0
  30. data/lib/markbridge/ast/superscript.rb +13 -0
  31. data/lib/markbridge/ast/text.rb +38 -0
  32. data/lib/markbridge/ast/underline.rb +13 -0
  33. data/lib/markbridge/ast/upload.rb +74 -0
  34. data/lib/markbridge/ast/url.rb +27 -0
  35. data/lib/markbridge/ast.rb +42 -0
  36. data/lib/markbridge/configuration.rb +11 -0
  37. data/lib/markbridge/gem_loader.rb +23 -0
  38. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +37 -0
  39. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -0
  40. data/lib/markbridge/parsers/bbcode/closing_strategies/strict.rb +12 -0
  41. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +121 -0
  42. data/lib/markbridge/parsers/bbcode/errors/max_depth_exceeded_error.rb +13 -0
  43. data/lib/markbridge/parsers/bbcode/handler_registry.rb +160 -0
  44. data/lib/markbridge/parsers/bbcode/handlers/align_handler.rb +26 -0
  45. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +104 -0
  46. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +44 -0
  47. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +25 -0
  48. data/lib/markbridge/parsers/bbcode/handlers/color_handler.rb +31 -0
  49. data/lib/markbridge/parsers/bbcode/handlers/email_handler.rb +25 -0
  50. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +51 -0
  51. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +36 -0
  52. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +26 -0
  53. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +64 -0
  54. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +48 -0
  55. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +28 -0
  56. data/lib/markbridge/parsers/bbcode/handlers/simple_handler.rb +28 -0
  57. data/lib/markbridge/parsers/bbcode/handlers/size_handler.rb +31 -0
  58. data/lib/markbridge/parsers/bbcode/handlers/spoiler_handler.rb +28 -0
  59. data/lib/markbridge/parsers/bbcode/handlers/url_handler.rb +24 -0
  60. data/lib/markbridge/parsers/bbcode/parser.rb +123 -0
  61. data/lib/markbridge/parsers/bbcode/parser_state.rb +93 -0
  62. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +126 -0
  63. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +35 -0
  64. data/lib/markbridge/parsers/bbcode/raw_content_result.rb +25 -0
  65. data/lib/markbridge/parsers/bbcode/scanner.rb +231 -0
  66. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +21 -0
  67. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +23 -0
  68. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +23 -0
  69. data/lib/markbridge/parsers/bbcode/tokens/token.rb +16 -0
  70. data/lib/markbridge/parsers/bbcode.rb +56 -0
  71. data/lib/markbridge/parsers/html/handler_registry.rb +87 -0
  72. data/lib/markbridge/parsers/html/handlers/base_handler.rb +27 -0
  73. data/lib/markbridge/parsers/html/handlers/image_handler.rb +40 -0
  74. data/lib/markbridge/parsers/html/handlers/list_handler.rb +29 -0
  75. data/lib/markbridge/parsers/html/handlers/list_item_handler.rb +26 -0
  76. data/lib/markbridge/parsers/html/handlers/paragraph_handler.rb +17 -0
  77. data/lib/markbridge/parsers/html/handlers/quote_handler.rb +28 -0
  78. data/lib/markbridge/parsers/html/handlers/raw_handler.rb +33 -0
  79. data/lib/markbridge/parsers/html/handlers/simple_handler.rb +26 -0
  80. data/lib/markbridge/parsers/html/handlers/url_handler.rb +27 -0
  81. data/lib/markbridge/parsers/html/parser.rb +113 -0
  82. data/lib/markbridge/parsers/html.rb +30 -0
  83. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +332 -0
  84. data/lib/markbridge/parsers/media_wiki/parser.rb +279 -0
  85. data/lib/markbridge/parsers/media_wiki.rb +15 -0
  86. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +130 -0
  87. data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +33 -0
  88. data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +40 -0
  89. data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +45 -0
  90. data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +28 -0
  91. data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +27 -0
  92. data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +32 -0
  93. data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +31 -0
  94. data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +33 -0
  95. data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +37 -0
  96. data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +29 -0
  97. data/lib/markbridge/parsers/text_formatter/parser.rb +132 -0
  98. data/lib/markbridge/parsers/text_formatter.rb +31 -0
  99. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +199 -0
  100. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +57 -0
  101. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +73 -0
  102. data/lib/markbridge/processors/discourse_markdown/detectors/mention.rb +57 -0
  103. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +90 -0
  104. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +123 -0
  105. data/lib/markbridge/processors/discourse_markdown/scanner.rb +199 -0
  106. data/lib/markbridge/processors/discourse_markdown.rb +16 -0
  107. data/lib/markbridge/processors.rb +8 -0
  108. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +83 -0
  109. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +468 -0
  110. data/lib/markbridge/renderers/discourse/render_context.rb +80 -0
  111. data/lib/markbridge/renderers/discourse/renderer.rb +63 -0
  112. data/lib/markbridge/renderers/discourse/rendering_interface.rb +86 -0
  113. data/lib/markbridge/renderers/discourse/tag.rb +29 -0
  114. data/lib/markbridge/renderers/discourse/tag_library.rb +67 -0
  115. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +24 -0
  116. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +46 -0
  117. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +18 -0
  118. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +54 -0
  119. data/lib/markbridge/renderers/discourse/tags/color_tag.rb +27 -0
  120. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +24 -0
  121. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +49 -0
  122. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +21 -0
  123. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +16 -0
  124. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +29 -0
  125. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +18 -0
  126. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +16 -0
  127. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +87 -0
  128. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +39 -0
  129. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +34 -0
  130. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +21 -0
  131. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +51 -0
  132. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +32 -0
  133. data/lib/markbridge/renderers/discourse/tags/size_tag.rb +27 -0
  134. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +24 -0
  135. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +18 -0
  136. data/lib/markbridge/renderers/discourse/tags/subscript_tag.rb +19 -0
  137. data/lib/markbridge/renderers/discourse/tags/superscript_tag.rb +19 -0
  138. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +19 -0
  139. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +80 -0
  140. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +24 -0
  141. data/lib/markbridge/renderers/discourse.rb +50 -0
  142. data/lib/markbridge/version.rb +5 -0
  143. data/lib/markbridge.rb +201 -0
  144. metadata +186 -0
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module TextFormatter
6
+ module Handlers
7
# Handler for QUOTE elements in s9e/TextFormatter XML.
#
# Builds an AST::Quote from the element's attributes and attaches it to the
# parent node. Child elements are processed into the returned quote node by
# the caller.
class QuoteHandler < BaseHandler
  # @return [Class] the AST class this handler produces (AST::Quote)
  attr_reader :element_class

  def initialize
    @element_class = AST::Quote
  end

  # @param element [Object] the QUOTE element; its attributes are read via
  #   +extract_attributes+ (not defined in this class — presumably inherited
  #   from BaseHandler)
  # @param parent [Object] AST node that receives the new quote via +<<+
  # @return [AST::Quote] the new node, signalling that children should be
  #   processed into it
  def process(element:, parent:)
    attributes = extract_attributes(element)

    quote =
      AST::Quote.new(
        author: attributes[:author],
        # Both spellings are accepted; the *_id form wins when present.
        post: attributes[:post_id] || attributes[:post],
        topic: attributes[:topic_id] || attributes[:topic],
        username: attributes[:username],
      )

    parent << quote
    quote
  end
end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module TextFormatter
6
+ module Handlers
7
# Handler for simple XML elements that don't require attributes
#
# Instantiates the configured AST class with no arguments, appends the new
# node to the parent, and hands it back so that child elements are processed
# into it. Use this for simple formatting tags like B, I, U, S.
#
# @example
#   handler = SimpleHandler.new(AST::Bold)
#   registry.register("B", handler)
class SimpleHandler < BaseHandler
  # @return [Class] the AST node class this handler instantiates
  attr_reader :element_class

  # @param element_class [Class] the AST node class to instantiate
  def initialize(element_class)
    @element_class = element_class
  end

  # Create the AST node for the element and attach it to the parent.
  # @param element [Nokogiri::XML::Element] not inspected by this handler
  # @param parent [AST::Element]
  # @return [Object] the new node, signalling: process children into this node
  def process(element:, parent:)
    @element_class.new.tap { |created| parent << created }
  end
end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module TextFormatter
6
+ module Handlers
7
# Handler for URL elements in s9e/TextFormatter XML
#
# Reads the +url+ attribute and wraps it in an AST::Url node.
class UrlHandler < BaseHandler
  # @return [Class] the AST class this handler produces (AST::Url)
  attr_reader :element_class

  def initialize
    @element_class = AST::Url
  end

  # @param element [Object] the URL element; attributes are read via
  #   +extract_attributes+ (not defined in this class — presumably inherited
  #   from BaseHandler)
  # @param parent [Object] AST node that receives the new link via +<<+
  # @return [AST::Url] the new node, signalling that children should be
  #   processed into it
  def process(element:, parent:)
    target = extract_attributes(element)[:url]

    link = AST::Url.new(href: target)
    parent << link
    link
  end
end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module TextFormatter
6
# Parses s9e/TextFormatter XML format into an AST
#
# The s9e/TextFormatter library (https://github.com/s9e/TextFormatter) stores BBCode as XML:
# - Plain text: <t>text content</t>
# - Rich text: <r><B>bold</B> <URL url="...">link</URL></r>
# - Markup preservation: <s> and <e> elements (ignored during parsing)
#
# This format is used by phpBB 3.2+ and other forum software.
#
# Requires Nokogiri gem to be installed. Add to your Gemfile:
#   gem "nokogiri"
class Parser
  # Markup-preservation elements; skipped together with their content.
  MARKUP_TAGS = %w[s e].freeze
  # Wrapper elements whose children are processed straight into the parent.
  ROOT_TAGS = %w[t r].freeze
  # Element that maps to an AST::LineBreak.
  LINE_BREAK_TAG = "br"
  private_constant :MARKUP_TAGS, :ROOT_TAGS, :LINE_BREAK_TAG

  # @return [Hash{String => Integer}] number of occurrences of each tag that
  #   had no registered handler during the most recent {#parse} call
  attr_reader :unknown_tags

  # Create a new parser with optional custom handler registry
  # @param handlers [HandlerRegistry, nil] custom handler registry, defaults to HandlerRegistry.default
  # @yield [HandlerRegistry] optional block to customize the default registry
  #   (when a block is given, +handlers+ is ignored)
  # @example Using default mappings
  #   parser = Parser.new
  # @example Using custom registry
  #   parser = Parser.new(handlers: my_registry)
  # @example Customizing default mappings
  #   parser = Parser.new do |registry|
  #     registry.register("CUSTOM", MyCustomHandler.new)
  #   end
  def initialize(handlers: nil, &block)
    @handlers =
      if block
        HandlerRegistry.build_from_default(&block)
      else
        handlers || HandlerRegistry.default
      end
    @unknown_tags = Hash.new(0)
  end

  # Parse s9e/TextFormatter XML into an AST.
  #
  # Input that yields no XML root element, or that makes Nokogiri raise a
  # syntax error, is returned as a document holding the raw input as a single
  # text node (or an empty document when the input is empty).
  #
  # @param input [String] XML string in s9e/TextFormatter format
  # @return [AST::Document]
  def parse(input)
    @unknown_tags.clear

    root = Nokogiri.XML(input).root
    return plain_text_document(input) unless root

    document = AST::Document.new
    process_node(root, document)
    document
  rescue Nokogiri::XML::SyntaxError
    plain_text_document(input)
  end

  # Process children of an XML element (public for handler access)
  # @param element [Nokogiri::XML::Element]
  # @param ast_parent [AST::Element]
  def process_children(element, ast_parent)
    element.children.each { |child| process_node(child, ast_parent) }
  end

  private

  # Build the fallback document used when the input cannot be treated as XML.
  # Both fallback paths share this helper so that empty (or nil) input always
  # produces an empty document rather than an empty text node.
  # @param input [String, nil]
  # @return [AST::Document]
  def plain_text_document(input)
    document = AST::Document.new
    text = input.to_s
    document << AST::Text.new(text) unless text.empty?
    document
  end

  # Process an XML node and add corresponding AST nodes to parent.
  # Nodes that are neither elements nor text (e.g. comments) are ignored.
  # @param xml_node [Nokogiri::XML::Element, Nokogiri::XML::Text]
  # @param ast_parent [AST::Element]
  def process_node(xml_node, ast_parent)
    if xml_node.element?
      process_element(xml_node, ast_parent)
    elsif xml_node.text?
      process_text(xml_node, ast_parent)
    end
  end

  # Process an XML element
  # @param element [Nokogiri::XML::Element]
  # @param ast_parent [AST::Element]
  def process_element(element, ast_parent)
    tag_name = element.name

    # Skip markup preservation elements and their content (used for unparsing)
    return if MARKUP_TAGS.include?(tag_name)

    # Root wrappers contribute no node of their own
    return process_children(element, ast_parent) if ROOT_TAGS.include?(tag_name)

    if tag_name == LINE_BREAK_TAG
      ast_parent << AST::LineBreak.new
      return
    end

    # Handler returns a node if children should be processed into it, nil otherwise
    result_element = @handlers.process_element(element, ast_parent)

    if result_element
      process_children(element, result_element)
    elsif !@handlers.has_handler?(tag_name)
      # No handler found - track as unknown and process children directly
      @unknown_tags[tag_name] += 1
      process_children(element, ast_parent)
    end
    # else: a registered handler returned nil intentionally (no children to process)
  end

  # Process text node
  # @param text_node [Nokogiri::XML::Text]
  # @param ast_parent [AST::Element]
  def process_text(text_node, ast_parent)
    text = text_node.content
    # NOTE(review): whitespace-only text nodes are dropped entirely, which also
    # removes a single space separating two sibling elements — confirm this is
    # the intended trade-off.
    return if text.strip.empty?

    ast_parent << AST::Text.new(text)
  end
end
130
+ end
131
+ end
132
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Dependencies
4
+ require_relative "../gem_loader"
5
+ Markbridge::GemLoader.require_gem(:nokogiri, feature: "s9e/TextFormatter XML parsing")
6
+
7
+ # AST Nodes
8
+ require_relative "../ast"
9
+
10
+ # Handler classes
11
+ require_relative "text_formatter/handlers/base_handler"
12
+ require_relative "text_formatter/handlers/simple_handler"
13
+ require_relative "text_formatter/handlers/attribute_handler"
14
+ require_relative "text_formatter/handlers/attachment_handler"
15
+ require_relative "text_formatter/handlers/code_handler"
16
+ require_relative "text_formatter/handlers/email_handler"
17
+ require_relative "text_formatter/handlers/image_handler"
18
+ require_relative "text_formatter/handlers/list_handler"
19
+ require_relative "text_formatter/handlers/quote_handler"
20
+ require_relative "text_formatter/handlers/url_handler"
21
+
22
+ # Parser components
23
+ require_relative "text_formatter/handler_registry"
24
+ require_relative "text_formatter/parser"
25
+
26
+ module Markbridge
27
+ module Parsers
28
+ module TextFormatter
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Processors
5
+ module DiscourseMarkdown
6
# Tracks whether the current position is inside a code block.
# Handles fenced code blocks (``` or ~~~), indented code blocks (4+ spaces),
# and inline code (`).
#
# Fenced code blocks:
# - Can have leading whitespace (up to 3 spaces)
# - Opening fence: 3+ backticks or tildes, optionally followed by language
# - Closing fence: same or more fence characters as opening
#
# Indented code blocks:
# - Lines indented by 4+ spaces or 1+ tab
# - Continues until a non-blank line with less indentation
#
# Inline code:
# - Single or multiple backticks as delimiter
# - Content between matching backtick runs of exactly the same length
class CodeBlockTracker
  FENCE_CHARS = ["`", "~"].freeze
  MIN_FENCE_LENGTH = 3
  MAX_FENCE_INDENT = 3
  # A line is code-indented when it starts with four spaces or a tab.
  INDENTED_CODE_PREFIXES = ["    ", "\t"].freeze
  BLANK_LINE = /\A\s*\z/
  private_constant :FENCE_CHARS,
                   :MIN_FENCE_LENGTH,
                   :MAX_FENCE_INDENT,
                   :INDENTED_CODE_PREFIXES,
                   :BLANK_LINE

  # @return [Boolean] true if currently inside a fenced code block
  attr_reader :in_fenced_block

  # @return [Boolean] true if currently inside an indented code block
  attr_reader :in_indented_block

  # @return [Boolean] true if currently inside an inline code span
  attr_reader :in_inline_code

  def initialize
    reset!
  end

  # Check if currently inside any code context
  # @return [Boolean]
  def in_code?
    @in_fenced_block || @in_indented_block || @in_inline_code
  end

  # Check if position is at start of a fenced code block boundary.
  # Opens a block when none is open; closes the open block when the fence
  # matches it (same character, at least as long, nothing but spaces after).
  #
  # @param input [String] the full input string
  # @param pos [Integer] current position
  # @param line_start [Boolean] true if pos is at the start of a line
  # @return [Integer, nil] end position after fence, or nil if no fence
  def check_fenced_boundary(input, pos, line_start:)
    return nil unless line_start

    fence_start = skip_char(input, pos, " ", limit: MAX_FENCE_INDENT)
    return nil if fence_start >= input.length

    fence_char = input[fence_start]
    return nil unless FENCE_CHARS.include?(fence_char)

    fence_end = skip_char(input, fence_start, fence_char)
    fence_length = fence_end - fence_start
    return nil if fence_length < MIN_FENCE_LENGTH

    if @in_fenced_block
      close_fence(input, fence_end, fence_char, fence_length)
    else
      open_fence(input, fence_end, fence_char, fence_length)
    end
  end

  # Check if line at position is an indented code block line.
  # A line is considered indented code if it starts with 4+ spaces or 1+ tab.
  # Blank lines within an indented block are considered part of it.
  #
  # @param input [String] the full input string
  # @param pos [Integer] current position (must be at line start)
  # @param line_start [Boolean] true if pos is at the start of a line
  # @return [Integer, nil] end position after the line, or nil if not indented code
  def check_indented_boundary(input, pos, line_start:)
    return nil unless line_start
    return nil if @in_fenced_block # Fenced blocks take precedence

    line_end = input.index("\n", pos) || input.length
    line = input[pos...line_end].to_s
    indented = line.start_with?(*INDENTED_CODE_PREFIXES)

    if @in_indented_block
      # Indented and blank lines both keep the block open
      return after_newline(input, line_end) if indented || line.match?(BLANK_LINE)

      # Non-blank, non-indented line ends the block
      @in_indented_block = false
      nil
    elsif indented
      @in_indented_block = true
      after_newline(input, line_end)
    end
  end

  # Check for inline code boundary.
  # Outside a span, a backtick run at +pos+ opens one. Inside a span, only a
  # complete backtick run of exactly the opening length closes it.
  #
  # @param input [String] the full input string
  # @param pos [Integer] current position
  # @return [Integer, nil] position after the delimiter, or nil if not at boundary
  def check_inline_boundary(input, pos)
    return nil if @in_fenced_block || @in_indented_block
    return nil if pos >= input.length || input[pos] != "`"

    return close_inline_code(input, pos) if @in_inline_code

    run_end = skip_char(input, pos, "`")
    @inline_delimiter = input[pos...run_end]
    @in_inline_code = true
    run_end
  end

  # Reset the tracker state
  def reset!
    @in_fenced_block = false
    @fence_char = nil
    @fence_length = 0
    @in_indented_block = false
    @in_inline_code = false
    @inline_delimiter = nil
  end

  private

  # Advance past consecutive occurrences of +char+, optionally at most +limit+ of them.
  # @return [Integer] first position that does not hold +char+ (or the stop position)
  def skip_char(input, pos, char, limit: nil)
    stop = limit ? [pos + limit, input.length].min : input.length
    pos += 1 while pos < stop && input[pos] == char
    pos
  end

  # Position just after the newline at +pos+, or +pos+ itself at end of input.
  def after_newline(input, pos)
    pos < input.length ? pos + 1 : pos
  end

  # Record an opening fence and skip the rest of its line (the info string).
  def open_fence(input, fence_end, fence_char, fence_length)
    line_end = input.index("\n", fence_end) || input.length

    @in_fenced_block = true
    @fence_char = fence_char
    @fence_length = fence_length

    after_newline(input, line_end)
  end

  # Close the open fenced block if this fence matches it; otherwise leave state untouched.
  def close_fence(input, fence_end, fence_char, fence_length)
    return nil unless fence_char == @fence_char && fence_length >= @fence_length

    # Only trailing spaces may follow a closing fence
    line_end = skip_char(input, fence_end, " ")
    return nil unless line_end >= input.length || input[line_end] == "\n"

    @in_fenced_block = false
    @fence_char = nil
    @fence_length = 0

    after_newline(input, line_end)
  end

  # Close the open inline span if a backtick run of exactly the opening
  # length starts at +pos+.
  def close_inline_code(input, pos)
    # A position in the middle of a backtick run is never a delimiter start;
    # without this check the tail of a longer run would be taken as a closer.
    return nil if pos.positive? && input[pos - 1] == "`"

    run_end = skip_char(input, pos, "`")
    return nil unless run_end - pos == @inline_delimiter.length

    @in_inline_code = false
    @inline_delimiter = nil
    run_end
  end
end
197
+ end
198
+ end
199
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Processors
5
+ module DiscourseMarkdown
6
+ module Detectors
7
# Result of a successful detection
# @attr_reader start_pos [Integer] start position in input
# @attr_reader end_pos [Integer] end position in input (exclusive)
# @attr_reader node [AST::Node] the AST node representing the detected construct
Match = Data.define(:start_pos, :end_pos, :node)

# Base class for construct detectors.
# Subclasses implement detection logic for specific constructs
# (mentions, polls, events, uploads).
#
# @abstract Subclass and implement {#detect}
class Base
  # A run of word characters and hyphens, anchored (via \G) at the position
  # handed to Regexp#match.
  WORD_PATTERN = /\G[\w\-]+/
  WORD_CHAR = /\w/
  private_constant :WORD_PATTERN, :WORD_CHAR

  # Attempt to detect a construct at the given position.
  #
  # @param input [String] the full input string
  # @param pos [Integer] current position to check
  # @return [Match, nil] match result or nil if no match
  # @raise [NotImplementedError] always, unless overridden by a subclass
  def detect(input, pos)
    raise NotImplementedError, "#{self.class} must implement #detect"
  end

  private

  # Helper to check if position is at a word boundary (for mentions, etc.),
  # i.e. the preceding character is not a word character.
  # @param input [String] the input string
  # @param pos [Integer] position to check
  # @return [Boolean] true if at word boundary
  def word_boundary?(input, pos)
    return true if pos.zero?

    # A missing preceding character (pos past the end) counts as a boundary
    # instead of raising on nil.
    previous = input[pos - 1]
    previous.nil? || !previous.match?(WORD_CHAR)
  end

  # Helper to extract a word (word characters and hyphens) starting at position
  # @param input [String] the input string
  # @param pos [Integer] starting position
  # @return [String] the word (may be empty)
  def extract_word(input, pos)
    # One anchored match replaces a per-character regex test and append loop.
    found = WORD_PATTERN.match(input, pos)
    found ? found[0] : +""
  end
end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Processors
5
+ module DiscourseMarkdown
6
+ module Detectors
7
# Detects Discourse event blocks [event]...[/event].
#
# @example
#   detector = Event.new
#   input = '[event name="Meeting" start="2025-12-15 14:00"][/event]'
#   match = detector.detect(input, 0)
#   match.node.name # => "Meeting"
class Event < Base
  # Opening tag; group 1 is the raw attribute string. The lookahead requires
  # whitespace or "]" right after the tag name so that tags merely starting
  # with "event" (e.g. [events]) are not mistaken for an event.
  OPEN_TAG_PATTERN = /\[event(?=[\s\]])([^\]]*)\]/i
  CLOSE_TAG_PATTERN = %r{\[/event\]}i

  # Opening tag anchored (via \G) at the position handed to Regexp#match, so a
  # failed attempt does not scan the rest of the input.
  ANCHORED_OPEN_TAG = /\G#{OPEN_TAG_PATTERN}/
  # key="value" or key='value'; the closing quote must be the same kind as the
  # opening one, so the other quote character may appear inside the value.
  ATTRIBUTE_PATTERN = /(\w+)=(?:"([^"]*)"|'([^']*)')/
  private_constant :ANCHORED_OPEN_TAG, :ATTRIBUTE_PATTERN

  # Attempt to detect an event at the given position.
  #
  # A match requires an opening tag exactly at +pos+, a later closing tag, and
  # both the +name+ and +start+ attributes.
  #
  # @param input [String] the full input string
  # @param pos [Integer] current position to check
  # @return [Match, nil] match result or nil if no match
  def detect(input, pos)
    return nil unless input[pos] == "["

    open_match = ANCHORED_OPEN_TAG.match(input, pos)
    return nil unless open_match

    # First closing tag after the opening tag; offsets are relative to +input+
    close_match = CLOSE_TAG_PATTERN.match(input, open_match.end(0))
    return nil unless close_match

    attrs = parse_attributes(open_match[1])
    return nil unless attrs["name"] && attrs["start"]

    end_pos = close_match.end(0)
    raw = input[pos...end_pos]

    node =
      AST::Event.new(
        name: attrs["name"],
        starts_at: attrs["start"],
        ends_at: attrs["end"],
        status: attrs["status"],
        timezone: attrs["timezone"],
        raw:,
      )

    Match.new(start_pos: pos, end_pos:, node:)
  end

  private

  # Parse quoted attributes out of the opening tag.
  # @param attr_string [String, nil] text between "[event" and "]"
  # @return [Hash{String => String}] attribute values keyed by lower-cased
  #   name; a repeated key keeps its last value
  def parse_attributes(attr_string)
    return {} if attr_string.nil? || attr_string.empty?

    attr_string
      .scan(ATTRIBUTE_PATTERN)
      .to_h { |key, double_quoted, single_quoted| [key.downcase, double_quoted || single_quoted] }
  end
end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Processors
5
+ module DiscourseMarkdown
6
+ module Detectors
7
# Detects user and group mentions (@username, @groupname).
#
# @example Basic usage
#   detector = Mention.new
#   match = detector.detect("Hello @gerhard!", 6)
#   match.node.name # => "gerhard"
#   match.node.type # => :user (default)
#
# @example With type resolver
#   resolver = ->(name) { name == "Testers" ? :group : :user }
#   detector = Mention.new(type_resolver: resolver)
#   match = detector.detect("@Testers", 0)
#   match.node.type # => :group
class Mention < Base
  # @param type_resolver [#call, nil] callable that takes a name and returns :user or :group
  def initialize(type_resolver: nil)
    @type_resolver = type_resolver
  end

  # Attempt to detect a mention at the given position.
  #
  # The "@" must sit at a word boundary and be followed by a non-empty name.
  #
  # @param input [String] the full input string
  # @param pos [Integer] current position to check
  # @return [Match, nil] match result or nil if no match
  def detect(input, pos)
    return nil unless input[pos] == "@" && word_boundary?(input, pos)

    # Username/group name directly after the "@"
    name = extract_word(input, pos + 1)
    return nil if name.empty?

    Match.new(
      start_pos: pos,
      end_pos: pos + 1 + name.length,
      node: AST::Mention.new(name: name, type: resolve_type(name)),
    )
  end

  private

  # @param name [String] the mentioned name
  # @return [Object] the resolver's answer, or :user when there is no resolver
  #   or it returns nil/false
  def resolve_type(name)
    (@type_resolver && @type_resolver.call(name)) || :user
  end
end
54
+ end
55
+ end
56
+ end
57
+ end