markbridge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/lib/markbridge/all.rb +9 -0
- data/lib/markbridge/ast/align.rb +24 -0
- data/lib/markbridge/ast/attachment.rb +42 -0
- data/lib/markbridge/ast/bold.rb +13 -0
- data/lib/markbridge/ast/code.rb +27 -0
- data/lib/markbridge/ast/color.rb +25 -0
- data/lib/markbridge/ast/document.rb +27 -0
- data/lib/markbridge/ast/element.rb +47 -0
- data/lib/markbridge/ast/email.rb +27 -0
- data/lib/markbridge/ast/event.rb +59 -0
- data/lib/markbridge/ast/heading.rb +23 -0
- data/lib/markbridge/ast/horizontal_rule.rb +12 -0
- data/lib/markbridge/ast/image.rb +35 -0
- data/lib/markbridge/ast/italic.rb +13 -0
- data/lib/markbridge/ast/line_break.rb +12 -0
- data/lib/markbridge/ast/list.rb +52 -0
- data/lib/markbridge/ast/list_item.rb +13 -0
- data/lib/markbridge/ast/markdown_text.rb +37 -0
- data/lib/markbridge/ast/mention.rb +29 -0
- data/lib/markbridge/ast/node.rb +19 -0
- data/lib/markbridge/ast/paragraph.rb +13 -0
- data/lib/markbridge/ast/poll.rb +74 -0
- data/lib/markbridge/ast/quote.rb +46 -0
- data/lib/markbridge/ast/size.rb +25 -0
- data/lib/markbridge/ast/spoiler.rb +27 -0
- data/lib/markbridge/ast/strikethrough.rb +13 -0
- data/lib/markbridge/ast/subscript.rb +13 -0
- data/lib/markbridge/ast/superscript.rb +13 -0
- data/lib/markbridge/ast/text.rb +38 -0
- data/lib/markbridge/ast/underline.rb +13 -0
- data/lib/markbridge/ast/upload.rb +74 -0
- data/lib/markbridge/ast/url.rb +27 -0
- data/lib/markbridge/ast.rb +42 -0
- data/lib/markbridge/configuration.rb +11 -0
- data/lib/markbridge/gem_loader.rb +23 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +37 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/strict.rb +12 -0
- data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +121 -0
- data/lib/markbridge/parsers/bbcode/errors/max_depth_exceeded_error.rb +13 -0
- data/lib/markbridge/parsers/bbcode/handler_registry.rb +160 -0
- data/lib/markbridge/parsers/bbcode/handlers/align_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +104 -0
- data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +44 -0
- data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/color_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/email_handler.rb +25 -0
- data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +51 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +36 -0
- data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +64 -0
- data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +48 -0
- data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/simple_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/size_handler.rb +31 -0
- data/lib/markbridge/parsers/bbcode/handlers/spoiler_handler.rb +28 -0
- data/lib/markbridge/parsers/bbcode/handlers/url_handler.rb +24 -0
- data/lib/markbridge/parsers/bbcode/parser.rb +123 -0
- data/lib/markbridge/parsers/bbcode/parser_state.rb +93 -0
- data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +126 -0
- data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +35 -0
- data/lib/markbridge/parsers/bbcode/raw_content_result.rb +25 -0
- data/lib/markbridge/parsers/bbcode/scanner.rb +231 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +21 -0
- data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +23 -0
- data/lib/markbridge/parsers/bbcode/tokens/token.rb +16 -0
- data/lib/markbridge/parsers/bbcode.rb +56 -0
- data/lib/markbridge/parsers/html/handler_registry.rb +87 -0
- data/lib/markbridge/parsers/html/handlers/base_handler.rb +27 -0
- data/lib/markbridge/parsers/html/handlers/image_handler.rb +40 -0
- data/lib/markbridge/parsers/html/handlers/list_handler.rb +29 -0
- data/lib/markbridge/parsers/html/handlers/list_item_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/paragraph_handler.rb +17 -0
- data/lib/markbridge/parsers/html/handlers/quote_handler.rb +28 -0
- data/lib/markbridge/parsers/html/handlers/raw_handler.rb +33 -0
- data/lib/markbridge/parsers/html/handlers/simple_handler.rb +26 -0
- data/lib/markbridge/parsers/html/handlers/url_handler.rb +27 -0
- data/lib/markbridge/parsers/html/parser.rb +113 -0
- data/lib/markbridge/parsers/html.rb +30 -0
- data/lib/markbridge/parsers/media_wiki/inline_parser.rb +332 -0
- data/lib/markbridge/parsers/media_wiki/parser.rb +279 -0
- data/lib/markbridge/parsers/media_wiki.rb +15 -0
- data/lib/markbridge/parsers/text_formatter/handler_registry.rb +130 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attachment_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/attribute_handler.rb +40 -0
- data/lib/markbridge/parsers/text_formatter/handlers/base_handler.rb +45 -0
- data/lib/markbridge/parsers/text_formatter/handlers/code_handler.rb +28 -0
- data/lib/markbridge/parsers/text_formatter/handlers/email_handler.rb +27 -0
- data/lib/markbridge/parsers/text_formatter/handlers/image_handler.rb +32 -0
- data/lib/markbridge/parsers/text_formatter/handlers/list_handler.rb +31 -0
- data/lib/markbridge/parsers/text_formatter/handlers/quote_handler.rb +33 -0
- data/lib/markbridge/parsers/text_formatter/handlers/simple_handler.rb +37 -0
- data/lib/markbridge/parsers/text_formatter/handlers/url_handler.rb +29 -0
- data/lib/markbridge/parsers/text_formatter/parser.rb +132 -0
- data/lib/markbridge/parsers/text_formatter.rb +31 -0
- data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +73 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/mention.rb +57 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +90 -0
- data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +123 -0
- data/lib/markbridge/processors/discourse_markdown/scanner.rb +199 -0
- data/lib/markbridge/processors/discourse_markdown.rb +16 -0
- data/lib/markbridge/processors.rb +8 -0
- data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +83 -0
- data/lib/markbridge/renderers/discourse/markdown_escaper.rb +468 -0
- data/lib/markbridge/renderers/discourse/render_context.rb +80 -0
- data/lib/markbridge/renderers/discourse/renderer.rb +63 -0
- data/lib/markbridge/renderers/discourse/rendering_interface.rb +86 -0
- data/lib/markbridge/renderers/discourse/tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tag_library.rb +67 -0
- data/lib/markbridge/renderers/discourse/tags/align_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +46 -0
- data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/code_tag.rb +54 -0
- data/lib/markbridge/renderers/discourse/tags/color_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/email_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/event_tag.rb +49 -0
- data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/image_tag.rb +29 -0
- data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +16 -0
- data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +87 -0
- data/lib/markbridge/renderers/discourse/tags/list_tag.rb +39 -0
- data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +34 -0
- data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +21 -0
- data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +51 -0
- data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +32 -0
- data/lib/markbridge/renderers/discourse/tags/size_tag.rb +27 -0
- data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +18 -0
- data/lib/markbridge/renderers/discourse/tags/subscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/superscript_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +19 -0
- data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +80 -0
- data/lib/markbridge/renderers/discourse/tags/url_tag.rb +24 -0
- data/lib/markbridge/renderers/discourse.rb +50 -0
- data/lib/markbridge/version.rb +5 -0
- data/lib/markbridge.rb +201 -0
- metadata +186 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Parsers
    module TextFormatter
      module Handlers
        # Handler for QUOTE elements in s9e/TextFormatter XML.
        #
        # Builds an AST::Quote from the element's attributes and appends it to
        # the parent node.
        class QuoteHandler < BaseHandler
          # @return [Class] the AST class this handler produces (AST::Quote)
          attr_reader :element_class

          def initialize
            @element_class = AST::Quote
          end

          # Create a quote node from the element's attributes.
          #
          # The post and topic references prefer the +post_id+ / +topic_id+
          # attributes and fall back to +post+ / +topic+.
          #
          # @param element [Object] the XML element being handled
          #   (NOTE(review): presumably a Nokogiri::XML::Element - confirm)
          # @param parent [Object] AST node that receives the quote via +<<+
          # @return [AST::Quote] the new node; returning it signals that the
          #   element's children should be processed into it
          def process(element:, parent:)
            # extract_attributes is not defined in this class; it is expected
            # to come from BaseHandler.
            attributes = extract_attributes(element)
            post_ref = attributes[:post_id] || attributes[:post]
            topic_ref = attributes[:topic_id] || attributes[:topic]

            quote =
              AST::Quote.new(
                author: attributes[:author],
                post: post_ref,
                topic: topic_ref,
                username: attributes[:username],
              )

            quote.tap { |node| parent << node }
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Parsers
    module TextFormatter
      module Handlers
        # Handler for XML elements that map to an AST node needing no attributes.
        #
        # An instance of the configured class is created with no arguments and
        # appended to the parent. Suitable for plain formatting tags such as
        # B, I, U, S.
        #
        # @example
        #   handler = SimpleHandler.new(AST::Bold)
        #   registry.register("B", handler)
        class SimpleHandler < BaseHandler
          # @return [Class] the AST node class this handler instantiates
          attr_reader :element_class

          # @param element_class [Class] the AST node class to instantiate
          def initialize(element_class)
            @element_class = element_class
          end

          # Instantiate the configured node class and attach it to the parent.
          #
          # @param element [Nokogiri::XML::Element] source element (not inspected here)
          # @param parent [AST::Element] node that receives the new child via +<<+
          # @return [Object] the new node; returning it signals that the
          #   element's children should be processed into it
          def process(element:, parent:)
            @element_class.new.tap { |created| parent << created }
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Parsers
    module TextFormatter
      module Handlers
        # Handler for URL elements in s9e/TextFormatter XML.
        #
        # Reads the element's +url+ attribute and creates an AST::Url node
        # whose +href+ is that value.
        class UrlHandler < BaseHandler
          # @return [Class] the AST class this handler produces (AST::Url)
          attr_reader :element_class

          def initialize
            @element_class = AST::Url
          end

          # Create a URL node and attach it to the parent.
          #
          # @param element [Object] the XML element being handled
          #   (NOTE(review): presumably a Nokogiri::XML::Element - confirm)
          # @param parent [Object] AST node that receives the link via +<<+
          # @return [AST::Url] the new node; returning it signals that the
          #   element's children should be processed into it
          def process(element:, parent:)
            # extract_attributes is not defined in this class; it is expected
            # to come from BaseHandler.
            target = extract_attributes(element)[:url]
            link = AST::Url.new(href: target)
            parent << link
            link
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Parsers
    module TextFormatter
      # Parses s9e/TextFormatter XML format into an AST
      #
      # The s9e/TextFormatter library (https://github.com/s9e/TextFormatter) stores BBCode as XML:
      # - Plain text: <t>text content</t>
      # - Rich text: <r><B>bold</B> <URL url="...">link</URL></r>
      # - Markup preservation: <s> and <e> elements (ignored during parsing)
      #
      # This format is used by phpBB 3.2+ and other forum software.
      #
      # Requires Nokogiri gem to be installed. Add to your Gemfile:
      #   gem "nokogiri"
      class Parser
        # Markup-preservation elements; they and their content are skipped.
        MARKUP_TAGS = %w[s e].freeze
        # Root elements; only their children are processed.
        ROOT_TAGS = %w[t r].freeze
        private_constant :MARKUP_TAGS, :ROOT_TAGS

        # @return [Hash{String => Integer}] tag name => number of occurrences
        #   without a registered handler, for the most recent #parse call
        attr_reader :unknown_tags

        # Create a new parser with optional custom handler registry
        #
        # When a block is given the registry is built from the defaults and the
        # +handlers+ argument is not used.
        #
        # @param handlers [HandlerRegistry, nil] custom handler registry, defaults to HandlerRegistry.default
        # @yield [HandlerRegistry] optional block to customize the default registry
        # @example Using default mappings
        #   parser = Parser.new
        # @example Using custom registry
        #   parser = Parser.new(handlers: my_registry)
        # @example Customizing default mappings
        #   parser = Parser.new do |registry|
        #     registry.register("CUSTOM", MyCustomHandler.new)
        #   end
        def initialize(handlers: nil, &block)
          @handlers =
            if block_given?
              HandlerRegistry.build_from_default(&block)
            else
              handlers || HandlerRegistry.default
            end
          @unknown_tags = Hash.new(0)
        end

        # Parse s9e/TextFormatter XML into an AST
        #
        # Input without an XML root element, or input that makes Nokogiri
        # raise a syntax error, is returned as a document holding the raw
        # input as a single text node (or an empty document for empty input).
        #
        # @param input [String] XML string in s9e/TextFormatter format
        # @return [AST::Document]
        def parse(input)
          @unknown_tags.clear

          root = Nokogiri.XML(input).root
          return plain_text_document(input) unless root

          document = AST::Document.new
          process_node(root, document)
          document
        rescue Nokogiri::XML::SyntaxError
          plain_text_document(input)
        end

        # Process children of an XML element (public for handler access)
        # @param element [Nokogiri::XML::Element]
        # @param ast_parent [AST::Element]
        def process_children(element, ast_parent)
          element.children.each { |child| process_node(child, ast_parent) }
        end

        private

        # Build the fallback document used when the input is not usable XML.
        # @param input [String] the original, unparsed input
        # @return [AST::Document] document with the input as one text node,
        #   or an empty document when the input is empty
        def plain_text_document(input)
          document = AST::Document.new
          document << AST::Text.new(input) unless input.empty?
          document
        end

        # Process an XML node and add corresponding AST nodes to parent.
        # Nodes that are neither elements nor text are ignored.
        # @param xml_node [Nokogiri::XML::Element, Nokogiri::XML::Text]
        # @param ast_parent [AST::Element]
        def process_node(xml_node, ast_parent)
          if xml_node.element?
            process_element(xml_node, ast_parent)
          elsif xml_node.text?
            process_text(xml_node, ast_parent)
          end
        end

        # Process an XML element
        # @param element [Nokogiri::XML::Element]
        # @param ast_parent [AST::Element]
        def process_element(element, ast_parent)
          tag_name = element.name

          # Skip markup preservation elements and their content (used for unparsing)
          return if MARKUP_TAGS.include?(tag_name)

          # Root nodes contribute only their children
          return process_children(element, ast_parent) if ROOT_TAGS.include?(tag_name)

          if tag_name == "br"
            ast_parent << AST::LineBreak.new
            return
          end

          # Handler returns the new AST node if children should be processed
          # into it, nil otherwise
          result_element = @handlers.process_element(element, ast_parent)

          if result_element
            process_children(element, result_element)
          elsif !@handlers.has_handler?(tag_name)
            # No handler found - track as unknown and process children directly
            @unknown_tags[tag_name] += 1
            process_children(element, ast_parent)
          end
          # else: handler returned nil intentionally (no children to process)
        end

        # Process text node
        #
        # Whitespace-only text nodes are skipped.
        # NOTE(review): this also drops a lone space between two sibling
        # elements (e.g. "<B>a</B> <I>b</I>") - confirm that is intended.
        #
        # @param text_node [Nokogiri::XML::Text]
        # @param ast_parent [AST::Element]
        def process_text(text_node, ast_parent)
          text = text_node.content
          return if text.strip.empty?

          ast_parent << AST::Text.new(text)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Dependencies
|
|
4
|
+
require_relative "../gem_loader"
|
|
5
|
+
Markbridge::GemLoader.require_gem(:nokogiri, feature: "s9e/TextFormatter XML parsing")
|
|
6
|
+
|
|
7
|
+
# AST Nodes
|
|
8
|
+
require_relative "../ast"
|
|
9
|
+
|
|
10
|
+
# Handler classes
|
|
11
|
+
require_relative "text_formatter/handlers/base_handler"
|
|
12
|
+
require_relative "text_formatter/handlers/simple_handler"
|
|
13
|
+
require_relative "text_formatter/handlers/attribute_handler"
|
|
14
|
+
require_relative "text_formatter/handlers/attachment_handler"
|
|
15
|
+
require_relative "text_formatter/handlers/code_handler"
|
|
16
|
+
require_relative "text_formatter/handlers/email_handler"
|
|
17
|
+
require_relative "text_formatter/handlers/image_handler"
|
|
18
|
+
require_relative "text_formatter/handlers/list_handler"
|
|
19
|
+
require_relative "text_formatter/handlers/quote_handler"
|
|
20
|
+
require_relative "text_formatter/handlers/url_handler"
|
|
21
|
+
|
|
22
|
+
# Parser components
|
|
23
|
+
require_relative "text_formatter/handler_registry"
|
|
24
|
+
require_relative "text_formatter/parser"
|
|
25
|
+
|
|
26
|
+
module Markbridge
  module Parsers
    # Namespace for the s9e/TextFormatter XML parser. The handlers, handler
    # registry and parser are defined in the files required above; this
    # declaration only makes sure the namespace exists.
    module TextFormatter
    end
  end
end
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Processors
    module DiscourseMarkdown
      # Tracks whether the current position is inside a code block.
      # Handles fenced code blocks (``` or ~~~), indented code blocks (4+ spaces),
      # and inline code (`).
      #
      # Fenced code blocks:
      # - Can have leading whitespace (up to 3 spaces)
      # - Opening fence: 3+ backticks or tildes, optionally followed by language
      # - Closing fence: same or more fence characters as opening
      #
      # Indented code blocks:
      # - Lines indented by 4+ spaces or 1+ tab
      # - Continues until a non-blank line with less indentation
      #
      # Inline code:
      # - Single or multiple backticks as delimiter
      # - Content between matching backticks
      #
      # The check_* methods both inspect the input and update the tracker's
      # state, so they are meant to be called while walking the input forward.
      class CodeBlockTracker
        # Maximum number of leading spaces allowed before a fence.
        MAX_FENCE_INDENT = 3
        # Minimum number of fence characters that make a fence.
        MIN_FENCE_LENGTH = 3
        # Leading run of spaces that marks an indented code line (4 spaces).
        # Built with String#* so the width cannot be lost to whitespace
        # normalisation.
        CODE_INDENT = (" " * 4).freeze
        private_constant :MAX_FENCE_INDENT, :MIN_FENCE_LENGTH, :CODE_INDENT

        # @return [Boolean] true if currently inside a fenced code block
        attr_reader :in_fenced_block

        # @return [Boolean] true if currently inside an indented code block
        attr_reader :in_indented_block

        # @return [Boolean] true if currently inside an inline code span
        attr_reader :in_inline_code

        def initialize
          reset!
        end

        # Check if currently inside any code context
        # @return [Boolean]
        def in_code?
          @in_fenced_block || @in_indented_block || @in_inline_code
        end

        # Check if position is at start of a fenced code block boundary.
        #
        # Outside a fenced block, a fence opens one and the whole line
        # (including any info string) is consumed. Inside a fenced block, only
        # a fence of the same character and at least the opening length,
        # followed by optional spaces and then a newline or end of input,
        # closes it; any other fence-looking line yields nil.
        #
        # @param input [String] the full input string
        # @param pos [Integer] current position
        # @param line_start [Boolean] true if pos is at the start of a line
        # @return [Integer, nil] end position after fence, or nil if no fence
        def check_fenced_boundary(input, pos, line_start:)
          return nil unless line_start

          # Skip up to 3 spaces of indentation
          fence_start = pos
          indent_limit = pos + MAX_FENCE_INDENT
          fence_start += 1 while fence_start < indent_limit && input[fence_start] == " "

          # input[...] is nil past the end of the string, so this also covers
          # running off the end of the input.
          fence_char = input[fence_start]
          return nil unless fence_char == "`" || fence_char == "~"

          # Count consecutive fence characters
          fence_end = fence_start
          fence_end += 1 while input[fence_end] == fence_char
          fence_length = fence_end - fence_start
          return nil if fence_length < MIN_FENCE_LENGTH

          if @in_fenced_block
            close_fence(input, fence_end, fence_char, fence_length)
          else
            open_fence(input, fence_end, fence_char, fence_length)
          end
        end

        # Check if line at position is an indented code block line.
        # A line is considered indented code if it starts with 4+ spaces or 1+ tab.
        # Blank lines within an indented block are considered part of it.
        #
        # @param input [String] the full input string
        # @param pos [Integer] current position (must be at line start)
        # @param line_start [Boolean] true if pos is at the start of a line
        # @return [Integer, nil] end position after the line, or nil if not indented code
        def check_indented_boundary(input, pos, line_start:)
          return nil unless line_start
          return nil if @in_fenced_block # Fenced blocks take precedence

          line_end = input.index("\n", pos) || input.length
          line_content = input[pos...line_end]
          is_blank = line_content.match?(/\A\s*\z/)
          has_code_indent = line_content.start_with?(CODE_INDENT, "\t")

          if @in_indented_block
            # Blank and indented lines both continue the block
            return position_after_line(input, line_end) if is_blank || has_code_indent

            # Non-blank, non-indented line ends the block
            @in_indented_block = false
            nil
          elsif has_code_indent
            # Start of indented code block
            @in_indented_block = true
            position_after_line(input, line_end)
          end
        end

        # Check for inline code boundary.
        #
        # Outside inline code, a run of backticks at +pos+ opens a span and
        # becomes its delimiter. Inside inline code, the span closes only when
        # the delimiter occurs at +pos+ and is not followed by another backtick.
        #
        # @param input [String] the full input string
        # @param pos [Integer] current position
        # @return [Integer, nil] end position after inline code, or nil if not at boundary
        def check_inline_boundary(input, pos)
          return nil if @in_fenced_block || @in_indented_block
          return nil if pos >= input.length || input[pos] != "`"

          @in_inline_code ? close_inline_code(input, pos) : open_inline_code(input, pos)
        end

        # Reset the tracker state
        def reset!
          @in_fenced_block = false
          @fence_char = nil
          @fence_length = 0
          @in_indented_block = false
          @in_inline_code = false
          @inline_delimiter = nil
        end

        private

        # Position just past the newline at +line_end+, or +line_end+ itself
        # when it is the end of the input.
        def position_after_line(input, line_end)
          line_end < input.length ? line_end + 1 : line_end
        end

        # Record an opening fence and consume the rest of its line.
        def open_fence(input, after_fence, fence_char, fence_length)
          # Skip to end of line (info string)
          line_end = input.index("\n", after_fence) || input.length

          @in_fenced_block = true
          @fence_char = fence_char
          @fence_length = fence_length

          position_after_line(input, line_end)
        end

        # Close the current fenced block if this fence qualifies; otherwise
        # leave the state untouched and return nil.
        def close_fence(input, after_fence, fence_char, fence_length)
          return nil unless fence_char == @fence_char && fence_length >= @fence_length

          # Only trailing spaces may follow a closing fence
          line_end = after_fence
          line_end += 1 while input[line_end] == " "
          return nil unless line_end >= input.length || input[line_end] == "\n"

          @in_fenced_block = false
          @fence_char = nil
          @fence_length = 0

          position_after_line(input, line_end)
        end

        # Start an inline code span; the backtick run at +pos+ is the delimiter.
        def open_inline_code(input, pos)
          delimiter_end = pos
          delimiter_end += 1 while input[delimiter_end] == "`"

          @inline_delimiter = input[pos...delimiter_end]
          @in_inline_code = true

          delimiter_end
        end

        # Close the inline code span if the stored delimiter is at +pos+ and
        # is not immediately followed by another backtick.
        def close_inline_code(input, pos)
          next_pos = pos + @inline_delimiter.length
          return nil unless input[pos, @inline_delimiter.length] == @inline_delimiter
          return nil if input[next_pos] == "`"

          @in_inline_code = false
          @inline_delimiter = nil
          next_pos
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Processors
    module DiscourseMarkdown
      module Detectors
        # Result of a successful detection
        # @attr_reader start_pos [Integer] start position in input
        # @attr_reader end_pos [Integer] end position in input (exclusive)
        # @attr_reader node [AST::Node] the AST node representing the detected construct
        Match = Data.define(:start_pos, :end_pos, :node)

        # Base class for construct detectors.
        # Subclasses implement detection logic for specific constructs
        # (mentions, polls, events, uploads).
        #
        # @abstract Subclass and implement {#detect}
        class Base
          # A run of word characters and hyphens starting exactly at the
          # position handed to Regexp#match (\G anchors to that offset).
          WORD_AT_POSITION = /\G[\w\-]+/
          private_constant :WORD_AT_POSITION

          # Attempt to detect a construct at the given position.
          #
          # @param input [String] the full input string
          # @param pos [Integer] current position to check
          # @return [Match, nil] match result or nil if no match
          # @raise [NotImplementedError] always, unless overridden by a subclass
          def detect(input, pos)
            raise NotImplementedError, "#{self.class} must implement #detect"
          end

          private

          # Helper to check if position is at a word boundary (for mentions, etc.)
          #
          # Position 0 is always a boundary; otherwise the position is a
          # boundary when the preceding character is not a word character. A
          # position with no preceding character in range is treated as a
          # boundary instead of raising.
          #
          # @param input [String] the input string
          # @param pos [Integer] position to check
          # @return [Boolean] true if at word boundary
          def word_boundary?(input, pos)
            return true if pos.zero?

            !input[pos - 1]&.match?(/\w/)
          end

          # Helper to extract a word starting at position
          #
          # A word is the longest run of word characters and hyphens that
          # begins exactly at +pos+.
          #
          # @param input [String] the input string
          # @param pos [Integer] starting position
          # @return [String] the word (may be empty)
          def extract_word(input, pos)
            # match returns nil both when no word starts at pos and when pos
            # lies beyond the input; either way the word is empty.
            WORD_AT_POSITION.match(input, pos)&.[](0) || +""
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Processors
    module DiscourseMarkdown
      module Detectors
        # Detects Discourse event blocks [event]...[/event].
        #
        # @example
        #   detector = Event.new
        #   input = '[event name="Meeting" start="2025-12-15 14:00"][/event]'
        #   match = detector.detect(input, 0)
        #   match.node.name # => "Meeting"
        class Event < Base
          OPEN_TAG_PATTERN = /\[event([^\]]*)\]/i
          CLOSE_TAG_PATTERN = %r{\[/event\]}i

          # Opening tag anchored (\G) to the offset passed to Regexp#match, so
          # no substring copy or forward search is needed. The lookahead
          # requires the tag name to end after "event", so "[eventful ...]"
          # is not treated as an event tag.
          OPEN_TAG_AT_POSITION = /\G\[event(?![\w\-])([^\]]*)\]/i

          # key="value" or key='value'; the closing quote must be the same
          # character as the opening one, so the other quote character may
          # appear inside the value.
          ATTRIBUTE_PATTERN = /(\w+)=(?:"([^"]*)"|'([^']*)')/

          private_constant :OPEN_TAG_AT_POSITION, :ATTRIBUTE_PATTERN

          # Attempt to detect an event at the given position.
          #
          # An event is an opening tag starting exactly at +pos+, followed
          # somewhere later by a closing tag, whose opening tag carries both
          # a +name+ and a +start+ attribute.
          #
          # @param input [String] the full input string
          # @param pos [Integer] current position to check
          # @return [Match, nil] match result or nil if no match
          def detect(input, pos)
            return nil unless input[pos] == "["

            # Check for opening tag
            open_match = OPEN_TAG_AT_POSITION.match(input, pos)
            return nil unless open_match

            # Find closing tag; offsets are relative to the full input
            close_match = CLOSE_TAG_PATTERN.match(input, open_match.end(0))
            return nil unless close_match

            # Extract raw content
            end_pos = close_match.end(0)
            raw = input[pos...end_pos]

            # Parse attributes from opening tag
            attrs = parse_attributes(open_match[1])

            # Validate required attributes
            return nil unless attrs["name"] && attrs["start"]

            node =
              AST::Event.new(
                name: attrs["name"],
                starts_at: attrs["start"],
                ends_at: attrs["end"],
                status: attrs["status"],
                timezone: attrs["timezone"],
                raw:,
              )

            Match.new(start_pos: pos, end_pos:, node:)
          end

          private

          # Parse the attribute portion of an opening tag.
          #
          # Keys are lower-cased; when a key is repeated the last value wins.
          # Unquoted values are not recognised.
          #
          # @param attr_string [String, nil] text between "[event" and "]"
          # @return [Hash{String => String}] attribute name => value
          def parse_attributes(attr_string)
            return {} if attr_string.nil? || attr_string.empty?

            attr_string
              .scan(ATTRIBUTE_PATTERN)
              .each_with_object({}) do |(key, double_quoted, single_quoted), attrs|
                attrs[key.downcase] = double_quoted || single_quoted
              end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markbridge
  module Processors
    module DiscourseMarkdown
      module Detectors
        # Detects user and group mentions (@username, @groupname).
        #
        # @example Basic usage
        #   detector = Mention.new
        #   match = detector.detect("Hello @gerhard!", 6)
        #   match.node.name # => "gerhard"
        #   match.node.type # => :user (default)
        #
        # @example With type resolver
        #   resolver = ->(name) { name == "Testers" ? :group : :user }
        #   detector = Mention.new(type_resolver: resolver)
        #   match = detector.detect("@Testers", 0)
        #   match.node.type # => :group
        class Mention < Base
          # @param type_resolver [#call, nil] callable that takes a name and returns :user or :group
          def initialize(type_resolver: nil)
            @type_resolver = type_resolver
          end

          # Attempt to detect a mention at the given position.
          #
          # A mention is an "@" sitting on a word boundary and followed by a
          # non-empty name; the match covers the "@" and the name.
          #
          # @param input [String] the full input string
          # @param pos [Integer] current position to check
          # @return [Match, nil] match result or nil if no match
          def detect(input, pos)
            return nil unless input[pos] == "@" && word_boundary?(input, pos)

            # The name starts right after the "@"
            name_start = pos + 1
            name = extract_word(input, name_start)
            return nil if name.empty?

            mention = AST::Mention.new(name: name, type: resolve_type(name))
            Match.new(start_pos: pos, end_pos: name_start + name.length, node: mention)
          end

          private

          # Decide whether +name+ is a user or a group.
          # Falls back to :user when there is no resolver or when the
          # resolver returns nil/false.
          #
          # @param name [String] the mentioned name, without the "@"
          # @return [Object] the resolver's result, or :user
          def resolve_type(name)
            resolved = @type_resolver && @type_resolver.call(name)
            resolved || :user
          end
        end
      end
    end
  end
end
|