RubyGems - markbridge - Versions diffs - 0.1.1 → 0.1.3 - Mend

markbridge 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)

data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb CHANGED

@@ -22,11 +22,18 @@ module Markbridge
         #   match = detector.detect(input, 0)
         #   match.node.type # => :attachment
         class Upload < Base
-          # Pattern for image: ![alt|dimensions](upload://sha1.ext)
-          IMAGE_PATTERN = %r{!\[([^\]]*)\]\(upload://([^)]+)\)}
-          # Pattern for attachment: [filename|attachment](upload://sha1.ext) followed by optional (size)
-          ATTACHMENT_PATTERN = %r{\[([^\]]*\|attachment)\]\(upload://([^)]+)\)(\s*\([^)]+\))?}
+          # Image: ![alt|dimensions](upload://sha1.ext)
+          IMAGE_PATTERN =
+            %r{\A!\[(?<alt>[^|\]]*)(?:\|(?<dimensions>[^\]]*))?\]\(upload://(?<url>[^)]+)\)}
+          # Attachment: [filename|attachment](upload://sha1.ext) (size)
+          ATTACHMENT_PATTERN =
+            %r{
+            \A
+            \[(?<filename>[^|\]]*)\|attachment\]
+            \(upload://(?<url>[^)]+)\)
+            (?:\s*\((?<size>[^)]+)\))?
+          }xi
           # Attempt to detect an upload at the given position.
           #
@@ -34,14 +41,11 @@ module Markbridge
           # @param pos [Integer] current position to check
           # @return [Match, nil] match result or nil if no match
           def detect(input, pos)
-            char = input[pos]
-            return nil unless char == "!" || char == "["
             remaining = input[pos..]
-            if char == "!"
+            case input[pos]
+            when "!"
               detect_image(remaining, pos)
-            else
+            when "["
               detect_attachment(remaining, pos)
             end
           end
@@ -50,71 +54,42 @@ module Markbridge
           def detect_image(remaining, pos)
             match = IMAGE_PATTERN.match(remaining)
-            return nil unless match&.begin(0)&.zero?
-            raw = match[0]
-            alt_part = match[1]
-            url_part = match[2]
+            return nil unless match
-            # Parse alt and dimensions from "alt|dimensions" format
-            alt, dimensions = parse_alt_dimensions(alt_part)
+            sha1, filename = parse_upload_url(match[:url])
+            alt = match[:alt]
+            alt = nil if alt.empty?
-            # Extract SHA1 and filename from URL
-            sha1, filename = parse_upload_url(url_part)
+            # `type: :image` is omitted because it is AST::Upload's default -
+            # passing it explicitly was an equivalent-mutation surface.
+            node =
+              AST::Upload.new(sha1:, filename:, alt:, dimensions: match[:dimensions], raw: match[0])
-            node = AST::Upload.new(sha1:, filename:, type: :image, alt:, dimensions:, raw:)
-            Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
+            Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
           end
           def detect_attachment(remaining, pos)
             match = ATTACHMENT_PATTERN.match(remaining)
-            return nil unless match&.begin(0)&.zero?
-            raw = match[0]
-            name_part = match[1]
-            url_part = match[2]
-            size_part = match[3]
-            # Parse filename from "filename|attachment" format
-            filename = name_part.sub(/\|attachment$/i, "")
+            return nil unless match
-            # Extract SHA1 from URL
-            sha1, _url_filename = parse_upload_url(url_part)
+            sha1, = parse_upload_url(match[:url])
-            # Parse size if present
-            size = size_part&.strip&.delete_prefix("(")&.delete_suffix(")")
+            node =
+              AST::Upload.new(
+                sha1:,
+                filename: match[:filename],
+                type: :attachment,
+                size: match[:size],
+                raw: match[0],
+              )
-            node = AST::Upload.new(sha1:, filename:, type: :attachment, size:, raw:)
-            Match.new(start_pos: pos, end_pos: pos + raw.length, node:)
-          end
-          def parse_alt_dimensions(alt_part)
-            return nil, nil if alt_part.nil? || alt_part.empty?
-            if alt_part.include?("|")
-              parts = alt_part.split("|", 2)
-              alt = parts[0].empty? ? nil : parts[0]
-              dimensions = parts[1]
-              [alt, dimensions]
-            else
-              [alt_part, nil]
-            end
+            Match.new(start_pos: pos, end_pos: pos + match[0].length, node:)
           end
+          # URL format: sha1.ext or just sha1. Returns [sha1, filename-or-nil].
           def parse_upload_url(url_part)
-            # URL format: sha1.ext or just sha1
-            if url_part.include?(".")
-              parts = url_part.split(".", 2)
-              sha1 = parts[0]
-              filename = url_part
-            else
-              sha1 = url_part
-              filename = nil
-            end
-            [sha1, filename]
+            sha1, _, ext = url_part.partition(".")
+            [sha1, ext.empty? ? nil : url_part]
           end
         end
       end

data/lib/markbridge/processors/discourse_markdown/scanner.rb CHANGED

@@ -49,13 +49,8 @@ module Markbridge
         def initialize(detectors: DEFAULT_DETECTORS, tag_library: nil, mention_resolver: nil)
           @detector_instances = build_detectors(detectors, mention_resolver)
           @tag_library = tag_library
-          @code_tracker = nil
-          @result = nil
-          @nodes = nil
-          @node_index = 0
-          @pos = 0
-          @input = nil
-          @line_start = true
+          # @code_tracker / @result / @nodes / @node_index / @pos / @input /
+          # @line_start are set by #scan before use; no defensive init needed.
         end
         # Scan input and extract constructs.
@@ -63,14 +58,12 @@ module Markbridge
         # @param input [String] Discourse Markdown input
         # @return [ScanResult] result containing processed markdown and extracted nodes
         def scan(input)
-          return ScanResult.new(markdown: "", nodes: []) if input.nil? || input.empty?
           @code_tracker = CodeBlockTracker.new
           @result = +""
           @nodes = []
           @node_index = 0
           @pos = 0
-          @input = input
+          @input = input.to_s
           @line_start = true
           scan_input
@@ -82,14 +75,10 @@ module Markbridge
         def build_detectors(detectors, mention_resolver)
           detectors.map do |klass|
-            if klass.is_a?(Class)
-              if klass == Detectors::Mention && mention_resolver
-                klass.new(type_resolver: mention_resolver)
-              else
-                klass.new
-              end
+            if klass == Detectors::Mention
+              klass.new(type_resolver: mention_resolver)
             else
-              klass
+              klass.new
             end
           end
         end
@@ -102,9 +91,10 @@ module Markbridge
               next if advance_code_boundary(:check_indented_boundary)
             end
-            # Check for inline code boundary
-            if @input[@pos] == "`" && !@code_tracker.in_fenced_block &&
-                 !@code_tracker.in_indented_block
+            # Check for inline code boundary. check_inline_boundary's
+            # own fenced/indented guard means we don't need to pre-check
+            # here — it'll just return nil in those cases.
+            if @input[@pos] == "`"
               new_pos = @code_tracker.check_inline_boundary(@input, @pos)
               if new_pos
                 @result << @input[@pos...new_pos]
@@ -142,9 +132,15 @@ module Markbridge
           new_pos = @code_tracker.public_send(method, @input, @pos, line_start: true)
           return false unless new_pos
+          # check_fenced_boundary / check_indented_boundary always stop
+          # at pos_after_line, which is either after a "\n" or at EOF.
+          # After-newline → @line_start should be true; at EOF the
+          # outer `while @pos < @input.length` exits and @line_start
+          # is unobservable. Setting true unconditionally drops the
+          # `@input[new_pos - 1] == "\n"` dance.
           @result << @input[@pos...new_pos]
           @pos = new_pos
-          @line_start = new_pos > 0 && @input[new_pos - 1] == "\n"
+          @line_start = true
           true
         end
@@ -159,26 +155,24 @@ module Markbridge
         def handle_match(match)
           node = match.node
           @nodes << node
+          @result << render_placeholder(node)
-          # Render placeholder using tag library if available
-          placeholder = render_placeholder(node)
-          @result << placeholder
+          # Every detector shipped today matches content that ends on a
+          # non-newline byte (`]`, `)`, `_`, alphanumeric), so @line_start
+          # is always false after a successful match. If a future custom
+          # detector produces a match whose end_pos sits right after
+          # "\n", re-introduce the `@input[@pos - 1] == "\n"` check.
           @pos = match.end_pos
-          @line_start = @pos > 0 && @input[@pos - 1] == "\n"
+          @line_start = false
           @node_index += 1
         end
         def render_placeholder(node)
           if @tag_library
             tag = @tag_library[node.class]
-            if tag
-              # Create a minimal interface for rendering
-              return tag.render(node, nil)
-            end
+            return tag.render(node, nil) if tag
           end
-          # Default placeholder format if no tag library or tag not found
           default_placeholder(node)
         end
@@ -192,8 +186,6 @@ module Markbridge
             "<<EVENT:#{@node_index}:#{node.name}>>"
           when AST::Upload
             "<<UPLOAD:#{@node_index}:#{node.sha1}>>"
-          else
-            "<<UNKNOWN:#{@node_index}>>"
           end
         end
       end

data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb CHANGED

@@ -15,12 +15,10 @@ module Markbridge
           # @return [String]
           def build(content, marker:, indent:)
             lines = content.split("\n")
-            lines = [""] if lines.empty? # Handle empty content
             first_line = "#{indent}#{marker}#{lines.first}"
-            return "#{first_line}\n" if lines.size == 1
+            return "#{first_line}\n" if lines.size < 2
-            # Handle multi-line content with sophisticated blank line handling
             format_multiline(lines, first_line, indent)
           end
@@ -63,15 +61,17 @@ module Markbridge
             end
           end
-          # Handle empty lines in continuation
+          # Handle empty lines in continuation. Caller (format_continuation_line)
+          # only invokes this when `line.empty?`, and `content.split("\n")`
+          # trims trailing empty strings, so the LAST continuation line is
+          # never empty — `idx + 1` is always in bounds when we get here.
           # @param idx [Integer] index in continuation_lines
           # @param continuation_lines [Array<String>] all continuation lines
           # @param continuation_indent [String] indent for continuation
           # @return [String, nil] formatted line or nil to skip
           def handle_empty_line(idx, continuation_lines, continuation_indent)
             # Skip empty lines that come before nested list items (structural blanks)
-            next_line = continuation_lines[idx + 1]
-            return nil if next_line&.match?(/\A\s*(?:-|\d+\.)\s/)
+            return nil if continuation_lines[idx + 1].match?(/\A\s*(?:-|\d+\.)\s/)
             # Preserve empty lines within text content (paragraph breaks) with indentation
             continuation_indent

data/lib/markbridge/renderers/discourse/html_escaper.rb ADDED

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+require "cgi"
+module Markbridge
+  module Renderers
+    module Discourse
+      # Escapes text for safe inclusion in HTML output. Used when rendering
+      # content inside a CommonMark HTML block (e.g. TableTag's fallback)
+      # where Markdown-level escaping would not be applied.
+      class HtmlEscaper
+        # @param text [String, nil]
+        # @return [String]
+        def self.escape(text)
+          CGI.escapeHTML(text || "")
+        end
+      end
+    end
+  end
+end

data/lib/markbridge/renderers/discourse/markdown_escaper.rb CHANGED

@@ -36,18 +36,15 @@ module Markbridge
         #   breaks disabled by default.
         def initialize(escape_hard_line_breaks: false)
           @escape_hard_line_breaks = escape_hard_line_breaks
-          @inline_content = nil
-          @inline_result = nil
-          @inline_len = 0
+          # @inline_content / @inline_result / @inline_len are set by
+          # escape_inline on every call before any helper reads them;
+          # no defensive init needed.
         end
-        # Fast-path check: any character that might need escaping
-        # Only includes characters we actually escape (removed ], {, }, ^)
-        # > is needed for blockquote detection at line start
+        # Fast-path: skip escape_text entirely for content with no special
+        # chars. `>` is needed for blockquote detection at line start.
         MAYBE_SPECIAL = /[\\`*_\[#+\-.!<>&|~=>)]/
-        # Check for indented code on any line
-        # Matches: 4+ spaces, tab, or space+tab combinations that reach column 4+
         MAYBE_INDENTED_CODE = /(?:^|\n)(?: {4}|\t| {1,3}\t)/
         # Block-level patterns
@@ -122,8 +119,7 @@ module Markbridge
         # @return [String] the escaped text, or empty string if input is nil
         # @note Multi-line HTML tags and blocks are handled by escaping the opening <
         def escape(text)
-          return "".freeze if text.nil?
-          return text if text.empty?
+          return "" if text.nil?
           # Neutralize hard line breaks (trailing 2+ spaces before newline)
           text = text.gsub(/  +\n/, "\n") if @escape_hard_line_breaks && text.include?("  \n")
@@ -136,7 +132,14 @@ module Markbridge
         private
         def escape_text(text)
-          lines = text.split("\n", -1)
+          # On CRLF input, consume `\r` as part of the line terminator instead
+          # of leaving it on the line. A trailing `\r` breaks line-end anchored
+          # regexes (e.g. SETEXT_UNDERLINE_*) and the `ws_end >= line_length`
+          # early-out in escape_indented_code, leaking NBSPs onto
+          # whitespace-only CRLF lines. The `include?` guard keeps the
+          # LF-only fast path on a string split (regex split is ~20% slower
+          # on the indented-code hot path).
+          lines = text.include?("\r") ? text.split(/\r?\n/, -1) : text.split("\n", -1)
           return escape_line(lines[0], false) if lines.size == 1
           # Pre-allocate result buffer
@@ -158,37 +161,32 @@ module Markbridge
         end
         def escape_line(line, prev_was_paragraph)
-          return line if line.empty?
-          # Handle indented code blocks first
+          # No `line.empty?` early-return: it's redundant with the
+          # `line.getbyte(indent_len).nil?` guard below, which catches both
+          # empty and whitespace-only lines while also preserving object
+          # identity (returns `line`).
           return escape_indented_code(line) if INDENTED_CODE.match?(line)
-          # Extract 0-3 space indent
-          line_length = line.length
+          # After INDENTED_CODE, line has at most 3 leading spaces, so the
+          # `< 3` bound keeps this a tight YJIT-friendly hot loop.
           indent_len = 0
-          while indent_len < 3 && indent_len < line_length && line.getbyte(indent_len) == SPACE
-            indent_len += 1
-          end
+          indent_len += 1 while indent_len < 3 && line.getbyte(indent_len) == SPACE
-          return line if indent_len >= line_length
+          # Whitespace-only line (1-3 spaces) — getbyte past end is nil.
+          return line if line.getbyte(indent_len).nil?
           has_indent = indent_len > 0
           content = has_indent ? line[indent_len..] : line
-          # Apply block-level escaping (which may also do inline escaping)
           escaped, skip_inline = escape_block_level(content, prev_was_paragraph)
-          # Apply inline escaping if block-level didn't handle it
           escaped = escape_inline(escaped) unless skip_inline
-          # Prepend indent if present, preserve encoding
           if has_indent
-            encoding = line.encoding
-            result = String.new(encoding:)
+            result = String.new(encoding: line.encoding)
             result << line[0, indent_len] << escaped
             result
           else
-            escaped.is_a?(String) ? escaped.force_encoding(line.encoding) : escaped
+            escaped.force_encoding(line.encoding)
           end
         end
@@ -203,15 +201,14 @@ module Markbridge
           # - Content doesn't start at valid block position (no lists, headings, etc.)
           # - Visual indentation is preserved (NBSP renders as space)
           # We still escape inline content since it's no longer protected.
+          # Caller (escape_line) guarantees INDENTED_CODE matched, so line
+          # starts with at least one SPACE or TAB; ws_end is always ≥ 1.
           line_length = line.length
           ws_end = 0
-          while ws_end < line_length
-            byte = line.getbyte(ws_end)
-            break if byte != SPACE && byte != TAB
+          while ws_end < line_length && ((byte = line.getbyte(ws_end)) == SPACE || byte == TAB)
             ws_end += 1
           end
-          return line if ws_end == 0 # No leading whitespace (shouldn't happen, but safe)
           return line if ws_end >= line_length # Whitespace-only line
           # Convert leading whitespace to NBSP (tab = 4 NBSP for visual consistency)
@@ -310,6 +307,13 @@ module Markbridge
           @inline_len = bytesize
           pos = 0
+          # No loop-progress guard: every `dispatch_inline_byte` branch
+          # returns `pos + N` for N >= 1 by construction, so the loop
+          # is provably terminating. Mutations that break this
+          # (`while true`, body drops, selector swaps that short-circuit
+          # the dispatch) surface as timeouts rather than alive
+          # mutations, and the inline guard would otherwise cost ~15%
+          # on this hot path per benchmark.
           while pos < @inline_len
             byte = @inline_content.getbyte(pos)
             pos = dispatch_inline_byte(byte, pos)
@@ -474,39 +478,42 @@ module Markbridge
         end
         def paragraph_line?(line)
-          return false if line.empty?
+          pos = 0
+          line_len = line.bytesize
+          pos += 1 while pos < line_len && line.getbyte(pos) == SPACE
+          first_non_space = pos
-          line_length = line.length
-          first_non_space = 0
-          while first_non_space < line_length && line.getbyte(first_non_space) == SPACE
-            first_non_space += 1
-          end
-          return false if first_non_space >= line_length || line.getbyte(first_non_space) == TAB
+          # Empty or whitespace-only lines: getbyte past the end returns nil.
+          return false if line.getbyte(first_non_space).nil?
-          content = first_non_space <= 3 ? line[first_non_space..] : line
+          # Indented code (4+ spaces or any leading \t) is not a paragraph.
+          # INDENTED_CODE also catches lines where first_non_space > 3, so no
+          # separate numeric boundary check is needed.
+          return false if INDENTED_CODE.match?(line)
-          # Lines starting with [ get escaped to \[, which IS paragraph content
-          # So setext headings CAN follow them
-          return true if content.getbyte(0) == BRACKET_OPEN
+          content = first_non_space == 0 ? line : line[first_non_space..]
-          !block_construct?(content) && !INDENTED_CODE.match?(line)
+          # Lines starting with [ are paragraph content (the escaper rewrites [
+          # to \[). block_construct? has no BRACKET_OPEN case arm, so such
+          # lines naturally fall through and !block_construct?(content) == true.
+          !block_construct?(content)
         end
         # Checks whether content starts with a block-level markdown construct.
         # Used by both escape_block_level (to decide what to escape) and
         # paragraph_line? (to decide if setext underlines can follow).
         def block_construct?(content)
-          first_byte = content.getbyte(0)
-          case first_byte
+          case content.getbyte(0)
           when HASH
             ATX_HEADING.match?(content)
           when GT
             true
-          when DASH, PLUS, STAR
-            BULLET_LIST.match?(content) ||
-              (first_byte == DASH && THEMATIC_BREAK_DASH.match?(content)) ||
-              (first_byte == STAR && THEMATIC_BREAK_STAR.match?(content))
+          when DASH
+            BULLET_LIST.match?(content) || THEMATIC_BREAK_DASH.match?(content)
+          when STAR
+            BULLET_LIST.match?(content) || THEMATIC_BREAK_STAR.match?(content)
+          when PLUS
+            BULLET_LIST.match?(content)
           when UNDERSCORE
             THEMATIC_BREAK_UNDERSCORE.match?(content)
           when BACKTICK

data/lib/markbridge/renderers/discourse/render_context.rb CHANGED

@@ -11,26 +11,40 @@ module Markbridge
       class RenderContext
         attr_reader :parents, :depth
-        def initialize(parents = [], parent_cache: nil)
+        def initialize(parents = [], parent_cache: nil, html_mode: false)
           @parents = parents.freeze
           @depth = parents.size
           @parent_cache = parent_cache || build_cache(parents)
+          @html_mode = html_mode
         end
-        # Create new context with element added to parent chain
-        # Incrementally updates cache instead of rebuilding from scratch
+        # Create new context with element added to parent chain.
+        # Incrementally updates the cache (O(1)) instead of rebuilding from
+        # parents (O(depth)) — important for deeply-nested documents.
         # @param element [AST::Element]
         # @return [RenderContext]
         def with_parent(element)
           new_parents = @parents + [element]
-          # Incrementally update cache instead of rebuilding
           new_cache = @parent_cache.dup
           element_class = element.class
           new_cache[element_class] ||= []
           new_cache[element_class] = new_cache[element_class] + [element]
-          self.class.new(new_parents, parent_cache: new_cache)
+          self.class.new(new_parents, parent_cache: new_cache, html_mode: @html_mode)
+        end
+        # Create new context with html_mode toggled
+        # Preserves parent chain and cache
+        # @param value [Boolean]
+        # @return [RenderContext]
+        def with_html_mode(value)
+          self.class.new(@parents, parent_cache: @parent_cache, html_mode: value)
+        end
+        # @return [Boolean]
+        def html_mode?
+          @html_mode
         end
         # Find closest parent of given type
@@ -54,7 +68,7 @@ module Markbridge
         # @param klass [Class]
         # @return [Boolean]
         def has_parent?(klass)
-          @parent_cache.key?(klass) && !@parent_cache[klass].empty?
+          !@parent_cache[klass].nil?
         end
         # Check if we're at the root (no parents)
@@ -65,14 +79,12 @@ module Markbridge
         private
-        # Build cache from parents array
-        # Groups parents by class for fast lookup
+        # Build cache from parents array.
+        # Groups parents by class for fast O(1) lookup.
         # @param parents [Array<AST::Element>]
         # @return [Hash{Class => Array<AST::Element>}]
         def build_cache(parents)
-          parents.each_with_object(Hash.new { |h, k| h[k] = [] }) do |parent, cache|
-            cache[parent.class] = cache[parent.class] + [parent]
-          end
+          parents.group_by(&:class)
         end
       end
     end