RubyGems - coradoc-markdown - Versions diffs - 1.0.0 - Mend

coradoc-markdown 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

checksums.yaml +7 -0
data/LICENSE.txt +21 -0
data/lib/coradoc/markdown/errors.rb +28 -0
data/lib/coradoc/markdown/model/abbreviation.rb +27 -0
data/lib/coradoc/markdown/model/attribute_list.rb +98 -0
data/lib/coradoc/markdown/model/base.rb +86 -0
data/lib/coradoc/markdown/model/blockquote.rb +21 -0
data/lib/coradoc/markdown/model/code.rb +11 -0
data/lib/coradoc/markdown/model/code_block.rb +24 -0
data/lib/coradoc/markdown/model/definition_item.rb +24 -0
data/lib/coradoc/markdown/model/definition_list.rb +47 -0
data/lib/coradoc/markdown/model/definition_term.rb +21 -0
data/lib/coradoc/markdown/model/document.rb +39 -0
data/lib/coradoc/markdown/model/emphasis.rb +11 -0
data/lib/coradoc/markdown/model/extension.rb +92 -0
data/lib/coradoc/markdown/model/footnote.rb +31 -0
data/lib/coradoc/markdown/model/footnote_reference.rb +22 -0
data/lib/coradoc/markdown/model/heading.rb +44 -0
data/lib/coradoc/markdown/model/highlight.rb +18 -0
data/lib/coradoc/markdown/model/horizontal_rule.rb +16 -0
data/lib/coradoc/markdown/model/image.rb +19 -0
data/lib/coradoc/markdown/model/link.rb +19 -0
data/lib/coradoc/markdown/model/list.rb +22 -0
data/lib/coradoc/markdown/model/list_item.rb +29 -0
data/lib/coradoc/markdown/model/math.rb +50 -0
data/lib/coradoc/markdown/model/paragraph.rb +28 -0
data/lib/coradoc/markdown/model/strikethrough.rb +18 -0
data/lib/coradoc/markdown/model/strong.rb +11 -0
data/lib/coradoc/markdown/model/table.rb +13 -0
data/lib/coradoc/markdown/model/text.rb +15 -0
data/lib/coradoc/markdown/parser/ast_processor.rb +543 -0
data/lib/coradoc/markdown/parser/block_parser.rb +745 -0
data/lib/coradoc/markdown/parser/html_entities.rb +2149 -0
data/lib/coradoc/markdown/parser/inline_parser.rb +274 -0
data/lib/coradoc/markdown/parser/parslet_extras.rb +215 -0
data/lib/coradoc/markdown/parser.rb +11 -0
data/lib/coradoc/markdown/parser_util.rb +90 -0
data/lib/coradoc/markdown/serializer.rb +199 -0
data/lib/coradoc/markdown/toc_generator.rb +215 -0
data/lib/coradoc/markdown/transform/from_core_model.rb +325 -0
data/lib/coradoc/markdown/transform/text_extraction.rb +19 -0
data/lib/coradoc/markdown/transform/to_core_model.rb +287 -0
data/lib/coradoc/markdown/transformer.rb +463 -0
data/lib/coradoc/markdown/version.rb +7 -0
data/lib/coradoc/markdown.rb +190 -0
metadata +173 -0

data/lib/coradoc/markdown/parser/inline_parser.rb ADDED Viewed

@@ -0,0 +1,274 @@
+# frozen_string_literal: true
+module Coradoc
+  module Markdown
+    module Parser
+      autoload :ParsletExtras, "#{__dir__}/parslet_extras"
+      autoload :HtmlEntities, "#{__dir__}/html_entities"
+      class InlineParser < Parslet::Parser
+        using ParsletExtras
+        rule(:line_ending) { (str("\n") | str("\r\n") | str("\r")).ignore }
+        rule(:line_ending_or_eof) { line_ending | any.absent? }
+        rule(:whitespace) { match[" \t"] }
+        rule(:unicode_whitespace) { match["\\p{Zs}\t\r\n\f"] | any.absent? }
+        rule(:unicode_punctuation) { match['\\p{P}\\p{S}'] }
+        def unicode_codepoint(base, s)
+          i = s.to_s.to_i(base)
+          return "\uFFFD" if i.zero?
+          i.chr(Encoding::UTF_8)
+        rescue RangeError
+          "\uFFFD"
+        end
+        # Decodes the digits of a decimal character reference by delegating to
+        # unicode_codepoint with base 10.
+        # @param s [#to_s] decimal digits
+        # @return [String] whatever unicode_codepoint returns for those digits
+        def unicode_dec(s)
+          unicode_codepoint(10, s)
+        end
+        # Decodes the digits of a hexadecimal character reference by delegating to
+        # unicode_codepoint with base 16.
+        # @param s [#to_s] hexadecimal digits (without the leading x/X)
+        # @return [String] whatever unicode_codepoint returns for those digits
+        def unicode_hex(s)
+          unicode_codepoint(16, s)
+        end
+        def lookup_entity(s)
+          HTML_ENTITIES[s.to_s] || "&#{s};"
+        end
+        def process_code(s)
+          s = s.to_s
+          s.tr!("\n", ' ')
+          return s.slice(1, s.length - 2) if s.length > 2 && s.start_with?(' ') && s.end_with?(' ')
+          s
+        end
+        # Backslash escape: a backslash followed by one ASCII punctuation character.
+        # The backslash is ignored, so only the escaped character reaches the output.
+        rule(:escape) { str('\\').ignore >> match["!\"#$%&'\\(\\)*+,\\-./:;<=>?@\\[\\\\\\]\\^_`\\{\\|\\}~"] }
+        # Decimal character reference: "&#", 1 to 7 digits, ";". The digits are
+        # replaced by the result of unicode_dec.
+        rule(:dec_entity) do
+          str('&#').ignore >> match['0-9'].repeat(1, 7).dynamic_output(method(:unicode_dec)) >> str(';').ignore
+        end
+        # Hexadecimal character reference: "&#x" or "&#X", 1 to 6 hex digits, ";".
+        # The digits are replaced by the result of unicode_hex.
+        rule(:hex_entity) do
+          str('&#').ignore >> match['xX'].ignore >> match['A-Fa-f0-9'].repeat(1,
+                                                                              6).dynamic_output(method(:unicode_hex)) >> str(';').ignore
+        end
+        # Named entity: "&", one or more ASCII letters/digits, ";". The name is
+        # replaced by the result of lookup_entity.
+        rule(:entity) do
+          str('&').ignore >> match['A-Za-z0-9'].repeat(1).dynamic_output(method(:lookup_entity)) >> str(';').ignore
+        end
+        # A NUL character is replaced by U+FFFD.
+        rule(:nul_byte) { str("\0").output("\uFFFD") }
+        # Any of the above; alternatives are tried in the order listed.
+        rule(:special_char) { escape | dec_entity | hex_entity | entity | nul_byte }
+        # Plain text: one or more special characters or any character at which no
+        # `element` (code span or delimiter run) starts. Captured under :text.
+        rule(:text) do
+          (special_char | (element.absent? >> any)).repeat(1).as(:text)
+        end
+        # Code span. The opener is a run of backticks that is not itself preceded
+        # by a backtick; it is captured so the closer can be built dynamically.
+        # The closer is the same backtick string, not preceded and not followed by
+        # another backtick. Everything in between (at least one character) is
+        # passed through process_code and captured under :code.
+        rule(:code_span) do
+          str('`').does_not_precede? >>
+            str('`').repeat(1).capture(:code_opener).ignore >>
+            dynamic do |_src, ctx|
+              ending = (str('`').does_not_precede? >> str(ctx.captures[:code_opener]).ignore >> str('`').absent?)
+              (ending.absent? >> any).repeat(1).dynamic_output(method(:process_code)).as(:code) >> ending
+            end
+        end
+        # One or more "*" characters, or one or more "_" characters (never mixed).
+        rule(:delimiter_run) do
+          str('*').repeat(1) | str('_').repeat(1)
+        end
+        # Delimiter run tagged :bfdr (treated as both left- and right-flanking by
+        # build_delim_stack). Requires a preceding character that is not
+        # whitespace, and a following position that is not whitespace / end of
+        # input. Punctuation must be on both sides of the run or on neither side.
+        rule(:both_flanking_delimiter_run) do
+          any.precedes? >>
+            unicode_whitespace.does_not_precede? >> (
+            (
+              unicode_punctuation.precedes? >>
+              delimiter_run.as(:bfdr) >>
+              unicode_punctuation.present?
+            ) | (
+              unicode_punctuation.does_not_precede? >>
+              delimiter_run.as(:bfdr) >>
+              unicode_punctuation.absent?
+            )
+          ) >> unicode_whitespace.absent?
+        end
+        # Delimiter run tagged :lfdr (left-flanking). The run must not be followed
+        # by whitespace / end of input, and either is not followed by punctuation,
+        # or is preceded by whitespace or punctuation, or sits at the very start
+        # of the input (`any.does_not_precede?`).
+        rule(:left_flanking_delimiter_run) do
+          (
+            (
+              delimiter_run.as(:lfdr) >>
+              unicode_punctuation.absent?
+            ) | (
+              ((unicode_whitespace | unicode_punctuation).precedes? | any.does_not_precede?) >>
+              delimiter_run.as(:lfdr)
+            )
+          ) >> unicode_whitespace.absent?
+        end
+        # Delimiter run tagged :rfdr (right-flanking). Requires a preceding
+        # character that is not whitespace. If that character is punctuation, the
+        # run must be followed by whitespace, punctuation or the end of input;
+        # otherwise there is no constraint on what follows.
+        rule(:right_flanking_delimiter_run) do
+          any.precedes? >>
+            unicode_whitespace.does_not_precede? >> (
+            (
+              unicode_punctuation.precedes? >>
+              delimiter_run.as(:rfdr) >>
+              (unicode_whitespace | unicode_punctuation).present?
+            ) | (
+              unicode_punctuation.does_not_precede? >>
+              delimiter_run.as(:rfdr)
+            )
+          )
+        end
+        rule(:non_flanking_delimiter_run) do
+          left_flanking_delimiter_run.absent? >> left_flanking_delimiter_run.absent? >> delimiter_run.as(:nfdr)
+        end
+        # Classifies a delimiter run; the first matching alternative wins, so a
+        # run that qualifies as both-flanking is never reported as merely left-
+        # or right-flanking.
+        rule(:flanking_delimiter_run) do
+          both_flanking_delimiter_run | left_flanking_delimiter_run | right_flanking_delimiter_run | non_flanking_delimiter_run
+        end
+        # The wrappers below record whether punctuation surrounds the classified
+        # run. build_delim_stack turns the wrapper key into the
+        # :preceded_by_punc / :followed_by_punc flags.
+        # :rsp - punctuation immediately before and after the run.
+        rule(:run_surrounded_by_punctuation) do
+          (unicode_punctuation.precedes? >> flanking_delimiter_run >> unicode_punctuation.present?).as(:rsp)
+        end
+        # :rpp - punctuation immediately before the run.
+        rule(:run_preceded_by_punctuation) do
+          (unicode_punctuation.precedes? >> flanking_delimiter_run).as(:rpp)
+        end
+        # :rfp - punctuation immediately after the run.
+        rule(:run_followed_by_punctuation) do
+          (flanking_delimiter_run >> unicode_punctuation.present?).as(:rfp)
+        end
+        # A classified run, wrapped in the most specific punctuation marker that
+        # applies, or unwrapped when no punctuation is adjacent.
+        rule(:checked_delimiter_run) do
+          run_surrounded_by_punctuation | run_preceded_by_punctuation | run_followed_by_punctuation | flanking_delimiter_run
+        end
+        # Anything that interrupts plain text: a code span or a delimiter run.
+        rule(:element) { code_span | checked_delimiter_run }
+        # An inline sequence is any mix of text and elements (possibly empty).
+        rule(:inline) { (text | element).repeat }
+        root :inline
+        def can_open_emphasis(elem)
+          return false unless elem[:left_flanking]
+          return true unless elem[:char] == '_'
+          !elem[:right_flanking] || (elem[:right_flanking] && elem[:preceded_by_punc])
+        end
+        def can_close_emphasis(elem)
+          return false unless elem[:right_flanking]
+          return true unless elem[:char] == '_'
+          !elem[:left_flanking] || (elem[:left_flanking] && elem[:followed_by_punc])
+        end
+        def rule_of_three(opener, closer)
+          return true unless (can_open_emphasis(opener) && can_close_emphasis(opener)) ||
+                             (can_open_emphasis(closer) && can_close_emphasis(closer))
+          ((opener[:length] % 3).zero? && (closer[:length] % 3).zero?) ||
+            (opener[:length] + closer[:length]) % 3 != 0
+        end
+        def used_delims_to_text(elems)
+          elems.map do |elem|
+            if elem.key?(:char)
+              next if elem[:length] < 1
+              { text: elem[:char] * elem[:length] }
+            else
+              elem
+            end
+          end.compact
+        end
+        def build_delim_stack(tree)
+          delim_stack = []
+          tree.each_with_index do |elem, idx|
+            next unless elem.is_a?(Hash) || elem.length != 1
+            key = elem.first.first
+            if %i[rsp rpp rfp].include?(key)
+              outer_key = key
+              tree[idx] = elem = elem[key]
+              key = elem.first.first
+            end
+            next unless %i[bfdr lfdr rfdr nfdr].include?(key)
+            delim_stack << idx
+            # pp elem
+            elem[:char] = elem[key].to_s[0]
+            elem[:length] = elem[key].length
+            elem[:left_flanking] = %i[bfdr lfdr].include?(key)
+            elem[:right_flanking] = %i[bfdr rfdr].include?(key)
+            elem[:preceded_by_punc] = %i[rsp rpp].include?(outer_key)
+            elem[:followed_by_punc] = %i[rsp rfp].include?(outer_key)
+            # elem[:active] = true
+          end
+          # pp delim_stack
+          delim_stack
+        end
+        # Pairs up delimiter runs in the parse tree and nests the nodes between a
+        # matched opener and closer inside { emph: [...] } or { strong: [...] }.
+        #
+        # delim_stack holds tree indices of delimiter runs; cur_pos is a position
+        # inside delim_stack. Each iteration looks for the next run at or after
+        # cur_pos that can close emphasis, then searches backwards for the nearest
+        # run of the same character that can open emphasis and passes
+        # rule_of_three.
+        #
+        # @param tree [Array<Hash>] parse tree from the grammar; modified in place
+        # @return [Array<Hash>] the tree with leftover delimiters turned into text
+        def process_emphasis(tree)
+          delim_stack = build_delim_stack(tree)
+          cur_pos = 0
+          # Lower bound (as a delim_stack position) for the opener search, per
+          # delimiter character.
+          openers_bottom = { '*' => 0, '_' => 0 }
+          while (closer_offset = delim_stack[cur_pos..].index { |i| can_close_emphasis(tree[i]) })
+            # puts "-----"
+            # pp tree
+            # puts "clofset #{closer_offset} -> cur_pos #{closer_offset + cur_pos}"
+            cur_pos += closer_offset
+            closer = tree[closer_idx = delim_stack[cur_pos]]
+            # puts "closer:#{cur_pos}, #{closer}"
+            # look back - in reverse?
+            opener_bottom = openers_bottom[closer[:char]]
+            # Candidate openers: stack entries from the lower bound up to, but
+            # not including, the closer.
+            opener_to_cur = delim_stack.slice(opener_bottom, cur_pos - opener_bottom)
+            # puts "obottom #{opener_bottom} len #{cur_pos - opener_bottom} -> opener_to_cur #{opener_to_cur}"
+            # rindex: the candidate closest to the closer wins.
+            opener_offset = (opener_to_cur || []).rindex do |i|
+              can_open_emphasis(tree[i]) && tree[i][:char] == closer[:char] && rule_of_three(tree[i], closer)
+            end
+            # puts "opener:#{opener_offset}"
+            if opener_offset
+              opener = tree[opener_idx = delim_stack[opener_bottom + opener_offset]]
+              # Strong emphasis consumes two delimiter characters on each side,
+              # regular emphasis one.
+              strong = opener[:length] > 1 && closer[:length] > 1
+              # pp opener
+              # Tree nodes strictly between opener and closer become the contents.
+              contents_range = (opener_idx + 1)..(closer_idx - 1)
+              # puts "crange #{contents_range} size #{contents_range.size}"
+              contents = used_delims_to_text(tree.slice!(contents_range))
+              tree.insert(opener_idx + 1, { (strong ? :strong : :emph) => contents })
+              # Delimiters between opener and closer are now inside the new node
+              # and leave the stack.
+              middle = (opener_bottom + opener_offset + 1)..(cur_pos - 1)
+              delim_stack.slice!(middle)
+              # puts "slice middle #{middle} -> #{delim_stack}"
+              # The tree shrank by contents_range.size nodes and grew by one, so
+              # every index after the opener shifts accordingly.
+              delim_stack.map! { |i| i <= opener_idx ? i : (i - contents_range.size + 1) }
+              # puts "slice map <dstack.map!> #{delim_stack}"
+              cur_pos -= middle.size
+              # A run whose length reaches zero leaves the stack; it stays in the
+              # tree and is dropped by used_delims_to_text (length < 1).
+              if (opener[:length] -= strong ? 2 : 1).zero?
+                delim_stack.slice!(opener_bottom + opener_offset)
+                cur_pos -= 1
+                # puts "slice opener #{opener_bottom + opener_offset} -> #{delim_stack} @#{cur_pos}"
+              end
+              if (closer[:length] -= strong ? 2 : 1).zero?
+                delim_stack.slice!(cur_pos)
+                # puts "slice closer #{cur_pos} -> #{delim_stack}"
+              end
+            else
+              # NOTE(review): the key here is closer[:chr], but delimiter hashes
+              # only carry :char, so this writes to openers_bottom[nil] and the
+              # '*' / '_' bounds stay at 0. Looks like a typo - confirm. Simply
+              # switching to :char is not enough: cur_pos - 1 is -1 when the
+              # closer is the first stack entry, which would make the slice above
+              # count from the end of the stack.
+              openers_bottom[closer[:chr]] = cur_pos - 1
+              if can_open_emphasis(closer)
+                # The run may still serve as an opener for a later closer.
+                cur_pos += 1
+              else
+                delim_stack.slice!(cur_pos)
+                # puts "nopener slice #{cur_pos} -> #{delim_stack}"
+              end
+            end
+          end
+          # puts "----------"
+          used_delims_to_text(tree)
+          # puts "----------"
+          # pp x
+        end
+        def parse(io, options = {})
+          process_emphasis(super(io, options))
+        end
+      end
+    end
+  end
+end

data/lib/coradoc/markdown/parser/parslet_extras.rb ADDED Viewed

@@ -0,0 +1,215 @@
+# frozen_string_literal: true
+require 'parslet'
+require 'parslet/convenience'
+module Coradoc
+  module Markdown
+    module Parser
+      module ParsletExtras
+        refine Parslet::Source do
+          def rewind(nchars)
+            # https://github.com/ruby/strscan/issues/122
+            self.charpos = @str.charpos - nchars
+          end
+          def charpos=(pos)
+            @str.reset
+            @str.getch while @str.charpos < pos
+          end
+          def charpos
+            @str.charpos
+          end
+          def peek_byte
+            @str.peek(1)
+          end
+        end
+        # Extra accessors for Parslet::Scope, built on its @current instance
+        # variable (the innermost binding, as far as this file shows).
+        refine Parslet::Scope do
+          # Exposes @current.
+          attr_reader :current
+          # Delegates to @current.key?.
+          def key?(...)
+            @current.key?(...)
+          end
+          # Same as key?.
+          def has_key?(...)
+            @current.key?(...)
+          end
+          # Follows the parent links starting at the current binding and returns
+          # the first one that has no parent.
+          def root
+            scope = current
+            scope = scope.parent while scope.parent
+            scope
+          end
+        end
+        # Extra helpers for Parslet::Scope::Binding, built on its @hash instance
+        # variable.
+        refine Parslet::Scope::Binding do
+          # Delegates to @hash.key?.
+          def key?(...)
+            @hash.key?(...)
+          end
+          # Same as key?.
+          def has_key?(...)
+            @hash.key?(...)
+          end
+          # Gives a copied binding its own clone of @hash instead of sharing the
+          # original's hash object.
+          def initialize_copy(original)
+            super
+            @hash = @hash.clone
+          end
+        end
+        # like Named but returning other things
+        class Output < Parslet::Atoms::Base
+          attr_reader :parslet, :value
+          def initialize(parslet, value)
+            super()
+            @parslet = parslet
+            @value = value
+          end
+          def apply(source, context, consume_all)
+            success, = result = parslet.apply(source, context, consume_all)
+            return result unless success
+            succ(@value)
+          end
+          def to_s_inner(prec)
+            "#{value}:#{parslet.to_s(prec)}"
+          end
+        end
+        class DynamicOutput < Parslet::Atoms::Base
+          attr_reader :parslet, :callable
+          def initialize(parslet, callable)
+            super()
+            @parslet = parslet
+            @callable = callable
+          end
+          def apply(source, context, consume_all)
+            success, value = result = parslet.apply(source, context, consume_all)
+            return result unless success
+            succ(@callable.call(flatten(value)))
+          end
+          def to_s_inner(prec)
+            "#{callable}:#{parslet.to_s(prec)}"
+          end
+        end
+        class Lookbehind < Parslet::Atoms::Base
+          using ParsletExtras
+          attr_reader :positive
+          attr_reader :number, :bound_parslet
+          def initialize(bound_parslet, number, positive: true)
+            super()
+            # Model positive and negative lookbehind by testing this flag.
+            @positive = positive
+            @number = number
+            @bound_parslet = bound_parslet
+          end
+          def error_msgs
+            @error_msgs ||= {
+              positive: ['Input should be preceded by ', bound_parslet],
+              negative: ['Input should not be preceded by ', bound_parslet]
+            }
+          end
+          def try(source, context, consume_all)
+            rewind_pos = source.bytepos
+            if source.bytepos.zero?
+              return succ(nil) unless positive
+              return context.err_at(self, source, error_msgs[:positive], source.pos)
+            end
+            source.rewind(number)
+            error_pos = source.pos
+            success, = bound_parslet.apply(source, context, consume_all)
+            if positive
+              return succ(nil) if success
+              context.err_at(self, source, error_msgs[:positive], error_pos)
+            else
+              return succ(nil) unless success
+              context.err_at(self, source, error_msgs[:negative], error_pos)
+            end
+          ensure
+            source.bytepos = rewind_pos
+          end
+          def to_s_inner(prec)
+            @char = positive ? '&' : '!'
+            "<#{@char}<#{number}<#{bound_parslet.to_s(prec)}"
+          end
+        end
+        # Like Dynamic but does not return a further parslet, just a reject/accept boolean
+        module ::Parslet
+          module Atoms
+            class Check < ::Parslet::Atoms::Base
+              attr_reader :block
+              def initialize(block)
+                super()
+                @block = block
+              end
+              def cached?
+                false
+              end
+              def try(source, context, _consume_all)
+                [block.call(source, context), nil]
+              end
+              def to_s_inner(_prec)
+                'check { ... }'
+              end
+            end
+          end
+        end
+        # Adds a `check { |source, context| ... }` constructor next to Parslet's
+        # own atom constructors.
+        refine ::Parslet do
+          # Wraps the block in a Parslet::Atoms::Check atom.
+          def check(&block)
+            ::Parslet::Atoms::Check.new(block)
+          end
+          module_function :check
+        end
+        # Chainable atom methods that build the custom atoms defined above.
+        refine ::Parslet::Atoms::DSL do
+          # Replaces this atom's result by a fixed value (see Output).
+          def output(value)
+            Output.new(self, value)
+          end
+          # Replaces this atom's result by the return value of the given
+          # callable (see DynamicOutput).
+          def dynamic_output(value)
+            DynamicOutput.new(self, value)
+          end
+          # Positive lookbehind: this atom must match num characters back.
+          def precedes?(num = 1)
+            Lookbehind.new(self, num, positive: true)
+          end
+          # Negative lookbehind: this atom must not match num characters back.
+          def does_not_precede?(num = 1)
+            Lookbehind.new(self, num, positive: false)
+          end
+        end
+      end
+    end
+  end
+end

data/lib/coradoc/markdown/parser.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+module Coradoc
+  module Markdown
+    # Namespace for the parser classes. Each constant is registered with
+    # autoload, so its file (next to this one, under parser/) is only
+    # loaded when the constant is first referenced.
+    module Parser
+      autoload :BlockParser, "#{__dir__}/block_parser"
+      autoload :InlineParser, "#{__dir__}/inline_parser"
+      autoload :AstProcessor, "#{__dir__}/ast_processor"
+    end
+  end
+end

data/lib/coradoc/markdown/parser_util.rb ADDED Viewed

@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+require 'strscan'
+module Coradoc
+  module Markdown
+    # Shared parser utilities for Markdown processing
+    module ParserUtil
+      # Parser for IAL (Inline Attribute List) syntax
+      #
+      # IAL syntax: {:.class #id key="value"}
+      # Supports:
+      # - Classes: .classname or .-classname
+      # - IDs: #idname
+      # - Key-value pairs: key="value", key='value', or key=value
+      #
+      module IalParser
+        # Tokenize an IAL string into its components
+        # @param content [String] The IAL content (without braces)
+        # @return [Array<Hash>] Array of tokens with :type and :value
+        def self.tokenize(content)
+          tokens = []
+          scanner = StringScanner.new(content.to_s)
+          until scanner.eos?
+            scanner.skip(/\s+/)
+            break if scanner.eos?
+            if scanner.scan(/\.(-?\w[\w-]*)/)
+              tokens << { type: :class, value: scanner[1] }
+            elsif scanner.scan(/#(\w[\w-]*)/)
+              tokens << { type: :id, value: scanner[1] }
+            elsif scanner.scan(/(\w[\w-]*)\s*=\s*/)
+              key = scanner[1]
+              value = extract_quoted_value(scanner, handle_escapes: true)
+              tokens << { type: :attribute, key: key, value: value }
+            elsif scanner.scan(/\S+/)
+              # Skip unknown tokens
+            end
+          end
+          tokens
+        end
+        # Parse IAL content into a hash
+        # @param content [String] The IAL content
+        # @return [Hash] Parsed result with :id, :classes, :attributes keys
+        def self.parse_to_hash(content)
+          result = { id: nil, classes: [], attributes: {} }
+          return result if content.nil? || content.empty?
+          tokens = tokenize(content)
+          tokens.each do |token|
+            case token[:type]
+            when :class
+              result[:classes] << token[:value]
+            when :id
+              result[:id] = token[:value]
+            when :attribute
+              result[:attributes][token[:key]] = token[:value]
+            end
+          end
+          result
+        end
+        # Extract a quoted value from the scanner
+        # @param scanner [StringScanner]
+        # @param handle_escapes [Boolean] Whether to unescape \\" and \\'
+        # @return [String] The extracted value
+        def self.extract_quoted_value(scanner, handle_escapes: false)
+          if scanner.scan(/"([^"\\]*(?:\\.[^"\\]*)*)"/)
+            value = scanner[1]
+            value = value.gsub(/\\"/, '"') if handle_escapes
+            value
+          elsif scanner.scan(/'([^'\\]*(?:\\.[^'\\]*)*)'/)
+            value = scanner[1]
+            value = value.gsub(/\\'/, "'") if handle_escapes
+            value
+          elsif scanner.scan(/(\S+)/)
+            scanner[1]
+          else
+            ''
+          end
+        end
+        private_class_method :extract_quoted_value
+      end
+    end
+  end
+end