RubyGems - manticore-smash - Versions diffs - 3.1.0 - Mend

manticore-smash 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +7 -0
data/LICENSE +661 -0
data/README.md +492 -0
data/lib/manticore.rb +24 -0
data/lib/mdutils/rediscount.rb +871 -0
data/lib/xmlutils/formatters.rb +91 -0
data/lib/xmlutils/node.rb +585 -0
data/lib/xmlutils/tokenizer.rb +282 -0
data/lib/xmlutils/tree_parser.rb +161 -0
data/lib/xmlutils/xml_doc.rb +273 -0
data/lib/xmlutils/xpath.rb +103 -0
metadata +48 -0

data/lib/xmlutils/tokenizer.rb ADDED Viewed

@@ -0,0 +1,282 @@
+# frozen_string_literal: false
+# Copyright (C) 2024 Manticore Authors
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+module XmlUtils
+  class Token
+    attr_accessor :type, :value, :line, :position
+    TYPES = %i[
+      start_tag end_tag empty_tag
+      text cdata comment processing_instruction
+      doctype xml_decl attribute_name attribute_value
+      close_tag eq quote whitespace newline eof
+    ].freeze
+    def initialize(type, value = nil, line = nil, position = nil)
+      @type = type
+      @value = value
+      @line = line
+      @position = position
+    end
+    def to_s
+      "<Token #{@type}: #{@value.inspect}>"
+    end
+  end
+  class Tokenizer
+    XML_NAME_PATTERN = /[A-Za-z_][A-Za-z0-9_.:-]*/.freeze
+    def initialize(source)
+      @source = source.respond_to?(:read) ? source.read : source.to_s
+      @pos = 0
+      @line = 1
+      @col = 1
+      @tokens = []
+    end
+    def tokenize
+      until @pos >= @source.length
+        @tokens << next_token
+      end
+      @tokens << Token.new(:eof)
+      @tokens
+    end
+    private
+    def next_token
+      skip_spaces
+      if @pos >= @source.length
+        return Token.new(:eof, nil, @line, @col)
+      end
+      ch = peek
+      case ch
+      when '<'
+        advance
+        case peek
+        when '/'
+          advance
+          name = read_name
+          skip_until('>')
+          advance if peek == '>'
+          Token.new(:close_tag, name, @line, @col)
+        when '!'
+          advance
+          if peek(2) == '--'
+            advance(2)
+            read_comment
+          elsif peek(7).upcase == '[CDATA['
+            advance(7)
+            read_cdata
+          elsif peek(7).upcase == 'DOCTYPE'
+            advance(7)
+            read_doctype
+          else
+            raise ParseException.new("Invalid markup after <!", @line, @col)
+          end
+        when '?'
+          advance
+          read_processing_instruction
+        else
+          read_tag
+        end
+      when '&'
+        read_entity_ref
+      else
+        read_text
+      end
+    end
+    def read_tag
+      name = read_name
+      skip_spaces
+      attrs = {}
+      until peek == '>' || peek == '/' || @pos >= @source.length
+        attr_name = read_name
+        skip_spaces
+        if peek == '='
+          advance
+          skip_spaces
+          attr_value = read_quoted_string
+        else
+          attr_value = attr_name
+        end
+        attrs[attr_name] = attr_value
+        skip_spaces
+      end
+      if peek == '/'
+        advance
+        token_type = :empty_tag
+      else
+        token_type = :start_tag
+      end
+      skip_until('>')
+      advance if peek == '>'
+      Token.new(token_type, { name: name, attributes: attrs }, @line, @col)
+    end
+    def read_comment
+      start_line = @line
+      start_col = @col
+      content = ""
+      until @pos + 2 >= @source.length
+        if peek(3) == '-->'
+          advance(3)
+          break
+        end
+        content << advance
+      end
+      Token.new(:comment, content, start_line, start_col)
+    end
+    def read_cdata
+      start_line = @line
+      start_col = @col
+      content = ""
+      until @pos + 2 >= @source.length
+        if peek(3) == ']]>'
+          advance(3)
+          break
+        end
+        content << advance
+      end
+      Token.new(:cdata, content, start_line, start_col)
+    end
+    def read_doctype
+      start_line = @line
+      start_col = @col
+      content = ""
+      depth = 1
+      while @pos < @source.length && depth > 0
+        ch = advance
+        if ch == '<'
+          depth += 1
+        elsif ch == '>'
+          depth -= 1
+        end
+        content << ch unless depth == 0
+      end
+      Token.new(:doctype, content.strip, start_line, start_col)
+    end
+    def read_processing_instruction
+      start_line = @line
+      start_col = @col
+      target = read_name
+      skip_spaces
+      content = ""
+      until @pos + 1 >= @source.length
+        if peek(2) == '?>'
+          advance(2)
+          break
+        end
+        content << advance
+      end
+      Token.new(:processing_instruction, { target: target, content: content.strip }, start_line, start_col)
+    end
+    def read_text
+      start_line = @line
+      start_col = @col
+      text = ""
+      while @pos < @source.length && peek != '<' && peek != '&'
+        text << advance
+      end
+      Token.new(:text, text, start_line, start_col)
+    end
+    def read_entity_ref
+      start_line = @line
+      start_col = @col
+      advance # &
+      ref = ""
+      while @pos < @source.length && peek != ';'
+        ref << advance
+      end
+      advance if peek == ';' # ;
+      entity = case ref
+               when 'amp' then '&'
+               when 'lt' then '<'
+               when 'gt' then '>'
+               when 'quot' then '"'
+               when 'apos' then "'"
+               else "&#{ref};"
+               end
+      Token.new(:text, entity, start_line, start_col)
+    end
+    def read_name
+      name = ""
+      while @pos < @source.length && peek =~ /[A-Za-z0-9_.:-]/
+        name << advance
+      end
+      raise ParseException.new("Expected name", @line, @col) if name.empty?
+      name
+    end
+    def read_quoted_string
+      quote = advance
+      raise ParseException.new("Expected quote", @line, @col) unless quote == '"' || quote == "'"
+      value = ""
+      while @pos < @source.length && peek != quote
+        value << advance
+      end
+      advance if peek == quote
+      value
+    end
+    def peek(n = 1)
+      @source[@pos, n] || ''
+    end
+    def advance(n = 1)
+      ch = @source[@pos, n]
+      n.times do |i|
+        c = @source[@pos + i]
+        if c == "\n"
+          @line += 1
+          @col = 1
+        else
+          @col += 1
+        end
+      end
+      @pos += n
+      ch
+    end
+    def skip_spaces
+      while @pos < @source.length && peek =~ /\s/
+        advance
+      end
+    end
+    def skip_until(char)
+      while @pos < @source.length && peek != char
+        advance
+      end
+    end
+  end
+end

data/lib/xmlutils/tree_parser.rb ADDED Viewed

@@ -0,0 +1,161 @@
+# frozen_string_literal: false
+# Copyright (C) 2024 Manticore Authors
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+module XmlUtils
+  class TreeParser
+    def initialize(source, document = nil)
+      @tokens = Tokenizer.new(source).tokenize
+      @pos = 0
+      @document = document || Document.new
+    end
+    def parse
+      while current_token && current_token.type != :eof
+        parse_node(@document)
+      end
+      @document
+    end
+    private
+    def current_token
+      @tokens[@pos]
+    end
+    def advance
+      token = @tokens[@pos]
+      @pos += 1
+      token
+    end
+    def parse_node(parent)
+      token = current_token
+      return unless token
+      case token.type
+      when :xml_decl
+        advance
+        decl = parse_xml_decl(token.value)
+        parent.add(decl)
+      when :doctype
+        advance
+        parent.add(parse_doctype(token.value))
+      when :processing_instruction
+        advance
+        if token.value[:target] == 'xml'
+          parent.add(parse_xml_decl(token.value[:content]))
+        else
+          parent.add(ProcessingInstruction.new(token.value[:target], token.value[:content]))
+        end
+      when :comment
+        advance
+        parent.add(Comment.new(token.value))
+      when :cdata
+        advance
+        parent.add(CData.new(token.value, true))
+      when :start_tag, :empty_tag
+        parse_element(parent)
+      when :text
+        advance
+        text = token.value
+        parent.add(Text.new(text)) unless text.strip.empty?
+      when :close_tag
+        advance
+      else
+        advance
+      end
+    end
+    def parse_element(parent)
+      token = advance
+      tag_data = token.value
+      element = Element.new(tag_data[:name])
+      tag_data[:attributes].each do |name, value|
+        element.add_attribute(name, value)
+      end
+      if token.type == :empty_tag
+        parent.add(element)
+        return
+      end
+      parent.add(element)
+      loop do
+        break if @pos >= @tokens.length
+        next_token = current_token
+        if next_token.type == :close_tag
+          if next_token.value == tag_data[:name]
+            advance
+            break
+          else
+            raise ParseException.new(
+              "Unexpected close tag </#{next_token.value}>, expected </#{tag_data[:name]}>",
+              next_token.line,
+              next_token.position
+            )
+          end
+        elsif next_token.type == :start_tag
+          parse_element(element)
+        elsif next_token.type == :empty_tag
+          parse_element(element)
+        elsif next_token.type == :text
+          advance
+          element.add(Text.new(next_token.value))
+        elsif next_token.type == :cdata
+          advance
+          element.add(CData.new(next_token.value, true))
+        elsif next_token.type == :comment
+          advance
+          element.add(Comment.new(next_token.value))
+        elsif next_token.type == :processing_instruction
+          advance
+          element.add(ProcessingInstruction.new(next_token.value[:target], next_token.value[:content]))
+        else
+          advance
+        end
+      end
+    end
+    def parse_xml_decl(content)
+      decl = XMLDecl.new
+      if content =~ /version\s*=\s*["']([^"']+)["']/
+        decl.version = $1
+      end
+      if content =~ /encoding\s*=\s*["']([^"']+)["']/
+        decl.encoding = $1
+      end
+      if content =~ /standalone\s*=\s*["']([^"']+)["']/
+        decl.standalone = $1
+      end
+      decl
+    end
+    def parse_doctype(content)
+      if content =~ /^(\S+)(?:\s+PUBLIC\s+["']([^"']*)["']\s+["']([^"']*)["'])?(?:\s+SYSTEM\s+["']([^"']*)["'])?/
+        name = $1
+        public_id = $2
+        system_id = $3 || $4
+        external_id = public_id ? 'PUBLIC' : (system_id ? 'SYSTEM' : nil)
+        DocType.new(name, external_id, system_id, public_id)
+      else
+        DocType.new(content.strip)
+      end
+    end
+  end
+end

data/lib/xmlutils/xml_doc.rb ADDED Viewed

@@ -0,0 +1,273 @@
+# frozen_string_literal: false
+# Copyright (C) 2024 Manticore Authors
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+['json','yaml'].each{|mod|require mod}
# Convenience entry points for parsing, building and serializing XML.
module XmlUtils
  VERSION = "3.0.1"

  class << self
    # Parses +source+ with TreeParser and returns the resulting document.
    def parse(source)
      TreeParser.new(source).parse
    end

    # @return [Document] a new, empty document
    def new_document
      Document.new
    end

    # Creates an element named +name+ and adds every pair of +attributes+
    # to it via add_attribute.
    #
    # @return [Element]
    def create_element(name, attributes = {})
      Element.new(name).tap do |element|
        attributes.each { |key, value| element.add_attribute(key, value) }
      end
    end

    # Writes +node+ with the default formatter into a fresh String.
    #
    # @return [String]
    def to_xml_string(node)
      formatter = Formatters::Default.new
      output = ""
      formatter.write(node, output)
      output
    end
  end
end
# Lightweight tree node with conversions to arrays, hashes, XML and JSON.
#
# A node has a name, an attribute hash (the special key :text holds the
# node's text, either a String or an Array of Strings), an ordered list of
# child +elements+ (XmlNode instances or raw content such as Strings), and
# +parent+ / +prev+ / +next+ links that are maintained by #initialize,
# #add_element and #delete_elements.
class XmlNode
  attr_accessor :name, :attributes, :elements, :parent, :next, :prev

  # Characters that need escaping in XML and their entity form.
  XML_ESCAPES = {
    '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&apos;'
  }.freeze
  # Entity form back to the literal character.
  XML_UNESCAPES = XML_ESCAPES.invert.freeze

  # @param option [Hash] :name, :parent, :attributes, :elements, :prev, :next.
  #   Without :name the node is called "OID:<object_id>". When :parent is
  #   given the new node is linked into it (elements / next / prev).
  def initialize(option = {})
    args = {parent: nil, attributes: {}, elements: [], prev: [], next: []}.merge(option)
    @name = args[:name] || "OID:#{object_id}"
    @attributes = args[:attributes]
    @parent = args[:parent]
    @elements = args[:elements]
    @prev = args[:prev]
    @next = args[:next]
    return unless @parent

    @prev << @parent unless @prev.include?(@parent)
    @parent.elements << self unless @parent.elements.include?(self)
    @parent.next << self unless @parent.next.include?(self)
  end

  #####################################################################################################
  # format                                                                                            #
  #####################################################################################################

  # Triad form: [name, attributes, [text..., [name, attributes, [...]], ...]].
  # Text stored under :text is moved to the front of the third member.
  def to_triad
    attrs = {}
    elems = []
    @attributes.each do |key, value|
      if key == :text
        elems.concat([value].flatten)
      else
        attrs[key] = value
      end
    end
    elems.concat(@elements.map(&:to_triad))
    [@name, attrs, elems]
  end
  alias :to_a :to_triad

  # Document form: {name => [attributes, {child => [...]}, ...]}.
  def to_doc
    { @name => [@attributes, *@elements.map(&:to_doc)] }
  end

  # Object form, in the style of JavaScript XML-to-object converters:
  # {name => {'-attr' => value, '#text' => text, child => {...}}}.
  #
  # Children sharing a name are collected into an Array in document order;
  # merging them into one key would silently keep only the last one.
  def to_obj
    doc = {}
    @attributes.each do |key, value|
      prefix = key == :text ? '#' : '-'
      doc["#{prefix}#{key}"] = value
    end
    seen = Hash.new(0)
    @elements.each do |elem|
      elem.to_obj.each do |child_name, child_doc|
        seen[child_name] += 1
        case seen[child_name]
        when 1 then doc[child_name] = child_doc
        when 2 then doc[child_name] = [doc[child_name], child_doc]
        else doc[child_name] << child_doc
        end
      end
    end
    { @name => doc }
  end

  # Serializes the node by hand. Attribute values and text are escaped.
  # Text stored under :text comes first, then the children in order.
  # Content that is neither an XmlNode nor a String is written through #to_s.
  #
  # @return [String]
  def to_xml
    attrs = +''
    content = +''
    @attributes.each do |key, value|
      if key == :text
        content << XmlNode.escape_xml([value].flatten.join("\n"))
      elsif key == :namespace && !value
        next
      else
        attrs << " #{key}=\"#{XmlNode.escape_xml_attr(value.to_s)}\""
      end
    end
    return "<#{@name}#{attrs}/>" if @elements.empty? && !@attributes[:text]

    @elements.each do |child|
      content << (child.is_a?(XmlNode) ? child.to_xml : XmlNode.escape_xml(child.to_s))
    end
    "<#{@name}#{attrs}>#{content}</#{@name}>"
  end

  # Pretty-prints one of the public conversions.
  #
  # @param format [Symbol] name of a public conversion method (e.g. :to_xml
  #   for :xml output, :to_obj / :to_doc / :to_triad for :json output)
  # @param method [Symbol] :xml or :json
  # @param indent [Integer] spaces per nesting level; applied to JSON output
  #   only (XML layout is decided by the default formatter)
  # @return [String]
  # @raise [ArgumentError] for an unknown +method+
  def pretty format, method, indent=2
    case method
    when :xml
      pretty_xml = +''
      XmlUtils::Formatters::Default.new.write(XmlUtils.parse(public_send(format)), pretty_xml)
      pretty_xml
    when :json
      JSON.pretty_generate(public_send(format), indent: ' ' * indent)
    else
      raise ArgumentError, "Unknown pretty method: #{method.inspect}"
    end
  end

  # Escapes &, < and > for use in element content.
  def self.escape_xml(text)
    text.gsub(/[&<>]/, XML_ESCAPES)
  end

  # Escapes &, <, >, " and ' for use in attribute values.
  def self.escape_xml_attr(text)
    text.gsub(/[&<>"']/, XML_ESCAPES)
  end

  # Decodes the five predefined entities in a single pass, so the output of
  # one replacement is never decoded again ("&amp;quot;" becomes "&quot;").
  def self.make_str_from xml
    xml.gsub(/&(?:amp|lt|gt|quot|apos);/, XML_UNESCAPES)
  end

  # Encodes &, <, >, ' and " as entities (inverse of .make_str_from).
  def self.make_xml_from string
    string.gsub(/[&<>"']/, XML_ESCAPES)
  end

  #####################################################################################################
  # attributes operation                                                                              #
  #####################################################################################################

  # Merges +hash+ into the attributes. A truthy :text entry is appended to
  # the existing text instead of replacing it. The caller's hash is left
  # untouched.
  #
  # @return [Hash] the node's attributes
  def add_attributes hash
    text = hash[:text]
    (@attributes[:text] ||= []) << text if text
    @attributes.merge!(hash.reject { |key, _| key == :text })
  end

  # Same as #add_attributes.
  def modify_attributes hash
    add_attributes hash
  end

  # Removes an attribute; the :text entry cannot be removed this way.
  def delete_attribute key
    @attributes.delete(key) unless key==:text
  end

  #####################################################################################################
  # content operation                                                                                 #
  #####################################################################################################

  # Appends raw content (typically a String) to the children.
  def add_content content
    @elements << content
  end

  # Resets the :text attribute, drops every XmlNode child and appends
  # +content+.
  def modify_content content
    @attributes[:text] = []
    @elements.delete_if { |child| child.is_a?(XmlNode) }
    @elements << content
  end

  # Drops every child whose class is exactly XmlNode.
  def delete_content
    @elements = @elements.reject { |child| child.instance_of?(XmlNode) }
  end

  # Adds a child node and links it (parent / prev / next). Anything that is
  # not an XmlNode is ignored.
  def add_element elem
    return unless elem.is_a?(XmlNode)

    @elements << elem unless @elements.include?(elem)
    @next << elem unless @next.include?(elem)
    elem.parent = self
    elem.prev << self unless elem.prev.include?(self)
  end

  # @return [Array] the children for which the block is truthy
  #   ([] without a block)
  def search_elements &block
    block ? @elements.select(&block) : []
  end

  # Removes the children selected by the block and undoes the links that
  # #add_element / #initialize created for them.
  #
  # @return [Array] the removed children ([] without a block)
  def delete_elements &block
    return [] unless block

    removed = search_elements(&block)
    removed.each do |elem|
      @elements.delete(elem)
      next unless elem.is_a?(XmlNode)

      @next.delete(elem)
      elem.prev.delete(self)
      elem.parent = nil if elem.parent.equal?(self)
    end
    removed
  end

  # Deep-copies +node+ into a detached tree (no parent). Array and String
  # attribute values are duplicated so that changing the copy's text does
  # not leak into the source; non-node content is carried over as content.
  def self.copy node
    attrs = {}
    node.attributes.each do |key, value|
      attrs[key] = value.is_a?(Array) || value.is_a?(String) ? value.dup : value
    end
    duplicate = XmlNode.new(name: node.name, parent: nil, attributes: attrs)
    node.elements.each do |child|
      if child.is_a?(XmlNode)
        duplicate.add_element(copy(child))
      else
        duplicate.add_content(child.is_a?(String) ? child.dup : child)
      end
    end
    duplicate
  end
end
# Bridges the XmlUtils document model to XmlNode trees.
module XmlParser
  # Reads and parses the file at +filepath+.
  #
  # @return [XmlNode, nil] nil when the file does not exist
  def self.load(filepath)
    return nil unless File.exist?(filepath)

    XmlParser.parse(File.read(filepath))
  end

  # Parses the XML text +s+ and converts its root element.
  #
  # @return [XmlNode, nil] nil when the document has no root element
  def self.parse(s)
    root_elem = XmlUtils.parse(s).root
    root_elem ? build_xmlnode(root_elem) : nil
  end

  # Recursively converts +element+ (and its child elements) into XmlNodes.
  #
  # Attribute names become Symbol keys. A non-empty namespace is stored
  # under :namespace, and the concatenated Text / CData children are stored
  # under :text unless they are blank. This method is callable from outside
  # the module, as before.
  def self.build_xmlnode(element, parent = nil)
    attrs = {}
    element.attributes.each { |key, attr| attrs[key.to_sym] = attr.value }

    ns = element.namespace(element.prefix)
    attrs[:namespace] = ns if ns && !ns.empty?

    text_parts = element.children.select do |child|
      child.is_a?(XmlUtils::Text) || child.is_a?(XmlUtils::CData)
    end
    text_content = text_parts.map(&:to_s).join
    attrs[:text] = text_content unless text_content.strip.empty?

    node = XmlNode.new(name: element.name, parent: parent, attributes: attrs)
    element.children.each do |child|
      build_xmlnode(child, node) if child.is_a?(XmlUtils::Element)
    end
    node
  end
end