nokogiri 1.12.3-x86-linux → 1.13.1-x86-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +5 -6
- data/ext/nokogiri/extconf.rb +47 -35
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +1 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +1 -1
- data/ext/nokogiri/xml_document.c +35 -35
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_node.c +645 -333
- data/ext/nokogiri/xml_reader.c +37 -11
- data/ext/nokogiri/xml_xpath_context.c +72 -49
- data/ext/nokogiri/xslt_stylesheet.c +107 -9
- data/lib/nokogiri/2.6/nokogiri.so +0 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +20 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +84 -75
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +2 -1
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +24 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +6 -3
- data/lib/nokogiri/html5.rb +68 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +19 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +69 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +178 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +7 -4
- data/lib/nokogiri/xml/node.rb +512 -348
- data/lib/nokogiri/xml/node_set.rb +46 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +11 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +36 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +4 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +3 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- metadata +124 -52
- data/lib/nokogiri/2.5/nokogiri.so +0 -0
| @@ -1,6 +1,7 @@ | |
| 1 | 
            +
            # coding: utf-8
         | 
| 1 2 | 
             
            # frozen_string_literal: true
         | 
| 2 3 |  | 
| 3 | 
            -
            require  | 
| 4 | 
            +
            require "pathname"
         | 
| 4 5 |  | 
| 5 6 | 
             
            module Nokogiri
         | 
| 6 7 | 
             
              module HTML4
         | 
| @@ -9,11 +10,10 @@ module Nokogiri | |
| 9 10 | 
             
                  # Get the meta tag encoding for this document.  If there is no meta tag,
         | 
| 10 11 | 
             
                  # then nil is returned.
         | 
| 11 12 | 
             
                  def meta_encoding
         | 
| 12 | 
            -
                     | 
| 13 | 
            -
                    when meta = at('//meta[@charset]')
         | 
| 13 | 
            +
                    if (meta = at_xpath("//meta[@charset]"))
         | 
| 14 14 | 
             
                      meta[:charset]
         | 
| 15 | 
            -
                     | 
| 16 | 
            -
                      meta[ | 
| 15 | 
            +
                    elsif (meta = meta_content_type)
         | 
| 16 | 
            +
                      meta["content"][/charset\s*=\s*([\w-]+)/i, 1]
         | 
| 17 17 | 
             
                    end
         | 
| 18 18 | 
             
                  end
         | 
| 19 19 |  | 
| @@ -33,24 +33,22 @@ module Nokogiri | |
| 33 33 | 
             
                  #
         | 
| 34 34 | 
             
                  # Beware in CRuby, that libxml2 automatically inserts a meta tag
         | 
| 35 35 | 
             
                  # into a head element.
         | 
| 36 | 
            -
                  def meta_encoding= | 
| 37 | 
            -
                     | 
| 38 | 
            -
             | 
| 39 | 
            -
                      meta['content'] = 'text/html; charset=%s' % encoding
         | 
| 36 | 
            +
                  def meta_encoding=(encoding)
         | 
| 37 | 
            +
                    if (meta = meta_content_type)
         | 
| 38 | 
            +
                      meta["content"] = format("text/html; charset=%s", encoding)
         | 
| 40 39 | 
             
                      encoding
         | 
| 41 | 
            -
                     | 
| 42 | 
            -
                      meta[ | 
| 40 | 
            +
                    elsif (meta = at_xpath("//meta[@charset]"))
         | 
| 41 | 
            +
                      meta["charset"] = encoding
         | 
| 43 42 | 
             
                    else
         | 
| 44 | 
            -
                      meta = XML::Node.new( | 
| 45 | 
            -
                      if dtd = internal_subset  | 
| 46 | 
            -
                        meta[ | 
| 43 | 
            +
                      meta = XML::Node.new("meta", self)
         | 
| 44 | 
            +
                      if (dtd = internal_subset) && dtd.html5_dtd?
         | 
| 45 | 
            +
                        meta["charset"] = encoding
         | 
| 47 46 | 
             
                      else
         | 
| 48 | 
            -
                        meta[ | 
| 49 | 
            -
                        meta[ | 
| 47 | 
            +
                        meta["http-equiv"] = "Content-Type"
         | 
| 48 | 
            +
                        meta["content"] = format("text/html; charset=%s", encoding)
         | 
| 50 49 | 
             
                      end
         | 
| 51 50 |  | 
| 52 | 
            -
                       | 
| 53 | 
            -
                      when head = at('//head')
         | 
| 51 | 
            +
                      if (head = at_xpath("//head"))
         | 
| 54 52 | 
             
                        head.prepend_child(meta)
         | 
| 55 53 | 
             
                      else
         | 
| 56 54 | 
             
                        set_metadata_element(meta)
         | 
| @@ -60,9 +58,9 @@ module Nokogiri | |
| 60 58 | 
             
                  end
         | 
| 61 59 |  | 
| 62 60 | 
             
                  def meta_content_type
         | 
| 63 | 
            -
                    xpath( | 
| 64 | 
            -
                      node[ | 
| 65 | 
            -
                     | 
| 61 | 
            +
                    xpath("//meta[@http-equiv and boolean(@content)]").find do |node|
         | 
| 62 | 
            +
                      node["http-equiv"] =~ /\AContent-Type\z/i
         | 
| 63 | 
            +
                    end
         | 
| 66 64 | 
             
                  end
         | 
| 67 65 | 
             
                  private :meta_content_type
         | 
| 68 66 |  | 
| @@ -70,7 +68,7 @@ module Nokogiri | |
| 70 68 | 
             
                  # Get the title string of this document.  Return nil if there is
         | 
| 71 69 | 
             
                  # no title tag.
         | 
| 72 70 | 
             
                  def title
         | 
| 73 | 
            -
                    title =  | 
| 71 | 
            +
                    (title = at_xpath("//title")) && title.inner_text
         | 
| 74 72 | 
             
                  end
         | 
| 75 73 |  | 
| 76 74 | 
             
                  ###
         | 
| @@ -86,44 +84,41 @@ module Nokogiri | |
| 86 84 | 
             
                  # content element (typically <body>) if any.
         | 
| 87 85 | 
             
                  def title=(text)
         | 
| 88 86 | 
             
                    tnode = XML::Text.new(text, self)
         | 
| 89 | 
            -
                    if title =  | 
| 87 | 
            +
                    if (title = at_xpath("//title"))
         | 
| 90 88 | 
             
                      title.children = tnode
         | 
| 91 89 | 
             
                      return text
         | 
| 92 90 | 
             
                    end
         | 
| 93 91 |  | 
| 94 | 
            -
                    title = XML::Node.new( | 
| 95 | 
            -
                     | 
| 96 | 
            -
                    when head = at('//head')
         | 
| 92 | 
            +
                    title = XML::Node.new("title", self) << tnode
         | 
| 93 | 
            +
                    if (head = at_xpath("//head"))
         | 
| 97 94 | 
             
                      head << title
         | 
| 98 | 
            -
                     | 
| 95 | 
            +
                    elsif (meta = (at_xpath("//meta[@charset]") || meta_content_type))
         | 
| 99 96 | 
             
                      # better put after charset declaration
         | 
| 100 97 | 
             
                      meta.add_next_sibling(title)
         | 
| 101 98 | 
             
                    else
         | 
| 102 99 | 
             
                      set_metadata_element(title)
         | 
| 103 100 | 
             
                    end
         | 
| 104 | 
            -
                    text
         | 
| 105 101 | 
             
                  end
         | 
| 106 102 |  | 
| 107 | 
            -
                  def set_metadata_element(element)
         | 
| 108 | 
            -
                     | 
| 109 | 
            -
                    when head = at('//head')
         | 
| 103 | 
            +
                  def set_metadata_element(element) # rubocop:disable Naming/AccessorMethodName
         | 
| 104 | 
            +
                    if (head = at_xpath("//head"))
         | 
| 110 105 | 
             
                      head << element
         | 
| 111 | 
            -
                     | 
| 112 | 
            -
                      head = html.prepend_child(XML::Node.new( | 
| 106 | 
            +
                    elsif (html = at_xpath("//html"))
         | 
| 107 | 
            +
                      head = html.prepend_child(XML::Node.new("head", self))
         | 
| 113 108 | 
             
                      head.prepend_child(element)
         | 
| 114 | 
            -
                     | 
| 115 | 
            -
             | 
| 116 | 
            -
             | 
| 117 | 
            -
             | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 109 | 
            +
                    elsif (first = children.find do |node|
         | 
| 110 | 
            +
                             case node
         | 
| 111 | 
            +
                             when XML::Element, XML::Text
         | 
| 112 | 
            +
                               true
         | 
| 113 | 
            +
                             end
         | 
| 114 | 
            +
                           end)
         | 
| 120 115 | 
             
                      # We reach here only if the underlying document model
         | 
| 121 116 | 
             
                      # allows <html>/<head> elements to be omitted and does not
         | 
| 122 117 | 
             
                      # automatically supply them.
         | 
| 123 118 | 
             
                      first.add_previous_sibling(element)
         | 
| 124 119 | 
             
                    else
         | 
| 125 | 
            -
                      html = add_child(XML::Node.new( | 
| 126 | 
            -
                      head = html.add_child(XML::Node.new( | 
| 120 | 
            +
                      html = add_child(XML::Node.new("html", self))
         | 
| 121 | 
            +
                      head = html.add_child(XML::Node.new("head", self))
         | 
| 127 122 | 
             
                      head.prepend_child(element)
         | 
| 128 123 | 
             
                    end
         | 
| 129 124 | 
             
                  end
         | 
| @@ -143,15 +138,25 @@ module Nokogiri | |
| 143 138 | 
             
                  #     config.format.as_xml
         | 
| 144 139 | 
             
                  #   end
         | 
| 145 140 | 
             
                  #
         | 
| 146 | 
            -
                  def serialize | 
| 141 | 
            +
                  def serialize(options = {})
         | 
| 147 142 | 
             
                    options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
         | 
| 148 143 | 
             
                    super
         | 
| 149 144 | 
             
                  end
         | 
| 150 145 |  | 
| 151 146 | 
             
                  ####
         | 
| 152 147 | 
             
                  # Create a Nokogiri::XML::DocumentFragment from +tags+
         | 
| 153 | 
            -
                  def fragment | 
| 154 | 
            -
                    DocumentFragment.new(self, tags,  | 
| 148 | 
            +
                  def fragment(tags = nil)
         | 
| 149 | 
            +
                    DocumentFragment.new(self, tags, root)
         | 
| 150 | 
            +
                  end
         | 
| 151 | 
            +
             | 
| 152 | 
            +
                  # :call-seq:
         | 
| 153 | 
            +
                  #   xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
         | 
| 154 | 
            +
                  #
         | 
| 155 | 
            +
                  # [Returns] The document type which determines CSS-to-XPath translation.
         | 
| 156 | 
            +
                  #
         | 
| 157 | 
            +
                  # See XPathVisitor for more information.
         | 
| 158 | 
            +
                  def xpath_doctype
         | 
| 159 | 
            +
                    Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML4
         | 
| 155 160 | 
             
                  end
         | 
| 156 161 |  | 
| 157 162 | 
             
                  class << self
         | 
| @@ -163,9 +168,8 @@ module Nokogiri | |
| 163 168 | 
             
                    # is a number that sets options in the parser, such as
         | 
| 164 169 | 
             
                    # Nokogiri::XML::ParseOptions::RECOVER.  See the constants in
         | 
| 165 170 | 
             
                    # Nokogiri::XML::ParseOptions.
         | 
| 166 | 
            -
                    def parse | 
| 171 | 
            +
                    def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
         | 
| 167 172 | 
             
                      options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
         | 
| 168 | 
            -
             | 
| 169 173 | 
             
                      yield options if block_given?
         | 
| 170 174 |  | 
| 171 175 | 
             
                      url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
         | 
| @@ -206,7 +210,7 @@ module Nokogiri | |
| 206 210 | 
             
                      end
         | 
| 207 211 |  | 
| 208 212 | 
             
                      # read_memory pukes on empty docs
         | 
| 209 | 
            -
                      if string_or_io.nil?  | 
| 213 | 
            +
                      if string_or_io.nil? || string_or_io.empty?
         | 
| 210 214 | 
             
                        return encoding ? new.tap { |i| i.encoding = encoding } : new
         | 
| 211 215 | 
             
                      end
         | 
| 212 216 |  | 
| @@ -216,37 +220,38 @@ module Nokogiri | |
| 216 220 | 
             
                    end
         | 
| 217 221 | 
             
                  end
         | 
| 218 222 |  | 
| 219 | 
            -
                  class EncodingFound < StandardError # :nodoc:
         | 
| 223 | 
            +
                  class EncodingFound < StandardError # :nodoc: all
         | 
| 220 224 | 
             
                    attr_reader :found_encoding
         | 
| 221 225 |  | 
| 222 226 | 
             
                    def initialize(encoding)
         | 
| 223 227 | 
             
                      @found_encoding = encoding
         | 
| 224 | 
            -
                      super("encoding found: %s"  | 
| 228 | 
            +
                      super(format("encoding found: %s", encoding))
         | 
| 225 229 | 
             
                    end
         | 
| 226 230 | 
             
                  end
         | 
| 227 231 |  | 
| 228 | 
            -
                   | 
| 229 | 
            -
             | 
| 232 | 
            +
                  # :nodoc: all
         | 
| 233 | 
            +
                  class EncodingReader
         | 
| 234 | 
            +
                    class SAXHandler < Nokogiri::XML::SAX::Document
         | 
| 230 235 | 
             
                      attr_reader :encoding
         | 
| 231 | 
            -
             | 
| 236 | 
            +
             | 
| 232 237 | 
             
                      def initialize
         | 
| 233 238 | 
             
                        @encoding = nil
         | 
| 234 239 | 
             
                        super()
         | 
| 235 240 | 
             
                      end
         | 
| 236 | 
            -
             | 
| 241 | 
            +
             | 
| 237 242 | 
             
                      def start_element(name, attrs = [])
         | 
| 238 | 
            -
                        return unless name ==  | 
| 243 | 
            +
                        return unless name == "meta"
         | 
| 239 244 | 
             
                        attr = Hash[attrs]
         | 
| 240 | 
            -
                        charset = attr[ | 
| 241 | 
            -
                          @encoding = charset
         | 
| 242 | 
            -
                        http_equiv = attr[ | 
| 243 | 
            -
                          http_equiv.match(/\AContent-Type\z/i)  | 
| 244 | 
            -
                          content = attr[ | 
| 245 | 
            -
                          m = content.match(/;\s*charset\s*=\s*([\w-]+)/)  | 
| 246 | 
            -
                          @encoding = m[1]
         | 
| 245 | 
            +
                        (charset = attr["charset"]) &&
         | 
| 246 | 
            +
                          (@encoding = charset)
         | 
| 247 | 
            +
                        (http_equiv = attr["http-equiv"]) &&
         | 
| 248 | 
            +
                          http_equiv.match(/\AContent-Type\z/i) &&
         | 
| 249 | 
            +
                          (content = attr["content"]) &&
         | 
| 250 | 
            +
                          (m = content.match(/;\s*charset\s*=\s*([\w-]+)/)) &&
         | 
| 251 | 
            +
                          (@encoding = m[1])
         | 
| 247 252 | 
             
                      end
         | 
| 248 253 | 
             
                    end
         | 
| 249 | 
            -
             | 
| 254 | 
            +
             | 
| 250 255 | 
             
                    class JumpSAXHandler < SAXHandler
         | 
| 251 256 | 
             
                      def initialize(jumptag)
         | 
| 252 257 | 
             
                        @jumptag = jumptag
         | 
| @@ -255,26 +260,30 @@ module Nokogiri | |
| 255 260 |  | 
| 256 261 | 
             
                      def start_element(name, attrs = [])
         | 
| 257 262 | 
             
                        super
         | 
| 258 | 
            -
                        throw | 
| 259 | 
            -
                        throw | 
| 263 | 
            +
                        throw(@jumptag, @encoding) if @encoding
         | 
| 264 | 
            +
                        throw(@jumptag, nil) if /\A(?:div|h1|img|p|br)\z/.match?(name)
         | 
| 260 265 | 
             
                      end
         | 
| 261 266 | 
             
                    end
         | 
| 262 267 |  | 
| 263 268 | 
             
                    def self.detect_encoding(chunk)
         | 
| 264 | 
            -
                      m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)  | 
| 265 | 
            -
                        return Nokogiri.XML(m[1]).encoding
         | 
| 269 | 
            +
                      (m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)) &&
         | 
| 270 | 
            +
                        (return Nokogiri.XML(m[1]).encoding)
         | 
| 266 271 |  | 
| 267 272 | 
             
                      if Nokogiri.jruby?
         | 
| 268 | 
            -
                        m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i)  | 
| 269 | 
            -
                          return m[4]
         | 
| 270 | 
            -
                        catch(:encoding_found)  | 
| 273 | 
            +
                        (m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i)) &&
         | 
| 274 | 
            +
                          (return m[4])
         | 
| 275 | 
            +
                        catch(:encoding_found) do
         | 
| 271 276 | 
             
                          Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
         | 
| 272 277 | 
             
                          nil
         | 
| 273 | 
            -
                         | 
| 278 | 
            +
                        end
         | 
| 274 279 | 
             
                      else
         | 
| 275 280 | 
             
                        handler = SAXHandler.new
         | 
| 276 281 | 
             
                        parser = Nokogiri::HTML4::SAX::PushParser.new(handler)
         | 
| 277 | 
            -
                         | 
| 282 | 
            +
                        begin
         | 
| 283 | 
            +
                          parser << chunk
         | 
| 284 | 
            +
                        rescue
         | 
| 285 | 
            +
                          Nokogiri::SyntaxError
         | 
| 286 | 
            +
                        end
         | 
| 278 287 | 
             
                        handler.encoding
         | 
| 279 288 | 
             
                      end
         | 
| 280 289 | 
             
                    end
         | 
| @@ -293,13 +302,13 @@ module Nokogiri | |
| 293 302 | 
             
                    def read(len)
         | 
| 294 303 | 
             
                      # no support for a call without len
         | 
| 295 304 |  | 
| 296 | 
            -
                       | 
| 297 | 
            -
                        @firstchunk = @io.read(len)  | 
| 305 | 
            +
                      unless @firstchunk
         | 
| 306 | 
            +
                        (@firstchunk = @io.read(len)) || (return nil)
         | 
| 298 307 |  | 
| 299 308 | 
             
                        # This implementation expects that the first call from
         | 
| 300 309 | 
             
                        # htmlReadIO() is made with a length long enough (~1KB) to
         | 
| 301 310 | 
             
                        # achieve advanced encoding detection.
         | 
| 302 | 
            -
                        if encoding = EncodingReader.detect_encoding(@firstchunk)
         | 
| 311 | 
            +
                        if (encoding = EncodingReader.detect_encoding(@firstchunk))
         | 
| 303 312 | 
             
                          # The first chunk is stored for the next read in retry.
         | 
| 304 313 | 
             
                          raise @encoding_found = EncodingFound.new(encoding)
         | 
| 305 314 | 
             
                        end
         | 
| @@ -308,7 +317,7 @@ module Nokogiri | |
| 308 317 |  | 
| 309 318 | 
             
                      ret = @firstchunk.slice!(0, len)
         | 
| 310 319 | 
             
                      if (len -= ret.length) > 0
         | 
| 311 | 
            -
                        rest = @io.read(len)  | 
| 320 | 
            +
                        (rest = @io.read(len)) && ret << (rest)
         | 
| 312 321 | 
             
                      end
         | 
| 313 322 | 
             
                      if ret.empty?
         | 
| 314 323 | 
             
                        nil
         | 
| @@ -1,34 +1,38 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 2 3 | 
             
            module Nokogiri
         | 
| 3 4 | 
             
              module HTML4
         | 
| 4 5 | 
             
                class DocumentFragment < Nokogiri::XML::DocumentFragment
         | 
| 5 6 | 
             
                  ####
         | 
| 6 7 | 
             
                  # Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
         | 
| 7 | 
            -
                  def self.parse(tags, encoding = nil)
         | 
| 8 | 
            +
                  def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
         | 
| 8 9 | 
             
                    doc = HTML4::Document.new
         | 
| 9 10 |  | 
| 10 11 | 
             
                    encoding ||= if tags.respond_to?(:encoding)
         | 
| 11 12 | 
             
                      encoding = tags.encoding
         | 
| 12 13 | 
             
                      if encoding == ::Encoding::ASCII_8BIT
         | 
| 13 | 
            -
                         | 
| 14 | 
            +
                        "UTF-8"
         | 
| 14 15 | 
             
                      else
         | 
| 15 16 | 
             
                        encoding.name
         | 
| 16 17 | 
             
                      end
         | 
| 17 18 | 
             
                    else
         | 
| 18 | 
            -
                       | 
| 19 | 
            +
                      "UTF-8"
         | 
| 19 20 | 
             
                    end
         | 
| 20 21 |  | 
| 21 22 | 
             
                    doc.encoding = encoding
         | 
| 22 23 |  | 
| 23 | 
            -
                    new(doc, tags)
         | 
| 24 | 
            +
                    new(doc, tags, nil, options, &block)
         | 
| 24 25 | 
             
                  end
         | 
| 25 26 |  | 
| 26 | 
            -
                  def initialize(document, tags = nil, ctx = nil)
         | 
| 27 | 
            +
                  def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
         | 
| 27 28 | 
             
                    return self unless tags
         | 
| 28 29 |  | 
| 30 | 
            +
                    options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
         | 
| 31 | 
            +
                    yield options if block_given?
         | 
| 32 | 
            +
             | 
| 29 33 | 
             
                    if ctx
         | 
| 30 34 | 
             
                      preexisting_errors = document.errors.dup
         | 
| 31 | 
            -
                      node_set = ctx.parse("<div>#{tags}</div>")
         | 
| 35 | 
            +
                      node_set = ctx.parse("<div>#{tags}</div>", options)
         | 
| 32 36 | 
             
                      node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
         | 
| 33 37 | 
             
                      self.errors = document.errors - preexisting_errors
         | 
| 34 38 | 
             
                    else
         | 
| @@ -39,7 +43,7 @@ module Nokogiri | |
| 39 43 | 
             
                        "/html/body/node()"
         | 
| 40 44 | 
             
                      end
         | 
| 41 45 |  | 
| 42 | 
            -
                      temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
         | 
| 46 | 
            +
                      temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding, options)
         | 
| 43 47 | 
             
                      temp_doc.xpath(path).each { |child| child.parent = self }
         | 
| 44 48 | 
             
                      self.errors = temp_doc.errors
         | 
| 45 49 | 
             
                    end
         |