nokogiri 1.11.7-x86-linux → 1.12.0.rc1-x86-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +243 -22
  3. data/LICENSE.md +1 -1
  4. data/README.md +6 -5
  5. data/ext/nokogiri/depend +35 -34
  6. data/ext/nokogiri/extconf.rb +181 -103
  7. data/ext/nokogiri/gumbo.c +611 -0
  8. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  9. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +20 -18
  10. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  11. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +5 -5
  12. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  13. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  14. data/ext/nokogiri/nokogiri.c +51 -38
  15. data/ext/nokogiri/nokogiri.h +16 -9
  16. data/ext/nokogiri/xml_document.c +13 -13
  17. data/ext/nokogiri/xml_element_content.c +2 -0
  18. data/ext/nokogiri/xml_encoding_handler.c +11 -6
  19. data/ext/nokogiri/xml_namespace.c +2 -0
  20. data/ext/nokogiri/xml_node.c +102 -102
  21. data/ext/nokogiri/xml_node_set.c +20 -20
  22. data/ext/nokogiri/xml_reader.c +2 -0
  23. data/ext/nokogiri/xml_sax_parser.c +6 -6
  24. data/ext/nokogiri/xml_sax_parser_context.c +2 -0
  25. data/ext/nokogiri/xml_schema.c +2 -0
  26. data/ext/nokogiri/xml_xpath_context.c +67 -65
  27. data/ext/nokogiri/xslt_stylesheet.c +2 -1
  28. data/gumbo-parser/CHANGES.md +63 -0
  29. data/gumbo-parser/Makefile +101 -0
  30. data/gumbo-parser/THANKS +27 -0
  31. data/lib/nokogiri.rb +31 -29
  32. data/lib/nokogiri/2.5/nokogiri.so +0 -0
  33. data/lib/nokogiri/2.6/nokogiri.so +0 -0
  34. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  35. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  36. data/lib/nokogiri/css.rb +14 -14
  37. data/lib/nokogiri/css/parser.rb +1 -1
  38. data/lib/nokogiri/css/parser.y +1 -1
  39. data/lib/nokogiri/css/syntax_error.rb +1 -1
  40. data/lib/nokogiri/extension.rb +2 -2
  41. data/lib/nokogiri/gumbo.rb +14 -0
  42. data/lib/nokogiri/html.rb +31 -27
  43. data/lib/nokogiri/html4.rb +40 -0
  44. data/lib/nokogiri/{html → html4}/builder.rb +2 -2
  45. data/lib/nokogiri/{html → html4}/document.rb +4 -4
  46. data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
  47. data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
  48. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
  49. data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
  50. data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
  51. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  52. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
  53. data/lib/nokogiri/html5.rb +473 -0
  54. data/lib/nokogiri/html5/document.rb +74 -0
  55. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  56. data/lib/nokogiri/html5/node.rb +93 -0
  57. data/lib/nokogiri/version/constant.rb +1 -1
  58. data/lib/nokogiri/version/info.rb +11 -2
  59. data/lib/nokogiri/xml.rb +35 -36
  60. data/lib/nokogiri/xml/node.rb +6 -5
  61. data/lib/nokogiri/xml/parse_options.rb +2 -0
  62. data/lib/nokogiri/xml/pp.rb +2 -2
  63. data/lib/nokogiri/xml/sax.rb +4 -4
  64. data/lib/nokogiri/xml/sax/document.rb +24 -30
  65. data/lib/nokogiri/xml/xpath.rb +2 -2
  66. data/lib/nokogiri/xslt.rb +16 -16
  67. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  68. metadata +37 -37
  69. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
Binary file
Binary file
Binary file
Binary file
data/lib/nokogiri/css.rb CHANGED
@@ -1,28 +1,28 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/css/node'
3
- require 'nokogiri/css/xpath_visitor'
4
- x = $-w
5
- $-w = false
6
- require 'nokogiri/css/parser'
7
- $-w = x
8
-
9
- require 'nokogiri/css/tokenizer'
10
- require 'nokogiri/css/syntax_error'
11
-
12
2
  module Nokogiri
13
3
  module CSS
14
4
  class << self
15
5
  ###
16
6
  # Parse this CSS selector in +selector+. Returns an AST.
17
- def parse selector
18
- Parser.new.parse selector
7
+ def parse(selector)
8
+ Parser.new.parse(selector)
19
9
  end
20
10
 
21
11
  ###
22
12
  # Get the XPath for +selector+.
23
- def xpath_for selector, options={}
24
- Parser.new(options[:ns] || {}).xpath_for selector, options
13
+ def xpath_for(selector, options = {})
14
+ Parser.new(options[:ns] || {}).xpath_for(selector, options)
25
15
  end
26
16
  end
27
17
  end
28
18
  end
19
+
20
+ require_relative "css/node"
21
+ require_relative "css/xpath_visitor"
22
+ x = $-w
23
+ $-w = false
24
+ require_relative "css/parser"
25
+ $-w = x
26
+
27
+ require_relative "css/tokenizer"
28
+ require_relative "css/syntax_error"
@@ -8,7 +8,7 @@
8
8
  require 'racc/parser.rb'
9
9
 
10
10
 
11
- require 'nokogiri/css/parser_extras'
11
+ require_relative "parser_extras"
12
12
 
13
13
  module Nokogiri
14
14
  module CSS
@@ -253,7 +253,7 @@ end
253
253
 
254
254
  ---- header
255
255
 
256
- require 'nokogiri/css/parser_extras'
256
+ require_relative "parser_extras"
257
257
 
258
258
  ---- inner
259
259
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/syntax_error'
2
+ require_relative "../syntax_error"
3
3
  module Nokogiri
4
4
  module CSS
5
5
  class SyntaxError < ::Nokogiri::SyntaxError
@@ -3,7 +3,7 @@
3
3
  # load the C or Java extension
4
4
  begin
5
5
  ::RUBY_VERSION =~ /(\d+\.\d+)/
6
- require "nokogiri/#{Regexp.last_match(1)}/nokogiri"
6
+ require_relative "#{Regexp.last_match(1)}/nokogiri"
7
7
  rescue LoadError => e
8
8
  if e.message =~ /GLIBC/
9
9
  warn(<<~EOM)
@@ -22,5 +22,5 @@ rescue LoadError => e
22
22
  EOM
23
23
  raise e
24
24
  end
25
- require 'nokogiri/nokogiri'
25
+ require_relative "nokogiri"
26
26
  end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module Gumbo
4
+ # The default maximum number of attributes per element.
5
+ DEFAULT_MAX_ATTRIBUTES = 400
6
+
7
+ # The default maximum number of errors for parsing a document or a fragment.
8
+ DEFAULT_MAX_ERRORS = 0
9
+
10
+ # The default maximum depth of the DOM tree produced by parsing a document
11
+ # or fragment.
12
+ DEFAULT_MAX_TREE_DEPTH = 400
13
+ end
14
+ end
data/lib/nokogiri/html.rb CHANGED
@@ -1,38 +1,42 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/html/entity_lookup'
3
- require 'nokogiri/html/document'
4
- require 'nokogiri/html/document_fragment'
5
- require 'nokogiri/html/sax/parser_context'
6
- require 'nokogiri/html/sax/parser'
7
- require 'nokogiri/html/sax/push_parser'
8
- require 'nokogiri/html/element_description'
9
- require 'nokogiri/html/element_description_defaults'
2
+ require_relative "html4"
10
3
 
11
4
  module Nokogiri
12
- class << self
13
- ###
14
- # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
15
- def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
16
- Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
17
- end
18
- end
5
+ HTML = Nokogiri::HTML4
6
+
7
+ # @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
8
+ # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
9
+ # @!scope class
10
+ define_singleton_method(:HTML, Nokogiri.method(:HTML4))
19
11
 
12
+ # @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
13
+ # {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
14
+ # classes.
20
15
  module HTML
21
- class << self
22
- ###
23
- # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
24
- def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
25
- Document.parse(thing, url, encoding, options, &block)
16
+ # @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
17
+ class Document < Nokogiri::XML::Document
18
+ end
19
+
20
+ # @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
21
+ class DocumentFragment < Nokogiri::XML::DocumentFragment
22
+ end
23
+
24
+ # @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
25
+ class Builder < Nokogiri::XML::Builder
26
+ end
27
+
28
+ module SAX
29
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
30
+ class Parser < Nokogiri::XML::SAX::Parser
26
31
  end
27
32
 
28
- ####
29
- # Parse a fragment from +string+ in to a NodeSet.
30
- def fragment string, encoding = nil
31
- HTML::DocumentFragment.parse string, encoding
33
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
34
+ class ParserContext < Nokogiri::XML::SAX::ParserContext
32
35
  end
33
- end
34
36
 
35
- # Instance of Nokogiri::HTML::EntityLookup
36
- NamedCharacters = EntityLookup.new
37
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
38
+ class PushParser
39
+ end
40
+ end
37
41
  end
38
42
  end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ class << self
4
+ ###
5
+ # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
6
+ def HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
7
+ Nokogiri::HTML4::Document.parse(input, url, encoding, options, &block)
8
+ end
9
+ end
10
+
11
+ # @since v1.12.0
12
+ # @note Before v1.12.0, {Nokogiri::HTML4} did not exist, and {Nokogiri::HTML} was the module/namespace for parsing HTML.
13
+ module HTML4
14
+ class << self
15
+ ###
16
+ # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
17
+ def parse(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
18
+ Document.parse(input, url, encoding, options, &block)
19
+ end
20
+
21
+ ####
22
+ # Parse a fragment from +string+ in to a NodeSet.
23
+ def fragment(string, encoding = nil)
24
+ HTML4::DocumentFragment.parse(string, encoding)
25
+ end
26
+ end
27
+
28
+ # Instance of Nokogiri::HTML4::EntityLookup
29
+ NamedCharacters = EntityLookup.new
30
+ end
31
+ end
32
+
33
+ require_relative "html4/entity_lookup"
34
+ require_relative "html4/document"
35
+ require_relative "html4/document_fragment"
36
+ require_relative "html4/sax/parser_context"
37
+ require_relative "html4/sax/parser"
38
+ require_relative "html4/sax/push_parser"
39
+ require_relative "html4/element_description"
40
+ require_relative "html4/element_description_defaults"
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  module Nokogiri
3
- module HTML
3
+ module HTML4
4
4
  ###
5
5
  # Nokogiri HTML builder is used for building HTML documents. It is very
6
6
  # similar to the Nokogiri::XML::Builder. In fact, you should go read the
@@ -12,7 +12,7 @@ module Nokogiri
12
12
  # Create an HTML document with a body that has an onload attribute, and a
13
13
  # span tag with a class of "bold" that has content of "Hello world".
14
14
  #
15
- # builder = Nokogiri::HTML::Builder.new do |doc|
15
+ # builder = Nokogiri::HTML4::Builder.new do |doc|
16
16
  # doc.html {
17
17
  # doc.body(:onload => 'some_func();') {
18
18
  # doc.span.bold {
@@ -3,7 +3,7 @@
3
3
  require 'pathname'
4
4
 
5
5
  module Nokogiri
6
- module HTML
6
+ module HTML4
7
7
  class Document < Nokogiri::XML::Document
8
8
  ###
9
9
  # Get the meta tag encoding for this document. If there is no meta tag,
@@ -268,12 +268,12 @@ module Nokogiri
268
268
  m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
269
269
  return m[4]
270
270
  catch(:encoding_found) {
271
- Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
271
+ Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
272
272
  nil
273
273
  }
274
274
  else
275
275
  handler = SAXHandler.new
276
- parser = Nokogiri::HTML::SAX::PushParser.new(handler)
276
+ parser = Nokogiri::HTML4::SAX::PushParser.new(handler)
277
277
  parser << chunk rescue Nokogiri::SyntaxError
278
278
  handler.encoding
279
279
  end
@@ -286,7 +286,7 @@ module Nokogiri
286
286
  end
287
287
 
288
288
  # This method is used by the C extension so that
289
- # Nokogiri::HTML::Document#read_io() does not leak memory when
289
+ # Nokogiri::HTML4::Document#read_io() does not leak memory when
290
290
  # EncodingFound is raised.
291
291
  attr_reader :encoding_found
292
292
 
@@ -1,11 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
  module Nokogiri
3
- module HTML
3
+ module HTML4
4
4
  class DocumentFragment < Nokogiri::XML::DocumentFragment
5
5
  ####
6
6
  # Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
7
7
  def self.parse(tags, encoding = nil)
8
- doc = HTML::Document.new
8
+ doc = HTML4::Document.new
9
9
 
10
10
  encoding ||= if tags.respond_to?(:encoding)
11
11
  encoding = tags.encoding
@@ -39,7 +39,7 @@ module Nokogiri
39
39
  "/html/body/node()"
40
40
  end
41
41
 
42
- temp_doc = HTML::Document.parse("<html><body>#{tags}", nil, document.encoding)
42
+ temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
43
43
  temp_doc.xpath(path).each { |child| child.parent = self }
44
44
  self.errors = temp_doc.errors
45
45
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  module Nokogiri
3
- module HTML
3
+ module HTML4
4
4
  class ElementDescription
5
5
  ###
6
6
  # Is this element a block element?
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  module Nokogiri
3
- module HTML
3
+ module HTML4
4
4
  class ElementDescription
5
5
 
6
6
  # Methods are defined protected by method_defined? because at
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  module Nokogiri
3
- module HTML
3
+ module HTML4
4
4
  class EntityDescription < Struct.new(:value, :name, :description); end
5
5
 
6
6
  class EntityLookup
@@ -1,18 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
  module Nokogiri
3
- module HTML
3
+ module HTML4
4
4
  ###
5
- # Nokogiri lets you write a SAX parser to process HTML but get HTML
6
- # correction features.
5
+ # Nokogiri lets you write a SAX parser to process HTML but get HTML correction features.
7
6
  #
8
- # See Nokogiri::HTML::SAX::Parser for a basic example of using a
9
- # SAX parser with HTML.
7
+ # See Nokogiri::HTML4::SAX::Parser for a basic example of using a SAX parser with HTML.
10
8
  #
11
9
  # For more information on SAX parsers, see Nokogiri::XML::SAX
12
10
  module SAX
13
11
  ###
14
- # This class lets you perform SAX style parsing on HTML with HTML
15
- # error correction.
12
+ # This class lets you perform SAX style parsing on HTML with HTML error correction.
16
13
  #
17
14
  # Here is a basic usage example:
18
15
  #
@@ -22,40 +19,40 @@ module Nokogiri
22
19
  # end
23
20
  # end
24
21
  #
25
- # parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
22
+ # parser = Nokogiri::HTML4::SAX::Parser.new(MyDoc.new)
26
23
  # parser.parse(File.read(ARGV[0], mode: 'rb'))
27
24
  #
28
25
  # For more information on SAX parsers, see Nokogiri::XML::SAX
29
26
  class Parser < Nokogiri::XML::SAX::Parser
30
27
  ###
31
28
  # Parse html stored in +data+ using +encoding+
32
- def parse_memory data, encoding = 'UTF-8'
29
+ def parse_memory(data, encoding = "UTF-8")
33
30
  raise ArgumentError unless data
34
31
  return unless data.length > 0
35
32
  ctx = ParserContext.memory(data, encoding)
36
33
  yield ctx if block_given?
37
- ctx.parse_with self
34
+ ctx.parse_with(self)
38
35
  end
39
36
 
40
37
  ###
41
38
  # Parse given +io+
42
- def parse_io io, encoding = 'UTF-8'
39
+ def parse_io(io, encoding = "UTF-8")
43
40
  check_encoding(encoding)
44
41
  @encoding = encoding
45
42
  ctx = ParserContext.io(io, ENCODINGS[encoding])
46
43
  yield ctx if block_given?
47
- ctx.parse_with self
44
+ ctx.parse_with(self)
48
45
  end
49
46
 
50
47
  ###
51
48
  # Parse a file with +filename+
52
- def parse_file filename, encoding = 'UTF-8'
49
+ def parse_file(filename, encoding = "UTF-8")
53
50
  raise ArgumentError unless filename
54
51
  raise Errno::ENOENT unless File.exist?(filename)
55
52
  raise Errno::EISDIR if File.directory?(filename)
56
53
  ctx = ParserContext.file(filename, encoding)
57
54
  yield ctx if block_given?
58
- ctx.parse_with self
55
+ ctx.parse_with(self)
59
56
  end
60
57
  end
61
58
  end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module HTML4
4
+ module SAX
5
+ ###
6
+ # Context for HTML SAX parsers. This class is usually not instantiated by the user. Instead,
7
+ # you should be looking at Nokogiri::HTML4::SAX::Parser
8
+ class ParserContext < Nokogiri::XML::SAX::ParserContext
9
+ def self.new(thing, encoding = "UTF-8")
10
+ if [:read, :close].all? { |x| thing.respond_to?(x) }
11
+ super
12
+ else
13
+ memory(thing, encoding)
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -1,17 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
  module Nokogiri
3
- module HTML
3
+ module HTML4
4
4
  module SAX
5
5
  class PushParser
6
6
 
7
- # The Nokogiri::HTML::SAX::Document on which the PushParser will be
7
+ # The Nokogiri::HTML4::SAX::Document on which the PushParser will be
8
8
  # operating
9
9
  attr_accessor :document
10
10
 
11
- def initialize(doc = HTML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
11
+ def initialize(doc = HTML4::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
12
12
  @document = doc
13
13
  @encoding = encoding
14
- @sax_parser = HTML::SAX::Parser.new(doc, @encoding)
14
+ @sax_parser = HTML4::SAX::Parser.new(doc, @encoding)
15
15
 
16
16
  ## Create our push parser context
17
17
  initialize_native(@sax_parser, file_name, encoding)
@@ -27,7 +27,7 @@ module Nokogiri
27
27
 
28
28
  ###
29
29
  # Finish the parsing. This method is only necessary for
30
- # Nokogiri::HTML::SAX::Document#end_document to be called.
30
+ # Nokogiri::HTML4::SAX::Document#end_document to be called.
31
31
  def finish
32
32
  write '', true
33
33
  end