nokogiri 1.12.5 → 1.13.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/README.md +9 -7
  4. data/bin/nokogiri +63 -50
  5. data/dependencies.yml +13 -64
  6. data/ext/nokogiri/extconf.rb +66 -44
  7. data/ext/nokogiri/gumbo.c +1 -1
  8. data/ext/nokogiri/html4_sax_parser_context.c +2 -3
  9. data/ext/nokogiri/nokogiri.h +8 -0
  10. data/ext/nokogiri/xml_attr.c +2 -2
  11. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  12. data/ext/nokogiri/xml_cdata.c +1 -1
  13. data/ext/nokogiri/xml_document.c +36 -36
  14. data/ext/nokogiri/xml_document_fragment.c +0 -2
  15. data/ext/nokogiri/xml_dtd.c +10 -10
  16. data/ext/nokogiri/xml_element_decl.c +3 -3
  17. data/ext/nokogiri/xml_encoding_handler.c +25 -11
  18. data/ext/nokogiri/xml_entity_decl.c +5 -5
  19. data/ext/nokogiri/xml_node.c +707 -381
  20. data/ext/nokogiri/xml_node_set.c +4 -4
  21. data/ext/nokogiri/xml_reader.c +88 -11
  22. data/ext/nokogiri/xml_sax_parser_context.c +10 -3
  23. data/ext/nokogiri/xml_schema.c +3 -3
  24. data/ext/nokogiri/xml_text.c +1 -1
  25. data/ext/nokogiri/xml_xpath_context.c +73 -50
  26. data/ext/nokogiri/xslt_stylesheet.c +107 -9
  27. data/gumbo-parser/src/parser.c +0 -11
  28. data/lib/nokogiri/class_resolver.rb +67 -0
  29. data/lib/nokogiri/css/node.rb +9 -8
  30. data/lib/nokogiri/css/parser.rb +360 -341
  31. data/lib/nokogiri/css/parser.y +249 -244
  32. data/lib/nokogiri/css/parser_extras.rb +22 -20
  33. data/lib/nokogiri/css/syntax_error.rb +1 -0
  34. data/lib/nokogiri/css/tokenizer.rb +4 -3
  35. data/lib/nokogiri/css/tokenizer.rex +3 -2
  36. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  37. data/lib/nokogiri/css.rb +38 -6
  38. data/lib/nokogiri/decorators/slop.rb +8 -7
  39. data/lib/nokogiri/extension.rb +1 -1
  40. data/lib/nokogiri/gumbo.rb +1 -0
  41. data/lib/nokogiri/html.rb +16 -10
  42. data/lib/nokogiri/html4/builder.rb +1 -0
  43. data/lib/nokogiri/html4/document.rb +88 -77
  44. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  45. data/lib/nokogiri/html4/element_description.rb +1 -0
  46. data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
  47. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  48. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  49. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  50. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  51. data/lib/nokogiri/html4.rb +11 -5
  52. data/lib/nokogiri/html5/document.rb +27 -10
  53. data/lib/nokogiri/html5/document_fragment.rb +5 -2
  54. data/lib/nokogiri/html5/node.rb +10 -3
  55. data/lib/nokogiri/html5.rb +69 -64
  56. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  57. data/lib/nokogiri/syntax_error.rb +1 -0
  58. data/lib/nokogiri/version/constant.rb +2 -1
  59. data/lib/nokogiri/version/info.rb +20 -13
  60. data/lib/nokogiri/version.rb +1 -0
  61. data/lib/nokogiri/xml/attr.rb +5 -3
  62. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  63. data/lib/nokogiri/xml/builder.rb +34 -32
  64. data/lib/nokogiri/xml/cdata.rb +2 -1
  65. data/lib/nokogiri/xml/character_data.rb +1 -0
  66. data/lib/nokogiri/xml/document.rb +144 -103
  67. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  68. data/lib/nokogiri/xml/dtd.rb +3 -2
  69. data/lib/nokogiri/xml/element_content.rb +1 -0
  70. data/lib/nokogiri/xml/element_decl.rb +2 -1
  71. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  72. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  73. data/lib/nokogiri/xml/namespace.rb +2 -0
  74. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  75. data/lib/nokogiri/xml/node.rb +521 -351
  76. data/lib/nokogiri/xml/node_set.rb +50 -54
  77. data/lib/nokogiri/xml/notation.rb +12 -0
  78. data/lib/nokogiri/xml/parse_options.rb +12 -7
  79. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  80. data/lib/nokogiri/xml/pp/node.rb +24 -26
  81. data/lib/nokogiri/xml/pp.rb +1 -0
  82. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  83. data/lib/nokogiri/xml/reader.rb +20 -24
  84. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  85. data/lib/nokogiri/xml/sax/document.rb +20 -19
  86. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  87. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  88. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  89. data/lib/nokogiri/xml/sax.rb +1 -0
  90. data/lib/nokogiri/xml/schema.rb +7 -6
  91. data/lib/nokogiri/xml/searchable.rb +93 -62
  92. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  93. data/lib/nokogiri/xml/text.rb +1 -0
  94. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  95. data/lib/nokogiri/xml/xpath.rb +12 -0
  96. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  97. data/lib/nokogiri/xml.rb +4 -3
  98. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  99. data/lib/nokogiri/xslt.rb +21 -13
  100. data/lib/nokogiri.rb +19 -16
  101. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  102. data/patches/libxml2/0004-use-glibc-strlen.patch +3 -3
  103. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2443 -1914
  104. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  105. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  106. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  107. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  108. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  109. metadata +104 -32
  110. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  111. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  112. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  113. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,10 +1,11 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module XPath
5
6
  class SyntaxError < XML::SyntaxError
6
7
  def to_s
7
- [super.chomp, str1].compact.join(': ')
8
+ [super.chomp, str1].compact.join(": ")
8
9
  end
9
10
  end
10
11
  end
@@ -1,7 +1,19 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module XPath
6
+ # The XPath search prefix to search globally, +//+
7
+ GLOBAL_SEARCH_PREFIX = "//"
8
+
9
+ # The XPath search prefix to search direct descendants of the root element, +/+
10
+ ROOT_SEARCH_PREFIX = "/"
11
+
12
+ # The XPath search prefix to search direct descendants of the current element, +./+
13
+ CURRENT_SEARCH_PREFIX = "./"
14
+
15
+ # The XPath search prefix to search anywhere in the current element's subtree, +.//+
16
+ SUBTREE_SEARCH_PREFIX = ".//"
5
17
  end
6
18
  end
7
19
  end
@@ -1,17 +1,16 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class XPathContext
5
-
6
6
  ###
7
7
  # Register namespaces in +namespaces+
8
8
  def register_namespaces(namespaces)
9
9
  namespaces.each do |k, v|
10
- k = k.to_s.gsub(/.*:/,'') # strip off 'xmlns:' or 'xml:'
10
+ k = k.to_s.gsub(/.*:/, "") # strip off 'xmlns:' or 'xml:'
11
11
  register_ns(k, v)
12
12
  end
13
13
  end
14
-
15
14
  end
16
15
  end
17
16
  end
data/lib/nokogiri/xml.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  class << self
4
5
  ###
@@ -21,12 +22,12 @@ module Nokogiri
21
22
  # Nokogiri::XML::Reader for more information
22
23
  def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
23
24
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
24
- # Give the options to the user
25
25
  yield options if block_given?
26
26
 
27
27
  if string_or_io.respond_to?(:read)
28
28
  return Reader.from_io(string_or_io, url, encoding, options.to_i)
29
29
  end
30
+
30
31
  Reader.from_memory(string_or_io, url, encoding, options.to_i)
31
32
  end
32
33
 
@@ -38,8 +39,8 @@ module Nokogiri
38
39
 
39
40
  ####
40
41
  # Parse a fragment from +string+ into a NodeSet.
41
- def fragment(string)
42
- XML::DocumentFragment.parse(string)
42
+ def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
43
+ XML::DocumentFragment.parse(string, options, &block)
43
44
  end
44
45
  end
45
46
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XSLT
4
5
  ###
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,4 +1,6 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
3
5
  class << self
4
6
  ###
@@ -8,7 +10,7 @@ module Nokogiri
8
10
  #
9
11
  # xslt = Nokogiri::XSLT(File.read(ARGV[0]))
10
12
  #
11
- def XSLT stylesheet, modules = {}
13
+ def XSLT(stylesheet, modules = {})
12
14
  XSLT.parse(stylesheet, modules)
13
15
  end
14
16
  end
@@ -33,22 +35,28 @@ module Nokogiri
33
35
  end
34
36
  end
35
37
 
36
- ###
37
- # Quote parameters in +params+ for stylesheet safety
38
+ # :call-seq:
39
+ # quote_params(params) → Array
40
+ #
41
+ # Quote parameters in +params+ for stylesheet safety.
42
+ # See Nokogiri::XSLT::Stylesheet.transform for example usage.
43
+ #
44
+ # [Parameters]
45
+ # - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
46
+ #
47
+ # [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
48
+ #
38
49
  def quote_params(params)
39
- parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
40
- parray.each_with_index do |v, i|
41
- parray[i] = if i % 2 > 0
42
- if v =~ /'/
43
- "concat('#{v.gsub(/'/, %q{', "'", '})}')"
44
- else
45
- "'#{v}'"
46
- end
50
+ params.flatten.each_slice(2).each_with_object([]) do |kv, quoted_params|
51
+ key, value = kv.map(&:to_s)
52
+ value = if /'/.match?(value)
53
+ "concat('#{value.gsub(/'/, %q{', "'", '})}')"
47
54
  else
48
- v.to_s
55
+ "'#{value}'"
49
56
  end
57
+ quoted_params << key
58
+ quoted_params << value
50
59
  end
51
- parray.flatten
52
60
  end
53
61
  end
54
62
  end
data/lib/nokogiri.rb CHANGED
@@ -1,6 +1,5 @@
1
- # -*- coding: utf-8 -*-
1
+ # coding: utf-8
2
2
  # frozen_string_literal: true
3
- # Modify the PATH on windows so that the external DLLs will get loaded.
4
3
 
5
4
  require "rbconfig"
6
5
 
@@ -19,30 +18,32 @@ require_relative "nokogiri/extension"
19
18
  #
20
19
  # Here is an example:
21
20
  #
22
- # require 'nokogiri'
23
- # require 'open-uri'
21
+ # require 'nokogiri'
22
+ # require 'open-uri'
24
23
  #
25
- # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
24
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
26
25
  #
27
- # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
26
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
28
27
  #
29
- # # Do funky things with it using Nokogiri::XML::Node methods...
28
+ # # Do funky things with it using Nokogiri::XML::Node methods...
30
29
  #
31
- # ####
32
- # # Search for nodes by css
33
- # doc.css('h3.r a.l').each do |link|
34
- # puts link.content
35
- # end
30
+ # ####
31
+ # # Search for nodes by css
32
+ # doc.css('h3.r a.l').each do |link|
33
+ # puts link.content
34
+ # end
36
35
  #
37
- # See Nokogiri::XML::Searchable#css for more information about CSS searching.
38
- # See Nokogiri::XML::Searchable#xpath for more information about XPath searching.
36
+ # See also:
37
+ #
38
+ # - Nokogiri::XML::Searchable#css for more information about CSS searching
39
+ # - Nokogiri::XML::Searchable#xpath for more information about XPath searching
39
40
  module Nokogiri
40
41
  class << self
41
42
  ###
42
43
  # Parse an HTML or XML document. +string+ contains the document.
43
44
  def parse(string, url = nil, encoding = nil, options = nil)
44
45
  if string.respond_to?(:read) ||
45
- /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
46
+ /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])
46
47
  # Expect an HTML indicator to appear within the first 512
47
48
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
48
49
  # shouldn't be that long)
@@ -85,6 +86,7 @@ module Nokogiri
85
86
  Nokogiri(*args, &block).slop!
86
87
  end
87
88
 
89
+ # :nodoc:
88
90
  def install_default_aliases
89
91
  # Make sure to support some popular encoding aliases not known by
90
92
  # all iconv implementations.
@@ -105,7 +107,7 @@ end
105
107
  #
106
108
  # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
107
109
  def Nokogiri(*args, &block)
108
- if block_given?
110
+ if block
109
111
  Nokogiri::HTML4::Builder.new(&block).doc.root
110
112
  else
111
113
  Nokogiri.parse(*args)
@@ -113,6 +115,7 @@ def Nokogiri(*args, &block)
113
115
  end
114
116
 
115
117
  require_relative "nokogiri/version"
118
+ require_relative "nokogiri/class_resolver"
116
119
  require_relative "nokogiri/syntax_error"
117
120
  require_relative "nokogiri/xml"
118
121
  require_relative "nokogiri/xslt"
@@ -1,8 +1,9 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri'
3
2
 
4
- module XSD # :nodoc:
5
- module XMLParser # :nodoc:
3
+ require "nokogiri"
4
+
5
+ module XSD
6
+ module XMLParser
6
7
  ###
7
8
  # Nokogiri XML parser for soap4r.
8
9
  #
@@ -27,40 +28,40 @@ module XSD # :nodoc:
27
28
  class Nokogiri < XSD::XMLParser::Parser
28
29
  ###
29
30
  # Create a new XSD parser with +host+ and +opt+
30
- def initialize host, opt = {}
31
+ def initialize(host, opt = {})
31
32
  super
32
- @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || 'UTF-8')
33
+ @parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || "UTF-8")
33
34
  end
34
35
 
35
36
  ###
36
37
  # Start parsing +string_or_readable+
37
- def do_parse string_or_readable
38
+ def do_parse(string_or_readable)
38
39
  @parser.parse(string_or_readable)
39
40
  end
40
41
 
41
42
  ###
42
43
  # Handle the start_element event with +name+ and +attrs+
43
- def start_element name, attrs = []
44
+ def start_element(name, attrs = [])
44
45
  super(name, Hash[*attrs.flatten])
45
46
  end
46
47
 
47
48
  ###
48
49
  # Handle the end_element event with +name+
49
- def end_element name
50
+ def end_element(name)
50
51
  super
51
52
  end
52
53
 
53
54
  ###
54
55
  # Handle errors with message +msg+
55
- def error msg
56
- raise ParseError.new(msg)
56
+ def error(msg)
57
+ raise ParseError, msg
57
58
  end
58
- alias :warning :error
59
+ alias_method :warning, :error
59
60
 
60
61
  ###
61
62
  # Handle cdata_blocks containing +string+
62
- def cdata_block string
63
- characters string
63
+ def cdata_block(string)
64
+ characters(string)
64
65
  end
65
66
 
66
67
  ###
@@ -70,16 +71,16 @@ module XSD # :nodoc:
70
71
  # +prefix+ is the namespace prefix for the element
71
72
  # +uri+ is the associated namespace URI
72
73
  # +ns+ is a hash of namespace prefix:urls associated with the element
73
- def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
74
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
74
75
  ###
75
76
  # Deal with SAX v1 interface
76
- name = [prefix, name].compact.join(':')
77
- attributes = ns.map { |ns_prefix,ns_uri|
78
- [['xmlns', ns_prefix].compact.join(':'), ns_uri]
79
- } + attrs.map { |attr|
80
- [[attr.prefix, attr.localname].compact.join(':'), attr.value]
81
- }.flatten
82
- start_element name, attributes
77
+ name = [prefix, name].compact.join(":")
78
+ attributes = ns.map do |ns_prefix, ns_uri|
79
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
80
+ end + attrs.map do |attr|
81
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
82
+ end.flatten
83
+ start_element(name, attributes)
83
84
  end
84
85
 
85
86
  ###
@@ -87,13 +88,13 @@ module XSD # :nodoc:
87
88
  # +name+ is the element's name
88
89
  # +prefix+ is the namespace prefix associated with the element
89
90
  # +uri+ is the associated namespace URI
90
- def end_element_namespace name, prefix = nil, uri = nil
91
+ def end_element_namespace(name, prefix = nil, uri = nil)
91
92
  ###
92
93
  # Deal with SAX v1 interface
93
- end_element [prefix, name].compact.join(':')
94
+ end_element([prefix, name].compact.join(":"))
94
95
  end
95
96
 
96
- %w{ xmldecl start_document end_document comment }.each do |name|
97
+ ["xmldecl", "start_document", "end_document", "comment"].each do |name|
97
98
  class_eval %{ def #{name}(*args); end }
98
99
  end
99
100
 
@@ -31,18 +31,18 @@ diff --git a/xmlstring.c b/xmlstring.c
31
31
  index e8a1e45d..df247dff 100644
32
32
  --- a/xmlstring.c
33
33
  +++ b/xmlstring.c
34
- @@ -423,14 +423,9 @@ xmlStrsub(const xmlChar *str, int start, int len) {
34
+ @@ -423,12 +423,7 @@ xmlStrsub(const xmlChar *str, int start, int len) {
35
35
 
36
36
  int
37
37
  xmlStrlen(const xmlChar *str) {
38
- - int len = 0;
38
+ - size_t len = 0;
39
39
  -
40
40
  if (str == NULL) return(0);
41
41
  - while (*str != 0) { /* non input consuming */
42
42
  - str++;
43
43
  - len++;
44
44
  - }
45
- - return(len);
45
+ - return(len > INT_MAX ? 0 : len);
46
46
  +
47
47
  + return strlen((const char*)str);
48
48
  }