nokogiri 1.11.4 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (111)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +243 -22
  3. data/LICENSE.md +1 -1
  4. data/README.md +6 -5
  5. data/ext/nokogiri/depend +35 -34
  6. data/ext/nokogiri/extconf.rb +185 -103
  7. data/ext/nokogiri/gumbo.c +584 -0
  8. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  9. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  10. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  11. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +6 -5
  12. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  13. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  14. data/ext/nokogiri/nokogiri.c +70 -38
  15. data/ext/nokogiri/nokogiri.h +19 -9
  16. data/ext/nokogiri/xml_document.c +14 -14
  17. data/ext/nokogiri/xml_element_content.c +2 -0
  18. data/ext/nokogiri/xml_encoding_handler.c +11 -6
  19. data/ext/nokogiri/xml_namespace.c +4 -2
  20. data/ext/nokogiri/xml_node.c +123 -108
  21. data/ext/nokogiri/xml_node_set.c +20 -20
  22. data/ext/nokogiri/xml_reader.c +2 -0
  23. data/ext/nokogiri/xml_sax_parser.c +6 -6
  24. data/ext/nokogiri/xml_sax_parser_context.c +2 -0
  25. data/ext/nokogiri/xml_schema.c +2 -0
  26. data/ext/nokogiri/xml_xpath_context.c +67 -65
  27. data/ext/nokogiri/xslt_stylesheet.c +2 -1
  28. data/gumbo-parser/CHANGES.md +63 -0
  29. data/gumbo-parser/Makefile +101 -0
  30. data/gumbo-parser/THANKS +27 -0
  31. data/gumbo-parser/src/Makefile +34 -0
  32. data/gumbo-parser/src/README.md +41 -0
  33. data/gumbo-parser/src/ascii.c +75 -0
  34. data/gumbo-parser/src/ascii.h +115 -0
  35. data/gumbo-parser/src/attribute.c +42 -0
  36. data/gumbo-parser/src/attribute.h +17 -0
  37. data/gumbo-parser/src/char_ref.c +22225 -0
  38. data/gumbo-parser/src/char_ref.h +29 -0
  39. data/gumbo-parser/src/char_ref.rl +2154 -0
  40. data/gumbo-parser/src/error.c +626 -0
  41. data/gumbo-parser/src/error.h +148 -0
  42. data/gumbo-parser/src/foreign_attrs.c +104 -0
  43. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  44. data/gumbo-parser/src/gumbo.h +943 -0
  45. data/gumbo-parser/src/insertion_mode.h +33 -0
  46. data/gumbo-parser/src/macros.h +91 -0
  47. data/gumbo-parser/src/parser.c +4886 -0
  48. data/gumbo-parser/src/parser.h +41 -0
  49. data/gumbo-parser/src/replacement.h +33 -0
  50. data/gumbo-parser/src/string_buffer.c +103 -0
  51. data/gumbo-parser/src/string_buffer.h +68 -0
  52. data/gumbo-parser/src/string_piece.c +48 -0
  53. data/gumbo-parser/src/svg_attrs.c +174 -0
  54. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  55. data/gumbo-parser/src/svg_tags.c +137 -0
  56. data/gumbo-parser/src/svg_tags.gperf +55 -0
  57. data/gumbo-parser/src/tag.c +222 -0
  58. data/gumbo-parser/src/tag_lookup.c +382 -0
  59. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  60. data/gumbo-parser/src/tag_lookup.h +13 -0
  61. data/gumbo-parser/src/token_buffer.c +79 -0
  62. data/gumbo-parser/src/token_buffer.h +71 -0
  63. data/gumbo-parser/src/token_type.h +17 -0
  64. data/gumbo-parser/src/tokenizer.c +3463 -0
  65. data/gumbo-parser/src/tokenizer.h +112 -0
  66. data/gumbo-parser/src/tokenizer_states.h +339 -0
  67. data/gumbo-parser/src/utf8.c +245 -0
  68. data/gumbo-parser/src/utf8.h +164 -0
  69. data/gumbo-parser/src/util.c +68 -0
  70. data/gumbo-parser/src/util.h +30 -0
  71. data/gumbo-parser/src/vector.c +111 -0
  72. data/gumbo-parser/src/vector.h +45 -0
  73. data/lib/nokogiri/css/parser.rb +1 -1
  74. data/lib/nokogiri/css/parser.y +1 -1
  75. data/lib/nokogiri/css/syntax_error.rb +1 -1
  76. data/lib/nokogiri/css.rb +14 -14
  77. data/lib/nokogiri/extension.rb +7 -2
  78. data/lib/nokogiri/gumbo.rb +14 -0
  79. data/lib/nokogiri/html.rb +31 -27
  80. data/lib/nokogiri/{html → html4}/builder.rb +2 -2
  81. data/lib/nokogiri/{html → html4}/document.rb +4 -4
  82. data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
  83. data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
  84. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
  85. data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
  86. data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
  87. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  88. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
  89. data/lib/nokogiri/html4.rb +40 -0
  90. data/lib/nokogiri/html5/document.rb +74 -0
  91. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  92. data/lib/nokogiri/html5/node.rb +93 -0
  93. data/lib/nokogiri/html5.rb +473 -0
  94. data/lib/nokogiri/version/constant.rb +1 -1
  95. data/lib/nokogiri/version/info.rb +12 -2
  96. data/lib/nokogiri/xml/builder.rb +38 -0
  97. data/lib/nokogiri/xml/document.rb +46 -0
  98. data/lib/nokogiri/xml/node/save_options.rb +1 -1
  99. data/lib/nokogiri/xml/node.rb +6 -5
  100. data/lib/nokogiri/xml/parse_options.rb +2 -0
  101. data/lib/nokogiri/xml/pp.rb +2 -2
  102. data/lib/nokogiri/xml/sax/document.rb +24 -30
  103. data/lib/nokogiri/xml/sax.rb +4 -4
  104. data/lib/nokogiri/xml/xpath.rb +2 -2
  105. data/lib/nokogiri/xml.rb +35 -36
  106. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  107. data/lib/nokogiri/xslt.rb +16 -16
  108. data/lib/nokogiri.rb +31 -29
  109. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  110. metadata +101 -58
  111. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
@@ -1,7 +1,6 @@
1
1
  # encoding: UTF-8
2
2
  # frozen_string_literal: true
3
3
  require "stringio"
4
- require "nokogiri/xml/node/save_options"
5
4
 
6
5
  module Nokogiri
7
6
  module XML
@@ -837,7 +836,7 @@ module Nokogiri
837
836
  node_set = in_context(contents, options.to_i)
838
837
  if (node_set.empty? && (document.errors.length > error_count))
839
838
  if options.recover?
840
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
839
+ fragment = Nokogiri::HTML4::DocumentFragment.parse contents
841
840
  node_set = fragment.children
842
841
  else
843
842
  raise document.errors[error_count]
@@ -883,7 +882,7 @@ module Nokogiri
883
882
  type == DOCUMENT_NODE
884
883
  end
885
884
 
886
- # Returns true if this is an HTML::Document node
885
+ # Returns true if this is an HTML4::Document node
887
886
  def html?
888
887
  type == HTML_DOCUMENT_NODE
889
888
  end
@@ -909,11 +908,11 @@ module Nokogiri
909
908
  end
910
909
 
911
910
  ###
912
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
911
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
913
912
  # nil on XML documents and on unknown tags.
914
913
  def description
915
914
  return nil if document.xml?
916
- Nokogiri::HTML::ElementDescription[name]
915
+ Nokogiri::HTML4::ElementDescription[name]
917
916
  end
918
917
 
919
918
  ###
@@ -1235,3 +1234,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1235
1234
  end
1236
1235
  end
1237
1236
  end
1237
+
1238
+ require_relative "node/save_options"
@@ -71,6 +71,8 @@ module Nokogiri
71
71
 
72
72
  # the default options used for parsing XML documents
73
73
  DEFAULT_XML = RECOVER | NONET
74
+ # the default options used for parsing XSLT stylesheets
75
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
74
76
  # the default options used for parsing HTML documents
75
77
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
76
78
  # the default options used for parsing XML schemas
@@ -1,3 +1,3 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/pp/node'
3
- require 'nokogiri/xml/pp/character_data'
2
+ require_relative "pp/node"
3
+ require_relative "pp/character_data"
@@ -2,20 +2,19 @@
2
2
  module Nokogiri
3
3
  module XML
4
4
  ###
5
- # SAX Parsers are event driven parsers. Nokogiri provides two different
6
- # event based parsers when dealing with XML. If you want to do SAX style
7
- # parsing using HTML, check out Nokogiri::HTML::SAX.
5
+ # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
6
+ # dealing with XML. If you want to do SAX style parsing using HTML, check out
7
+ # Nokogiri::HTML4::SAX.
8
8
  #
9
- # The basic way a SAX style parser works is by creating a parser,
10
- # telling the parser about the events we're interested in, then giving
11
- # the parser some XML to process. The parser will notify you when
12
- # it encounters events you said you would like to know about.
9
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
10
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
11
+ # you when it encounters events you said you would like to know about.
13
12
  #
14
- # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
15
- # and implement the methods for which you would like notification.
13
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
14
+ # methods for which you would like notification.
16
15
  #
17
- # For example, if I want to be notified when a document ends, and when an
18
- # element starts, I would write a class like this:
16
+ # For example, if I want to be notified when a document ends, and when an element starts, I
17
+ # would write a class like this:
19
18
  #
20
19
  # class MyDocument < Nokogiri::XML::SAX::Document
21
20
  # def end_document
@@ -27,8 +26,7 @@ module Nokogiri
27
26
  # end
28
27
  # end
29
28
  #
30
- # Then I would instantiate a SAX parser with this document, and feed the
31
- # parser some XML
29
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
32
30
  #
33
31
  # # Create a new parser
34
32
  # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
@@ -36,25 +34,21 @@ module Nokogiri
36
34
  # # Feed the parser some XML
37
35
  # parser.parse(File.open(ARGV[0]))
38
36
  #
39
- # Now my document handler will be called when each node starts, and when
40
- # then document ends. To see what kinds of events are available, take
41
- # a look at Nokogiri::XML::SAX::Document.
37
+ # Now my document handler will be called when each node starts, and when then document ends. To
38
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
42
39
  #
43
- # Two SAX parsers for XML are available, a parser that reads from a string
44
- # or IO object as it feels necessary, and a parser that lets you spoon
45
- # feed it XML. If you want to let Nokogiri deal with reading your XML,
46
- # use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
40
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
41
+ # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
42
+ # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
47
43
  # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
48
44
  module SAX
49
45
  ###
50
- # This class is used for registering types of events you are interested
51
- # in handling. All of the methods on this class are available as
52
- # possible events while parsing an XML document. To register for any
53
- # particular event, just subclass this class and implement the methods
54
- # you are interested in knowing about.
46
+ # This class is used for registering types of events you are interested in handling. All of
47
+ # the methods on this class are available as possible events while parsing an XML document. To
48
+ # register for any particular event, just subclass this class and implement the methods you
49
+ # are interested in knowing about.
55
50
  #
56
- # To only be notified about start and end element events, write a class
57
- # like this:
51
+ # To only be notified about start and end element events, write a class like this:
58
52
  #
59
53
  # class MyDocument < Nokogiri::XML::SAX::Document
60
54
  # def start_element name, attrs = []
@@ -66,8 +60,8 @@ module Nokogiri
66
60
  # end
67
61
  # end
68
62
  #
69
- # You can use this event handler for any SAX style parser included with
70
- # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
63
+ # You can use this event handler for any SAX style parser included with Nokogiri. See
64
+ # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
71
65
  class Document
72
66
  ###
73
67
  # Called when an XML declaration is parsed
@@ -129,7 +123,7 @@ module Nokogiri
129
123
  end
130
124
 
131
125
  ###
132
- # Characters read between a tag. This method might be called multiple
126
+ # Characters read between a tag. This method might be called multiple
133
127
  # times given one contiguous string of characters.
134
128
  #
135
129
  # +string+ contains the character data
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/sax/document'
3
- require 'nokogiri/xml/sax/parser_context'
4
- require 'nokogiri/xml/sax/parser'
5
- require 'nokogiri/xml/sax/push_parser'
2
+ require_relative "sax/document"
3
+ require_relative "sax/parser_context"
4
+ require_relative "sax/parser"
5
+ require_relative "sax/push_parser"
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/xpath/syntax_error'
3
-
4
2
  module Nokogiri
5
3
  module XML
6
4
  module XPath
7
5
  end
8
6
  end
9
7
  end
8
+
9
+ require_relative "xpath/syntax_error"
data/lib/nokogiri/xml.rb CHANGED
@@ -1,38 +1,9 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/pp'
3
- require 'nokogiri/xml/parse_options'
4
- require 'nokogiri/xml/sax'
5
- require 'nokogiri/xml/searchable'
6
- require 'nokogiri/xml/node'
7
- require 'nokogiri/xml/attribute_decl'
8
- require 'nokogiri/xml/element_decl'
9
- require 'nokogiri/xml/element_content'
10
- require 'nokogiri/xml/character_data'
11
- require 'nokogiri/xml/namespace'
12
- require 'nokogiri/xml/attr'
13
- require 'nokogiri/xml/dtd'
14
- require 'nokogiri/xml/cdata'
15
- require 'nokogiri/xml/text'
16
- require 'nokogiri/xml/document'
17
- require 'nokogiri/xml/document_fragment'
18
- require 'nokogiri/xml/processing_instruction'
19
- require 'nokogiri/xml/node_set'
20
- require 'nokogiri/xml/syntax_error'
21
- require 'nokogiri/xml/xpath'
22
- require 'nokogiri/xml/xpath_context'
23
- require 'nokogiri/xml/builder'
24
- require 'nokogiri/xml/reader'
25
- require 'nokogiri/xml/notation'
26
- require 'nokogiri/xml/entity_decl'
27
- require 'nokogiri/xml/entity_reference'
28
- require 'nokogiri/xml/schema'
29
- require 'nokogiri/xml/relax_ng'
30
-
31
2
  module Nokogiri
32
3
  class << self
33
4
  ###
34
5
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
35
- def XML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block
6
+ def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
36
7
  Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
37
8
  end
38
9
  end
@@ -41,20 +12,19 @@ module Nokogiri
41
12
  # Original C14N 1.0 spec canonicalization
42
13
  XML_C14N_1_0 = 0
43
14
  # Exclusive C14N 1.0 spec canonicalization
44
- XML_C14N_EXCLUSIVE_1_0 = 1
15
+ XML_C14N_EXCLUSIVE_1_0 = 1
45
16
  # C14N 1.1 spec canonicalization
46
17
  XML_C14N_1_1 = 2
47
18
  class << self
48
19
  ###
49
20
  # Parse an XML document using the Nokogiri::XML::Reader API. See
50
21
  # Nokogiri::XML::Reader for mor information
51
- def Reader string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT
52
-
22
+ def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
53
23
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
54
24
  # Give the options to the user
55
25
  yield options if block_given?
56
26
 
57
- if string_or_io.respond_to? :read
27
+ if string_or_io.respond_to?(:read)
58
28
  return Reader.from_io(string_or_io, url, encoding, options.to_i)
59
29
  end
60
30
  Reader.from_memory(string_or_io, url, encoding, options.to_i)
@@ -62,15 +32,44 @@ module Nokogiri
62
32
 
63
33
  ###
64
34
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
65
- def parse thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
35
+ def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
66
36
  Document.parse(thing, url, encoding, options, &block)
67
37
  end
68
38
 
69
39
  ####
70
40
  # Parse a fragment from +string+ in to a NodeSet.
71
- def fragment string
41
+ def fragment(string)
72
42
  XML::DocumentFragment.parse(string)
73
43
  end
74
44
  end
75
45
  end
76
46
  end
47
+
48
+ require_relative "xml/pp"
49
+ require_relative "xml/parse_options"
50
+ require_relative "xml/sax"
51
+ require_relative "xml/searchable"
52
+ require_relative "xml/node"
53
+ require_relative "xml/attribute_decl"
54
+ require_relative "xml/element_decl"
55
+ require_relative "xml/element_content"
56
+ require_relative "xml/character_data"
57
+ require_relative "xml/namespace"
58
+ require_relative "xml/attr"
59
+ require_relative "xml/dtd"
60
+ require_relative "xml/cdata"
61
+ require_relative "xml/text"
62
+ require_relative "xml/document"
63
+ require_relative "xml/document_fragment"
64
+ require_relative "xml/processing_instruction"
65
+ require_relative "xml/node_set"
66
+ require_relative "xml/syntax_error"
67
+ require_relative "xml/xpath"
68
+ require_relative "xml/xpath_context"
69
+ require_relative "xml/builder"
70
+ require_relative "xml/reader"
71
+ require_relative "xml/notation"
72
+ require_relative "xml/entity_decl"
73
+ require_relative "xml/entity_reference"
74
+ require_relative "xml/schema"
75
+ require_relative "xml/relax_ng"
@@ -18,7 +18,7 @@ module Nokogiri
18
18
  # Apply an XSLT stylesheet to an XML::Document.
19
19
  # +params+ is an array of strings used as XSLT parameters.
20
20
  # returns serialized document
21
- def apply_to document, params = []
21
+ def apply_to(document, params = [])
22
22
  serialize(transform(document, params))
23
23
  end
24
24
  end
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xslt/stylesheet'
3
-
4
2
  module Nokogiri
5
3
  class << self
6
4
  ###
@@ -22,32 +20,32 @@ module Nokogiri
22
20
  class << self
23
21
  ###
24
22
  # Parse the stylesheet in +string+, register any +modules+
25
- def parse string, modules = {}
23
+ def parse(string, modules = {})
26
24
  modules.each do |url, klass|
27
- XSLT.register url, klass
25
+ XSLT.register(url, klass)
28
26
  end
29
27
 
28
+ doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
30
29
  if Nokogiri.jruby?
31
- Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
30
+ Stylesheet.parse_stylesheet_doc(doc, string)
32
31
  else
33
- Stylesheet.parse_stylesheet_doc(XML.parse(string))
32
+ Stylesheet.parse_stylesheet_doc(doc)
34
33
  end
35
34
  end
36
35
 
37
36
  ###
38
37
  # Quote parameters in +params+ for stylesheet safety
39
- def quote_params params
38
+ def quote_params(params)
40
39
  parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
41
- parray.each_with_index do |v,i|
42
- if i % 2 > 0
43
- parray[i]=
44
- if v =~ /'/
45
- "concat('#{ v.gsub(/'/, %q{', "'", '}) }')"
46
- else
47
- "'#{v}'";
48
- end
40
+ parray.each_with_index do |v, i|
41
+ parray[i] = if i % 2 > 0
42
+ if v =~ /'/
43
+ "concat('#{v.gsub(/'/, %q{', "'", '})}')"
44
+ else
45
+ "'#{v}'"
46
+ end
49
47
  else
50
- parray[i] = v.to_s
48
+ v.to_s
51
49
  end
52
50
  end
53
51
  parray.flatten
@@ -55,3 +53,5 @@ module Nokogiri
55
53
  end
56
54
  end
57
55
  end
56
+
57
+ require_relative "xslt/stylesheet"
data/lib/nokogiri.rb CHANGED
@@ -2,38 +2,29 @@
2
2
  # frozen_string_literal: true
3
3
  # Modify the PATH on windows so that the external DLLs will get loaded.
4
4
 
5
- require 'rbconfig'
5
+ require "rbconfig"
6
6
 
7
7
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
8
- require 'nokogiri/jruby/dependencies'
8
+ require_relative "nokogiri/jruby/dependencies"
9
9
  end
10
10
 
11
- require 'nokogiri/extension'
12
-
13
- require 'nokogiri/version'
14
- require 'nokogiri/syntax_error'
15
- require 'nokogiri/xml'
16
- require 'nokogiri/xslt'
17
- require 'nokogiri/html'
18
- require 'nokogiri/decorators/slop'
19
- require 'nokogiri/css'
20
- require 'nokogiri/html/builder'
11
+ require_relative "nokogiri/extension"
21
12
 
22
13
  # Nokogiri parses and searches XML/HTML very quickly, and also has
23
14
  # correctly implemented CSS3 selector support as well as XPath 1.0
24
15
  # support.
25
16
  #
26
17
  # Parsing a document returns either a Nokogiri::XML::Document, or a
27
- # Nokogiri::HTML::Document depending on the kind of document you parse.
18
+ # Nokogiri::HTML4::Document depending on the kind of document you parse.
28
19
  #
29
20
  # Here is an example:
30
21
  #
31
22
  # require 'nokogiri'
32
23
  # require 'open-uri'
33
24
  #
34
- # # Get a Nokogiri::HTML:Document for the page we’re interested in...
25
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
35
26
  #
36
- # doc = Nokogiri::HTML(URI.open('http://www.google.com/search?q=tenderlove'))
27
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
37
28
  #
38
29
  # # Do funky things with it using Nokogiri::XML::Node methods...
39
30
  #
@@ -49,27 +40,27 @@ module Nokogiri
49
40
  class << self
50
41
  ###
51
42
  # Parse an HTML or XML document. +string+ contains the document.
52
- def parse string, url = nil, encoding = nil, options = nil
43
+ def parse(string, url = nil, encoding = nil, options = nil)
53
44
  if string.respond_to?(:read) ||
54
45
  /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
55
46
  # Expect an HTML indicator to appear within the first 512
56
47
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
57
48
  # shouldn't be that long)
58
- Nokogiri.HTML(string, url, encoding,
49
+ Nokogiri.HTML4(string, url, encoding,
59
50
  options || XML::ParseOptions::DEFAULT_HTML)
60
51
  else
61
52
  Nokogiri.XML(string, url, encoding,
62
53
  options || XML::ParseOptions::DEFAULT_XML)
63
- end.tap { |doc|
54
+ end.tap do |doc|
64
55
  yield doc if block_given?
65
- }
56
+ end
66
57
  end
67
58
 
68
59
  ###
69
60
  # Create a new Nokogiri::XML::DocumentFragment
70
- def make input = nil, opts = {}, &blk
61
+ def make(input = nil, opts = {}, &blk)
71
62
  if input
72
- Nokogiri::HTML.fragment(input).children.first
63
+ Nokogiri::HTML4.fragment(input).children.first
73
64
  else
74
65
  Nokogiri(&blk)
75
66
  end
@@ -98,10 +89,10 @@ module Nokogiri
98
89
  # Make sure to support some popular encoding aliases not known by
99
90
  # all iconv implementations.
100
91
  {
101
- 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
102
- }.each { |alias_name, name|
92
+ "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
93
+ }.each do |alias_name, name|
103
94
  EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
104
- }
95
+ end
105
96
  end
106
97
  end
107
98
 
@@ -109,15 +100,26 @@ module Nokogiri
109
100
  end
110
101
 
111
102
  ###
112
- # Parse a document contained in +args+. Nokogiri will try to guess what
113
- # type of document you are attempting to parse. For more information, see
114
- # Nokogiri.parse
103
+ # Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
104
+ # attempting to parse. For more information, see Nokogiri.parse
115
105
  #
116
- # To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
106
+ # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
117
107
  def Nokogiri(*args, &block)
118
108
  if block_given?
119
- Nokogiri::HTML::Builder.new(&block).doc.root
109
+ Nokogiri::HTML4::Builder.new(&block).doc.root
120
110
  else
121
111
  Nokogiri.parse(*args)
122
112
  end
123
113
  end
114
+
115
+ require_relative "nokogiri/version"
116
+ require_relative "nokogiri/syntax_error"
117
+ require_relative "nokogiri/xml"
118
+ require_relative "nokogiri/xslt"
119
+ require_relative "nokogiri/html4"
120
+ require_relative "nokogiri/html"
121
+ require_relative "nokogiri/decorators/slop"
122
+ require_relative "nokogiri/css"
123
+ require_relative "nokogiri/html4/builder"
124
+
125
+ require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
@@ -0,0 +1,31 @@
1
+ From 3e1aad4fe584747fd7d17cc7b2863a78e2d21a77 Mon Sep 17 00:00:00 2001
2
+ From: Nick Wellnhofer <wellnhofer@aevum.de>
3
+ Date: Wed, 2 Jun 2021 17:31:49 +0200
4
+ Subject: [PATCH] Fix XPath recursion limit
5
+
6
+ Fix accounting of recursion depth when parsing XPath expressions.
7
+
8
+ This silly bug introduced in commit 804c5297 could lead to spurious
9
+ errors when parsing larger expressions or XSLT documents.
10
+
11
+ Should fix #264.
12
+ ---
13
+ xpath.c | 2 +-
14
+ 1 file changed, 1 insertion(+), 1 deletion(-)
15
+
16
+ diff --git a/xpath.c b/xpath.c
17
+ index 7497ba0..1aa2f1a 100644
18
+ --- a/xpath.c
19
+ +++ b/xpath.c
20
+ @@ -10983,7 +10983,7 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) {
21
+ }
22
+
23
+ if (xpctxt != NULL)
24
+ - xpctxt->depth -= 1;
25
+ + xpctxt->depth -= 10;
26
+ }
27
+
28
+ /**
29
+ --
30
+ 2.31.0
31
+