nokogiri 1.11.2 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (127) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +243 -22
  3. data/LICENSE.md +1 -1
  4. data/README.md +7 -6
  5. data/dependencies.yml +12 -12
  6. data/ext/nokogiri/depend +35 -34
  7. data/ext/nokogiri/extconf.rb +185 -103
  8. data/ext/nokogiri/gumbo.c +584 -0
  9. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  10. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  11. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  12. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +6 -5
  13. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  14. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  15. data/ext/nokogiri/nokogiri.c +70 -38
  16. data/ext/nokogiri/nokogiri.h +19 -9
  17. data/ext/nokogiri/xml_document.c +50 -49
  18. data/ext/nokogiri/xml_element_content.c +2 -0
  19. data/ext/nokogiri/xml_encoding_handler.c +11 -6
  20. data/ext/nokogiri/xml_namespace.c +4 -2
  21. data/ext/nokogiri/xml_node.c +147 -133
  22. data/ext/nokogiri/xml_node_set.c +20 -20
  23. data/ext/nokogiri/xml_reader.c +2 -0
  24. data/ext/nokogiri/xml_sax_parser.c +6 -6
  25. data/ext/nokogiri/xml_sax_parser_context.c +2 -0
  26. data/ext/nokogiri/xml_schema.c +2 -0
  27. data/ext/nokogiri/xml_xpath_context.c +67 -65
  28. data/ext/nokogiri/xslt_stylesheet.c +2 -1
  29. data/gumbo-parser/CHANGES.md +63 -0
  30. data/gumbo-parser/Makefile +101 -0
  31. data/gumbo-parser/THANKS +27 -0
  32. data/gumbo-parser/src/Makefile +34 -0
  33. data/gumbo-parser/src/README.md +41 -0
  34. data/gumbo-parser/src/ascii.c +75 -0
  35. data/gumbo-parser/src/ascii.h +115 -0
  36. data/gumbo-parser/src/attribute.c +42 -0
  37. data/gumbo-parser/src/attribute.h +17 -0
  38. data/gumbo-parser/src/char_ref.c +22225 -0
  39. data/gumbo-parser/src/char_ref.h +29 -0
  40. data/gumbo-parser/src/char_ref.rl +2154 -0
  41. data/gumbo-parser/src/error.c +626 -0
  42. data/gumbo-parser/src/error.h +148 -0
  43. data/gumbo-parser/src/foreign_attrs.c +104 -0
  44. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  45. data/gumbo-parser/src/gumbo.h +943 -0
  46. data/gumbo-parser/src/insertion_mode.h +33 -0
  47. data/gumbo-parser/src/macros.h +91 -0
  48. data/gumbo-parser/src/parser.c +4886 -0
  49. data/gumbo-parser/src/parser.h +41 -0
  50. data/gumbo-parser/src/replacement.h +33 -0
  51. data/gumbo-parser/src/string_buffer.c +103 -0
  52. data/gumbo-parser/src/string_buffer.h +68 -0
  53. data/gumbo-parser/src/string_piece.c +48 -0
  54. data/gumbo-parser/src/svg_attrs.c +174 -0
  55. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  56. data/gumbo-parser/src/svg_tags.c +137 -0
  57. data/gumbo-parser/src/svg_tags.gperf +55 -0
  58. data/gumbo-parser/src/tag.c +222 -0
  59. data/gumbo-parser/src/tag_lookup.c +382 -0
  60. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  61. data/gumbo-parser/src/tag_lookup.h +13 -0
  62. data/gumbo-parser/src/token_buffer.c +79 -0
  63. data/gumbo-parser/src/token_buffer.h +71 -0
  64. data/gumbo-parser/src/token_type.h +17 -0
  65. data/gumbo-parser/src/tokenizer.c +3463 -0
  66. data/gumbo-parser/src/tokenizer.h +112 -0
  67. data/gumbo-parser/src/tokenizer_states.h +339 -0
  68. data/gumbo-parser/src/utf8.c +245 -0
  69. data/gumbo-parser/src/utf8.h +164 -0
  70. data/gumbo-parser/src/util.c +68 -0
  71. data/gumbo-parser/src/util.h +30 -0
  72. data/gumbo-parser/src/vector.c +111 -0
  73. data/gumbo-parser/src/vector.h +45 -0
  74. data/lib/nokogiri/css/parser.rb +1 -1
  75. data/lib/nokogiri/css/parser.y +1 -1
  76. data/lib/nokogiri/css/syntax_error.rb +1 -1
  77. data/lib/nokogiri/css.rb +14 -14
  78. data/lib/nokogiri/extension.rb +7 -2
  79. data/lib/nokogiri/gumbo.rb +14 -0
  80. data/lib/nokogiri/html.rb +31 -27
  81. data/lib/nokogiri/{html → html4}/builder.rb +2 -2
  82. data/lib/nokogiri/{html → html4}/document.rb +4 -4
  83. data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
  84. data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
  85. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
  86. data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
  87. data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
  88. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  89. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
  90. data/lib/nokogiri/html4.rb +40 -0
  91. data/lib/nokogiri/html5/document.rb +74 -0
  92. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  93. data/lib/nokogiri/html5/node.rb +93 -0
  94. data/lib/nokogiri/html5.rb +473 -0
  95. data/lib/nokogiri/version/constant.rb +1 -1
  96. data/lib/nokogiri/version/info.rb +12 -2
  97. data/lib/nokogiri/xml/builder.rb +38 -0
  98. data/lib/nokogiri/xml/document.rb +89 -17
  99. data/lib/nokogiri/xml/node/save_options.rb +1 -1
  100. data/lib/nokogiri/xml/node.rb +7 -5
  101. data/lib/nokogiri/xml/parse_options.rb +2 -0
  102. data/lib/nokogiri/xml/pp.rb +2 -2
  103. data/lib/nokogiri/xml/sax/document.rb +24 -30
  104. data/lib/nokogiri/xml/sax.rb +4 -4
  105. data/lib/nokogiri/xml/xpath.rb +2 -2
  106. data/lib/nokogiri/xml.rb +35 -36
  107. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  108. data/lib/nokogiri/xslt.rb +16 -16
  109. data/lib/nokogiri.rb +31 -29
  110. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  111. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  112. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  113. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
  114. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  115. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  116. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  117. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  118. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  119. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  120. metadata +110 -69
  121. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  122. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  123. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  124. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  125. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  126. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
  127. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -14,11 +14,12 @@ module Nokogiri
14
14
  # Nokogiri::XML::Searchable#xpath
15
15
  #
16
16
  class Document < Nokogiri::XML::Node
17
- # I'm ignoring unicode characters here.
18
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
17
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
18
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
19
+ # characters in NCNAMEs.
19
20
  NCNAME_START_CHAR = "A-Za-z_"
20
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
21
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
21
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
22
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
22
23
 
23
24
  ##
24
25
  # Parse an XML file.
@@ -112,39 +113,110 @@ module Nokogiri
112
113
  # A list of Nokogiri::XML::SyntaxError found when parsing a document
113
114
  attr_accessor :errors
114
115
 
116
+ # When true, reparented elements without a namespace will inherit their new parent's
117
+ # namespace (if one exists). Defaults to +false+.
118
+ #
119
+ # @example Default behavior of namespace inheritance
120
+ # xml = <<~EOF
121
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
122
+ # <foo:parent>
123
+ # </foo:parent>
124
+ # </root>
125
+ # EOF
126
+ # doc = Nokogiri::XML(xml)
127
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
128
+ # parent.add_child("<child></child>")
129
+ # doc.to_xml
130
+ # # => <?xml version="1.0"?>
131
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
132
+ # # <foo:parent>
133
+ # # <child/>
134
+ # # </foo:parent>
135
+ # # </root>
136
+ #
137
+ # @example Setting namespace inheritance to +true+
138
+ # xml = <<~EOF
139
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
140
+ # <foo:parent>
141
+ # </foo:parent>
142
+ # </root>
143
+ # EOF
144
+ # doc = Nokogiri::XML(xml)
145
+ # doc.namespace_inheritance = true
146
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
147
+ # parent.add_child("<child></child>")
148
+ # doc.to_xml
149
+ # # => <?xml version="1.0"?>
150
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
151
+ # # <foo:parent>
152
+ # # <foo:child/>
153
+ # # </foo:parent>
154
+ # # </root>
155
+ #
156
+ # @return [Boolean]
157
+ #
158
+ # @since v1.12.4
159
+ attr_accessor :namespace_inheritance
160
+
115
161
  def initialize *args # :nodoc:
116
162
  @errors = []
117
163
  @decorators = nil
164
+ @namespace_inheritance = false
118
165
  end
119
166
 
120
167
  ##
121
- # Create an element with +name+, and optionally setting the content and attributes.
168
+ # Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
169
+ # setting contents or attributes.
170
+ #
171
+ # Arguments may be passed to initialize the element:
172
+ # - a +Hash+ argument will be used to set attributes
173
+ # - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
174
+ #
175
+ # A block may be passed to mutate the node.
176
+ #
177
+ # @param name [String]
178
+ # @param contents_or_attrs [#to_s,Hash]
179
+ # @yieldparam node [Nokogiri::XML::Element]
180
+ # @return [Nokogiri::XML::Element]
181
+ #
182
+ # @example An empty element without attributes
183
+ # doc.create_element("div")
184
+ # # => <div></div>
185
+ #
186
+ # @example An element with contents
187
+ # doc.create_element("div", "contents")
188
+ # # => <div>contents</div>
189
+ #
190
+ # @example An element with attributes
191
+ # doc.create_element("div", {"class" => "container"})
192
+ # # => <div class='container'></div>
193
+ #
194
+ # @example An element with contents and attributes
195
+ # doc.create_element("div", "contents", {"class" => "container"})
196
+ # # => <div class='container'>contents</div>
122
197
  #
123
- # doc.create_element "div" # <div></div>
124
- # doc.create_element "div", :class => "container" # <div class='container'></div>
125
- # doc.create_element "div", "contents" # <div>contents</div>
126
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
127
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
198
+ # @example Passing a block to mutate the element
199
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
128
200
  #
129
- def create_element name, *args, &block
201
+ def create_element(name, *contents_or_attrs, &block)
130
202
  elm = Nokogiri::XML::Element.new(name, self, &block)
131
- args.each do |arg|
203
+ contents_or_attrs.each do |arg|
132
204
  case arg
133
205
  when Hash
134
- arg.each { |k,v|
206
+ arg.each do |k, v|
135
207
  key = k.to_s
136
208
  if key =~ NCNAME_RE
137
- ns_name = key.split(":", 2)[1]
138
- elm.add_namespace_definition ns_name, v
209
+ ns_name = Regexp.last_match(1)
210
+ elm.add_namespace_definition(ns_name, v)
139
211
  else
140
212
  elm[k.to_s] = v.to_s
141
213
  end
142
- }
214
+ end
143
215
  else
144
216
  elm.content = arg
145
217
  end
146
218
  end
147
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
219
+ if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
148
220
  elm.namespace = ns
149
221
  end
150
222
  elm
@@ -34,7 +34,7 @@ module Nokogiri
34
34
  DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
35
35
  end
36
36
  # the default for XHTML document
37
- DEFAULT_XHTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML
37
+ DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
38
38
 
39
39
  # Integer representation of the SaveOptions
40
40
  attr_reader :options
@@ -1,7 +1,6 @@
1
1
  # encoding: UTF-8
2
2
  # frozen_string_literal: true
3
3
  require "stringio"
4
- require "nokogiri/xml/node/save_options"
5
4
 
6
5
  module Nokogiri
7
6
  module XML
@@ -93,6 +92,7 @@ module Nokogiri
93
92
  # Create a new node with +name+ sharing GC lifecycle with +document+.
94
93
  # @param name [String]
95
94
  # @param document [Nokogiri::XML::Document]
95
+ # @yieldparam node [Nokogiri::XML::Node]
96
96
  # @return [Nokogiri::XML::Node]
97
97
  # @see Nokogiri::XML::Node.new
98
98
  def initialize(name, document)
@@ -836,7 +836,7 @@ module Nokogiri
836
836
  node_set = in_context(contents, options.to_i)
837
837
  if (node_set.empty? && (document.errors.length > error_count))
838
838
  if options.recover?
839
- fragment = Nokogiri::HTML::DocumentFragment.parse contents
839
+ fragment = Nokogiri::HTML4::DocumentFragment.parse contents
840
840
  node_set = fragment.children
841
841
  else
842
842
  raise document.errors[error_count]
@@ -882,7 +882,7 @@ module Nokogiri
882
882
  type == DOCUMENT_NODE
883
883
  end
884
884
 
885
- # Returns true if this is an HTML::Document node
885
+ # Returns true if this is an HTML4::Document node
886
886
  def html?
887
887
  type == HTML_DOCUMENT_NODE
888
888
  end
@@ -908,11 +908,11 @@ module Nokogiri
908
908
  end
909
909
 
910
910
  ###
911
- # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
911
+ # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
912
912
  # nil on XML documents and on unknown tags.
913
913
  def description
914
914
  return nil if document.xml?
915
- Nokogiri::HTML::ElementDescription[name]
915
+ Nokogiri::HTML4::ElementDescription[name]
916
916
  end
917
917
 
918
918
  ###
@@ -1234,3 +1234,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
1234
1234
  end
1235
1235
  end
1236
1236
  end
1237
+
1238
+ require_relative "node/save_options"
@@ -71,6 +71,8 @@ module Nokogiri
71
71
 
72
72
  # the default options used for parsing XML documents
73
73
  DEFAULT_XML = RECOVER | NONET
74
+ # the default options used for parsing XSLT stylesheets
75
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
74
76
  # the default options used for parsing HTML documents
75
77
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
76
78
  # the default options used for parsing XML schemas
@@ -1,3 +1,3 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/pp/node'
3
- require 'nokogiri/xml/pp/character_data'
2
+ require_relative "pp/node"
3
+ require_relative "pp/character_data"
@@ -2,20 +2,19 @@
2
2
  module Nokogiri
3
3
  module XML
4
4
  ###
5
- # SAX Parsers are event driven parsers. Nokogiri provides two different
6
- # event based parsers when dealing with XML. If you want to do SAX style
7
- # parsing using HTML, check out Nokogiri::HTML::SAX.
5
+ # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
6
+ # dealing with XML. If you want to do SAX style parsing using HTML, check out
7
+ # Nokogiri::HTML4::SAX.
8
8
  #
9
- # The basic way a SAX style parser works is by creating a parser,
10
- # telling the parser about the events we're interested in, then giving
11
- # the parser some XML to process. The parser will notify you when
12
- # it encounters events you said you would like to know about.
9
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
10
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
11
+ # you when it encounters events you said you would like to know about.
13
12
  #
14
- # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
15
- # and implement the methods for which you would like notification.
13
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
14
+ # methods for which you would like notification.
16
15
  #
17
- # For example, if I want to be notified when a document ends, and when an
18
- # element starts, I would write a class like this:
16
+ # For example, if I want to be notified when a document ends, and when an element starts, I
17
+ # would write a class like this:
19
18
  #
20
19
  # class MyDocument < Nokogiri::XML::SAX::Document
21
20
  # def end_document
@@ -27,8 +26,7 @@ module Nokogiri
27
26
  # end
28
27
  # end
29
28
  #
30
- # Then I would instantiate a SAX parser with this document, and feed the
31
- # parser some XML
29
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
32
30
  #
33
31
  # # Create a new parser
34
32
  # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
@@ -36,25 +34,21 @@ module Nokogiri
36
34
  # # Feed the parser some XML
37
35
  # parser.parse(File.open(ARGV[0]))
38
36
  #
39
- # Now my document handler will be called when each node starts, and when
40
- # then document ends. To see what kinds of events are available, take
41
- # a look at Nokogiri::XML::SAX::Document.
37
+ # Now my document handler will be called when each node starts, and when the document ends. To
38
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
42
39
  #
43
- # Two SAX parsers for XML are available, a parser that reads from a string
44
- # or IO object as it feels necessary, and a parser that lets you spoon
45
- # feed it XML. If you want to let Nokogiri deal with reading your XML,
46
- # use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
40
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
41
+ # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
42
+ # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
47
43
  # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
48
44
  module SAX
49
45
  ###
50
- # This class is used for registering types of events you are interested
51
- # in handling. All of the methods on this class are available as
52
- # possible events while parsing an XML document. To register for any
53
- # particular event, just subclass this class and implement the methods
54
- # you are interested in knowing about.
46
+ # This class is used for registering types of events you are interested in handling. All of
47
+ # the methods on this class are available as possible events while parsing an XML document. To
48
+ # register for any particular event, just subclass this class and implement the methods you
49
+ # are interested in knowing about.
55
50
  #
56
- # To only be notified about start and end element events, write a class
57
- # like this:
51
+ # To only be notified about start and end element events, write a class like this:
58
52
  #
59
53
  # class MyDocument < Nokogiri::XML::SAX::Document
60
54
  # def start_element name, attrs = []
@@ -66,8 +60,8 @@ module Nokogiri
66
60
  # end
67
61
  # end
68
62
  #
69
- # You can use this event handler for any SAX style parser included with
70
- # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
63
+ # You can use this event handler for any SAX style parser included with Nokogiri. See
64
+ # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
71
65
  class Document
72
66
  ###
73
67
  # Called when an XML declaration is parsed
@@ -129,7 +123,7 @@ module Nokogiri
129
123
  end
130
124
 
131
125
  ###
132
- # Characters read between a tag. This method might be called multiple
126
+ # Characters read between a tag. This method might be called multiple
133
127
  # times given one contiguous string of characters.
134
128
  #
135
129
  # +string+ contains the character data
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/sax/document'
3
- require 'nokogiri/xml/sax/parser_context'
4
- require 'nokogiri/xml/sax/parser'
5
- require 'nokogiri/xml/sax/push_parser'
2
+ require_relative "sax/document"
3
+ require_relative "sax/parser_context"
4
+ require_relative "sax/parser"
5
+ require_relative "sax/push_parser"
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/xpath/syntax_error'
3
-
4
2
  module Nokogiri
5
3
  module XML
6
4
  module XPath
7
5
  end
8
6
  end
9
7
  end
8
+
9
+ require_relative "xpath/syntax_error"
data/lib/nokogiri/xml.rb CHANGED
@@ -1,38 +1,9 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xml/pp'
3
- require 'nokogiri/xml/parse_options'
4
- require 'nokogiri/xml/sax'
5
- require 'nokogiri/xml/searchable'
6
- require 'nokogiri/xml/node'
7
- require 'nokogiri/xml/attribute_decl'
8
- require 'nokogiri/xml/element_decl'
9
- require 'nokogiri/xml/element_content'
10
- require 'nokogiri/xml/character_data'
11
- require 'nokogiri/xml/namespace'
12
- require 'nokogiri/xml/attr'
13
- require 'nokogiri/xml/dtd'
14
- require 'nokogiri/xml/cdata'
15
- require 'nokogiri/xml/text'
16
- require 'nokogiri/xml/document'
17
- require 'nokogiri/xml/document_fragment'
18
- require 'nokogiri/xml/processing_instruction'
19
- require 'nokogiri/xml/node_set'
20
- require 'nokogiri/xml/syntax_error'
21
- require 'nokogiri/xml/xpath'
22
- require 'nokogiri/xml/xpath_context'
23
- require 'nokogiri/xml/builder'
24
- require 'nokogiri/xml/reader'
25
- require 'nokogiri/xml/notation'
26
- require 'nokogiri/xml/entity_decl'
27
- require 'nokogiri/xml/entity_reference'
28
- require 'nokogiri/xml/schema'
29
- require 'nokogiri/xml/relax_ng'
30
-
31
2
  module Nokogiri
32
3
  class << self
33
4
  ###
34
5
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
35
- def XML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block
6
+ def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
36
7
  Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
37
8
  end
38
9
  end
@@ -41,20 +12,19 @@ module Nokogiri
41
12
  # Original C14N 1.0 spec canonicalization
42
13
  XML_C14N_1_0 = 0
43
14
  # Exclusive C14N 1.0 spec canonicalization
44
- XML_C14N_EXCLUSIVE_1_0 = 1
15
+ XML_C14N_EXCLUSIVE_1_0 = 1
45
16
  # C14N 1.1 spec canonicalization
46
17
  XML_C14N_1_1 = 2
47
18
  class << self
48
19
  ###
49
20
  # Parse an XML document using the Nokogiri::XML::Reader API. See
50
21
  # Nokogiri::XML::Reader for more information
51
- def Reader string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT
52
-
22
+ def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
53
23
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
54
24
  # Give the options to the user
55
25
  yield options if block_given?
56
26
 
57
- if string_or_io.respond_to? :read
27
+ if string_or_io.respond_to?(:read)
58
28
  return Reader.from_io(string_or_io, url, encoding, options.to_i)
59
29
  end
60
30
  Reader.from_memory(string_or_io, url, encoding, options.to_i)
@@ -62,15 +32,44 @@ module Nokogiri
62
32
 
63
33
  ###
64
34
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
65
- def parse thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
35
+ def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
66
36
  Document.parse(thing, url, encoding, options, &block)
67
37
  end
68
38
 
69
39
  ####
70
40
  # Parse a fragment from +string+ into a NodeSet.
71
- def fragment string
41
+ def fragment(string)
72
42
  XML::DocumentFragment.parse(string)
73
43
  end
74
44
  end
75
45
  end
76
46
  end
47
+
48
+ require_relative "xml/pp"
49
+ require_relative "xml/parse_options"
50
+ require_relative "xml/sax"
51
+ require_relative "xml/searchable"
52
+ require_relative "xml/node"
53
+ require_relative "xml/attribute_decl"
54
+ require_relative "xml/element_decl"
55
+ require_relative "xml/element_content"
56
+ require_relative "xml/character_data"
57
+ require_relative "xml/namespace"
58
+ require_relative "xml/attr"
59
+ require_relative "xml/dtd"
60
+ require_relative "xml/cdata"
61
+ require_relative "xml/text"
62
+ require_relative "xml/document"
63
+ require_relative "xml/document_fragment"
64
+ require_relative "xml/processing_instruction"
65
+ require_relative "xml/node_set"
66
+ require_relative "xml/syntax_error"
67
+ require_relative "xml/xpath"
68
+ require_relative "xml/xpath_context"
69
+ require_relative "xml/builder"
70
+ require_relative "xml/reader"
71
+ require_relative "xml/notation"
72
+ require_relative "xml/entity_decl"
73
+ require_relative "xml/entity_reference"
74
+ require_relative "xml/schema"
75
+ require_relative "xml/relax_ng"
@@ -18,7 +18,7 @@ module Nokogiri
18
18
  # Apply an XSLT stylesheet to an XML::Document.
19
19
  # +params+ is an array of strings used as XSLT parameters.
20
20
  # returns serialized document
21
- def apply_to document, params = []
21
+ def apply_to(document, params = [])
22
22
  serialize(transform(document, params))
23
23
  end
24
24
  end
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,6 +1,4 @@
1
1
  # frozen_string_literal: true
2
- require 'nokogiri/xslt/stylesheet'
3
-
4
2
  module Nokogiri
5
3
  class << self
6
4
  ###
@@ -22,32 +20,32 @@ module Nokogiri
22
20
  class << self
23
21
  ###
24
22
  # Parse the stylesheet in +string+, register any +modules+
25
- def parse string, modules = {}
23
+ def parse(string, modules = {})
26
24
  modules.each do |url, klass|
27
- XSLT.register url, klass
25
+ XSLT.register(url, klass)
28
26
  end
29
27
 
28
+ doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
30
29
  if Nokogiri.jruby?
31
- Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
30
+ Stylesheet.parse_stylesheet_doc(doc, string)
32
31
  else
33
- Stylesheet.parse_stylesheet_doc(XML.parse(string))
32
+ Stylesheet.parse_stylesheet_doc(doc)
34
33
  end
35
34
  end
36
35
 
37
36
  ###
38
37
  # Quote parameters in +params+ for stylesheet safety
39
- def quote_params params
38
+ def quote_params(params)
40
39
  parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
41
- parray.each_with_index do |v,i|
42
- if i % 2 > 0
43
- parray[i]=
44
- if v =~ /'/
45
- "concat('#{ v.gsub(/'/, %q{', "'", '}) }')"
46
- else
47
- "'#{v}'";
48
- end
40
+ parray.each_with_index do |v, i|
41
+ parray[i] = if i % 2 > 0
42
+ if v =~ /'/
43
+ "concat('#{v.gsub(/'/, %q{', "'", '})}')"
44
+ else
45
+ "'#{v}'"
46
+ end
49
47
  else
50
- parray[i] = v.to_s
48
+ v.to_s
51
49
  end
52
50
  end
53
51
  parray.flatten
@@ -55,3 +53,5 @@ module Nokogiri
55
53
  end
56
54
  end
57
55
  end
56
+
57
+ require_relative "xslt/stylesheet"
data/lib/nokogiri.rb CHANGED
@@ -2,38 +2,29 @@
2
2
  # frozen_string_literal: true
3
3
  # Modify the PATH on windows so that the external DLLs will get loaded.
4
4
 
5
- require 'rbconfig'
5
+ require "rbconfig"
6
6
 
7
7
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
8
- require 'nokogiri/jruby/dependencies'
8
+ require_relative "nokogiri/jruby/dependencies"
9
9
  end
10
10
 
11
- require 'nokogiri/extension'
12
-
13
- require 'nokogiri/version'
14
- require 'nokogiri/syntax_error'
15
- require 'nokogiri/xml'
16
- require 'nokogiri/xslt'
17
- require 'nokogiri/html'
18
- require 'nokogiri/decorators/slop'
19
- require 'nokogiri/css'
20
- require 'nokogiri/html/builder'
11
+ require_relative "nokogiri/extension"
21
12
 
22
13
  # Nokogiri parses and searches XML/HTML very quickly, and also has
23
14
  # correctly implemented CSS3 selector support as well as XPath 1.0
24
15
  # support.
25
16
  #
26
17
  # Parsing a document returns either a Nokogiri::XML::Document, or a
27
- # Nokogiri::HTML::Document depending on the kind of document you parse.
18
+ # Nokogiri::HTML4::Document depending on the kind of document you parse.
28
19
  #
29
20
  # Here is an example:
30
21
  #
31
22
  # require 'nokogiri'
32
23
  # require 'open-uri'
33
24
  #
34
- # # Get a Nokogiri::HTML:Document for the page we’re interested in...
25
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
35
26
  #
36
- # doc = Nokogiri::HTML(URI.open('http://www.google.com/search?q=tenderlove'))
27
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
37
28
  #
38
29
  # # Do funky things with it using Nokogiri::XML::Node methods...
39
30
  #
@@ -49,27 +40,27 @@ module Nokogiri
49
40
  class << self
50
41
  ###
51
42
  # Parse an HTML or XML document. +string+ contains the document.
52
- def parse string, url = nil, encoding = nil, options = nil
43
+ def parse(string, url = nil, encoding = nil, options = nil)
53
44
  if string.respond_to?(:read) ||
54
45
  /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
55
46
  # Expect an HTML indicator to appear within the first 512
56
47
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
57
48
  # shouldn't be that long)
58
- Nokogiri.HTML(string, url, encoding,
49
+ Nokogiri.HTML4(string, url, encoding,
59
50
  options || XML::ParseOptions::DEFAULT_HTML)
60
51
  else
61
52
  Nokogiri.XML(string, url, encoding,
62
53
  options || XML::ParseOptions::DEFAULT_XML)
63
- end.tap { |doc|
54
+ end.tap do |doc|
64
55
  yield doc if block_given?
65
- }
56
+ end
66
57
  end
67
58
 
68
59
  ###
69
60
  # Create a new Nokogiri::XML::DocumentFragment
70
- def make input = nil, opts = {}, &blk
61
+ def make(input = nil, opts = {}, &blk)
71
62
  if input
72
- Nokogiri::HTML.fragment(input).children.first
63
+ Nokogiri::HTML4.fragment(input).children.first
73
64
  else
74
65
  Nokogiri(&blk)
75
66
  end
@@ -98,10 +89,10 @@ module Nokogiri
98
89
  # Make sure to support some popular encoding aliases not known by
99
90
  # all iconv implementations.
100
91
  {
101
- 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
102
- }.each { |alias_name, name|
92
+ "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
93
+ }.each do |alias_name, name|
103
94
  EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
104
- }
95
+ end
105
96
  end
106
97
  end
107
98
 
@@ -109,15 +100,26 @@ module Nokogiri
109
100
  end
110
101
 
111
102
  ###
112
- # Parse a document contained in +args+. Nokogiri will try to guess what
113
- # type of document you are attempting to parse. For more information, see
114
- # Nokogiri.parse
103
+ # Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
104
+ # attempting to parse. For more information, see Nokogiri.parse
115
105
  #
116
- # To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
106
+ # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
117
107
  def Nokogiri(*args, &block)
118
108
  if block_given?
119
- Nokogiri::HTML::Builder.new(&block).doc.root
109
+ Nokogiri::HTML4::Builder.new(&block).doc.root
120
110
  else
121
111
  Nokogiri.parse(*args)
122
112
  end
123
113
  end
114
+
115
+ require_relative "nokogiri/version"
116
+ require_relative "nokogiri/syntax_error"
117
+ require_relative "nokogiri/xml"
118
+ require_relative "nokogiri/xslt"
119
+ require_relative "nokogiri/html4"
120
+ require_relative "nokogiri/html"
121
+ require_relative "nokogiri/decorators/slop"
122
+ require_relative "nokogiri/css"
123
+ require_relative "nokogiri/html4/builder"
124
+
125
+ require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
@@ -16,7 +16,7 @@ index cf96d41..1372d8b 100644
16
16
  }
17
17
 
18
18
  -libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
19
- +$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
19
+ +$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
20
20
  $(AM_V_CCLD)$(libxml2_la_LINK) -rpath $(libdir) $(libxml2_la_OBJECTS) $(libxml2_la_LIBADD) $(LIBS)
21
21
 
22
22
  testdso.la: $(testdso_la_OBJECTS) $(testdso_la_DEPENDENCIES) $(EXTRA_testdso_la_DEPENDENCIES)