nokogiri 1.13.10-java → 1.14.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic.

Files changed (119)
  1. checksums.yaml +4 -4
  2. data/Gemfile +33 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +25 -7
  7. data/ext/java/nokogiri/Html4Document.java +2 -0
  8. data/ext/java/nokogiri/Html4ElementDescription.java +9 -9
  9. data/ext/java/nokogiri/Html4EntityLookup.java +14 -3
  10. data/ext/java/nokogiri/Html4SaxParserContext.java +2 -2
  11. data/ext/java/nokogiri/Html4SaxPushParser.java +3 -0
  12. data/ext/java/nokogiri/NokogiriService.java +1 -24
  13. data/ext/java/nokogiri/XmlAttr.java +1 -1
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +2 -1
  15. data/ext/java/nokogiri/XmlCdata.java +2 -1
  16. data/ext/java/nokogiri/XmlComment.java +2 -1
  17. data/ext/java/nokogiri/XmlDocument.java +5 -6
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +2 -1
  19. data/ext/java/nokogiri/XmlDtd.java +4 -3
  20. data/ext/java/nokogiri/XmlElement.java +1 -0
  21. data/ext/java/nokogiri/XmlElementContent.java +4 -1
  22. data/ext/java/nokogiri/XmlElementDecl.java +3 -1
  23. data/ext/java/nokogiri/XmlEntityDecl.java +2 -0
  24. data/ext/java/nokogiri/XmlEntityReference.java +1 -0
  25. data/ext/java/nokogiri/XmlNamespace.java +2 -0
  26. data/ext/java/nokogiri/XmlNode.java +39 -24
  27. data/ext/java/nokogiri/XmlNodeSet.java +10 -7
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +1 -0
  29. data/ext/java/nokogiri/XmlReader.java +4 -3
  30. data/ext/java/nokogiri/XmlRelaxng.java +1 -0
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +1 -0
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +3 -0
  33. data/ext/java/nokogiri/XmlSchema.java +4 -2
  34. data/ext/java/nokogiri/XmlSyntaxError.java +1 -0
  35. data/ext/java/nokogiri/XmlText.java +1 -0
  36. data/ext/java/nokogiri/XmlXpathContext.java +2 -0
  37. data/ext/java/nokogiri/XsltStylesheet.java +16 -13
  38. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +3 -2
  39. data/ext/java/nokogiri/internals/NokogiriHandler.java +2 -2
  40. data/ext/java/nokogiri/internals/NokogiriHelpers.java +4 -5
  41. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +3 -3
  42. data/ext/java/nokogiri/internals/ParserContext.java +2 -0
  43. data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
  44. data/ext/java/nokogiri/internals/SaveContextVisitor.java +4 -2
  45. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +2 -2
  46. data/ext/java/nokogiri/internals/XmlDomParserContext.java +2 -1
  47. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +1 -0
  48. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +5 -4
  49. data/ext/nokogiri/extconf.rb +80 -21
  50. data/ext/nokogiri/gumbo.c +19 -9
  51. data/ext/nokogiri/html4_document.c +1 -1
  52. data/ext/nokogiri/html4_entity_lookup.c +1 -1
  53. data/ext/nokogiri/html4_sax_parser_context.c +0 -5
  54. data/ext/nokogiri/nokogiri.c +33 -51
  55. data/ext/nokogiri/xml_attribute_decl.c +1 -1
  56. data/ext/nokogiri/xml_cdata.c +1 -1
  57. data/ext/nokogiri/xml_document.c +16 -11
  58. data/ext/nokogiri/xml_element_content.c +2 -2
  59. data/ext/nokogiri/xml_element_decl.c +1 -1
  60. data/ext/nokogiri/xml_encoding_handler.c +2 -2
  61. data/ext/nokogiri/xml_namespace.c +38 -8
  62. data/ext/nokogiri/xml_node.c +286 -26
  63. data/ext/nokogiri/xml_node_set.c +0 -2
  64. data/ext/nokogiri/xml_reader.c +40 -20
  65. data/ext/nokogiri/xml_relax_ng.c +0 -2
  66. data/ext/nokogiri/xml_sax_parser.c +22 -16
  67. data/ext/nokogiri/xml_sax_parser_context.c +0 -5
  68. data/ext/nokogiri/xml_sax_push_parser.c +0 -2
  69. data/ext/nokogiri/xml_schema.c +0 -2
  70. data/ext/nokogiri/xml_xpath_context.c +87 -83
  71. data/ext/nokogiri/xslt_stylesheet.c +14 -13
  72. data/gumbo-parser/Makefile +10 -0
  73. data/lib/nokogiri/css/node.rb +2 -2
  74. data/lib/nokogiri/css/xpath_visitor.rb +5 -3
  75. data/lib/nokogiri/css.rb +6 -0
  76. data/lib/nokogiri/encoding_handler.rb +57 -0
  77. data/lib/nokogiri/extension.rb +3 -2
  78. data/lib/nokogiri/html4/document.rb +2 -121
  79. data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
  80. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  81. data/lib/nokogiri/html4.rb +1 -0
  82. data/lib/nokogiri/html5/document.rb +113 -36
  83. data/lib/nokogiri/html5/document_fragment.rb +9 -2
  84. data/lib/nokogiri/html5/node.rb +3 -5
  85. data/lib/nokogiri/html5.rb +127 -216
  86. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  87. data/lib/{isorelax.jar → nokogiri/jruby/isorelax/isorelax/20030108/isorelax-20030108.jar} +0 -0
  88. data/lib/nokogiri/jruby/net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar +0 -0
  89. data/lib/nokogiri/jruby/net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar +0 -0
  90. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  91. data/lib/nokogiri/jruby/nu/validator/jing/20200702VNU/jing-20200702VNU.jar +0 -0
  92. data/lib/nokogiri/jruby/org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar +0 -0
  93. data/lib/{serializer.jar → nokogiri/jruby/xalan/serializer/2.7.2/serializer-2.7.2.jar} +0 -0
  94. data/lib/{xalan.jar → nokogiri/jruby/xalan/xalan/2.7.2/xalan-2.7.2.jar} +0 -0
  95. data/lib/{xercesImpl.jar → nokogiri/jruby/xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar} +0 -0
  96. data/lib/{xml-apis.jar → nokogiri/jruby/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar} +0 -0
  97. data/lib/nokogiri/nokogiri.jar +0 -0
  98. data/lib/nokogiri/version/constant.rb +1 -1
  99. data/lib/nokogiri/version/info.rb +11 -10
  100. data/lib/nokogiri/xml/attr.rb +49 -0
  101. data/lib/nokogiri/xml/builder.rb +1 -1
  102. data/lib/nokogiri/xml/document.rb +102 -54
  103. data/lib/nokogiri/xml/document_fragment.rb +49 -6
  104. data/lib/nokogiri/xml/namespace.rb +42 -0
  105. data/lib/nokogiri/xml/node/save_options.rb +6 -4
  106. data/lib/nokogiri/xml/node.rb +190 -35
  107. data/lib/nokogiri/xml/node_set.rb +87 -9
  108. data/lib/nokogiri/xml/parse_options.rb +129 -50
  109. data/lib/nokogiri/xml/pp/node.rb +6 -4
  110. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  111. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  112. data/lib/nokogiri/xslt.rb +1 -1
  113. data/lib/nokogiri.rb +3 -11
  114. data/lib/xsd/xmlparser/nokogiri.rb +3 -1
  115. metadata +60 -272
  116. data/ext/java/nokogiri/EncodingHandler.java +0 -111
  117. data/lib/jing.jar +0 -0
  118. data/lib/nekodtd.jar +0 -0
  119. data/lib/nekohtml.jar +0 -0
@@ -19,63 +19,72 @@ module Nokogiri
19
19
  NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
20
  NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
21
 
22
- ##
23
- # Parse an XML file.
24
- #
25
- # +string_or_io+ may be a String, or any object that responds to
26
- # _read_ and _close_ such as an IO, or StringIO.
27
- #
28
- # +url+ (optional) is the URI where this document is located.
29
- #
30
- # +encoding+ (optional) is the encoding that should be used when processing
31
- # the document.
32
- #
33
- # +options+ (optional) is a configuration object that sets options during
34
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
- # Nokogiri::XML::ParseOptions for more information.
36
- #
37
- # +block+ (optional) is passed a configuration object on which
38
- # parse options may be set.
39
- #
40
- # By default, Nokogiri treats documents as untrusted, and so
41
- # does not attempt to load DTDs or access the network. See
42
- # Nokogiri::XML::ParseOptions for a complete list of options;
43
- # and that module's DEFAULT_XML constant for what's set (and not
44
- # set) by default.
45
- #
46
- # Nokogiri.XML() is a convenience method which will call this method.
47
- #
48
- def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
- yield options if block_given?
22
+ class << self
23
+ # Parse an XML file.
24
+ #
25
+ # +string_or_io+ may be a String, or any object that responds to
26
+ # _read_ and _close_ such as an IO, or StringIO.
27
+ #
28
+ # +url+ (optional) is the URI where this document is located.
29
+ #
30
+ # +encoding+ (optional) is the encoding that should be used when processing
31
+ # the document.
32
+ #
33
+ # +options+ (optional) is a configuration object that sets options during
34
+ # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
+ # Nokogiri::XML::ParseOptions for more information.
36
+ #
37
+ # +block+ (optional) is passed a configuration object on which
38
+ # parse options may be set.
39
+ #
40
+ # By default, Nokogiri treats documents as untrusted, and so
41
+ # does not attempt to load DTDs or access the network. See
42
+ # Nokogiri::XML::ParseOptions for a complete list of options;
43
+ # and that module's DEFAULT_XML constant for what's set (and not
44
+ # set) by default.
45
+ #
46
+ # Nokogiri.XML() is a convenience method which will call this method.
47
+ #
48
+ def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
+ yield options if block_given?
51
+
52
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
53
+
54
+ if empty_doc?(string_or_io)
55
+ if options.strict?
56
+ raise Nokogiri::XML::SyntaxError, "Empty document"
57
+ else
58
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
59
+ end
60
+ end
51
61
 
52
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
62
+ doc = if string_or_io.respond_to?(:read)
63
+ if string_or_io.is_a?(Pathname)
64
+ # resolve the Pathname to the file and open it as an IO object, see #2110
65
+ string_or_io = string_or_io.expand_path.open
66
+ url ||= string_or_io.path
67
+ end
53
68
 
54
- if empty_doc?(string_or_io)
55
- if options.strict?
56
- raise Nokogiri::XML::SyntaxError, "Empty document"
69
+ read_io(string_or_io, url, encoding, options.to_i)
57
70
  else
58
- return encoding ? new.tap { |i| i.encoding = encoding } : new
71
+ # read_memory pukes on empty docs
72
+ read_memory(string_or_io, url, encoding, options.to_i)
59
73
  end
60
- end
61
74
 
62
- doc = if string_or_io.respond_to?(:read)
63
- if string_or_io.is_a?(Pathname)
64
- # resolve the Pathname to the file and open it as an IO object, see #2110
65
- string_or_io = string_or_io.expand_path.open
66
- url ||= string_or_io.path
67
- end
75
+ # do xinclude processing
76
+ doc.do_xinclude(options) if options.xinclude?
68
77
 
69
- read_io(string_or_io, url, encoding, options.to_i)
70
- else
71
- # read_memory pukes on empty docs
72
- read_memory(string_or_io, url, encoding, options.to_i)
78
+ doc
73
79
  end
74
80
 
75
- # do xinclude processing
76
- doc.do_xinclude(options) if options.xinclude?
81
+ private
77
82
 
78
- doc
83
+ def empty_doc?(string_or_io)
84
+ string_or_io.nil? ||
85
+ (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
86
+ (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
87
+ end
79
88
  end
80
89
 
81
90
  ##
@@ -165,6 +174,7 @@ module Nokogiri
165
174
  # Since v1.12.4
166
175
  attr_accessor :namespace_inheritance
167
176
 
177
+ # rubocop:disable Lint/MissingSuper
168
178
  def initialize(*args) # :nodoc:
169
179
  @errors = []
170
180
  @decorators = nil
@@ -405,14 +415,52 @@ module Nokogiri
405
415
  Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
406
416
  end
407
417
 
408
- private
409
-
410
- def self.empty_doc?(string_or_io)
411
- string_or_io.nil? ||
412
- (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
413
- (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
418
+ #
419
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
420
+ #
421
+ # Returns a hash describing the Document, to use in pattern matching.
422
+ #
423
+ # Valid keys and their values:
424
+ # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
425
+ #
426
+ # In the future, other keys may allow accessing things like doctype and processing
427
+ # instructions. If you have a use case and would like this functionality, please let us know
428
+ # by opening an issue or a discussion on the github project.
429
+ #
430
+ # ⚡ This is an experimental feature, available since v1.14.0
431
+ #
432
+ # *Example*
433
+ #
434
+ # doc = Nokogiri::XML.parse(<<~XML)
435
+ # <?xml version="1.0"?>
436
+ # <root>
437
+ # <child>
438
+ # </root>
439
+ # XML
440
+ #
441
+ # doc.deconstruct_keys([:root])
442
+ # # => {:root=>
443
+ # # #(Element:0x35c {
444
+ # # name = "root",
445
+ # # children = [
446
+ # # #(Text "\n" + " "),
447
+ # # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
448
+ # # #(Text "\n")]
449
+ # # })}
450
+ #
451
+ # *Example* of an empty document
452
+ #
453
+ # doc = Nokogiri::XML::Document.new
454
+ #
455
+ # doc.deconstruct_keys([:root])
456
+ # # => {:root=>nil}
457
+ #
458
+ def deconstruct_keys(keys)
459
+ { root: root }
414
460
  end
415
461
 
462
+ private
463
+
416
464
  IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
417
465
 
418
466
  def inspect_attributes
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
@@ -66,9 +67,7 @@ module Nokogiri
66
67
  def to_html(*args)
67
68
  if Nokogiri.jruby?
68
69
  options = args.first.is_a?(Hash) ? args.shift : {}
69
- unless options[:save_with]
70
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
71
- end
70
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
72
71
  args.insert(0, options)
73
72
  end
74
73
  children.to_html(*args)
@@ -80,9 +79,7 @@ module Nokogiri
80
79
  def to_xhtml(*args)
81
80
  if Nokogiri.jruby?
82
81
  options = args.first.is_a?(Hash) ? args.shift : {}
83
- unless options[:save_with]
84
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_XHTML
85
- end
82
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_XHTML
86
83
  args.insert(0, options)
87
84
  end
88
85
  children.to_xhtml(*args)
@@ -148,6 +145,52 @@ module Nokogiri
148
145
  document.fragment(data)
149
146
  end
150
147
 
148
+ #
149
+ # :call-seq: deconstruct() → Array
150
+ #
151
+ # Returns the root nodes of this document fragment as an array, to use in pattern matching.
152
+ #
153
+ # 💡 Note that text nodes are returned as well as elements. If you wish to operate only on
154
+ # root elements, you should deconstruct the array returned by
155
+ # <tt>DocumentFragment#elements</tt>.
156
+ #
157
+ # ⚡ This is an experimental feature, available since v1.14.0
158
+ #
159
+ # *Example*
160
+ #
161
+ # frag = Nokogiri::HTML5.fragment(<<~HTML)
162
+ # <div>Start</div>
163
+ # This is a <a href="#jump">shortcut</a> for you.
164
+ # <div>End</div>
165
+ # HTML
166
+ #
167
+ # frag.deconstruct
168
+ # # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
169
+ # # #(Text "\n" + "This is a "),
170
+ # # #(Element:0x370 {
171
+ # # name = "a",
172
+ # # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
173
+ # # children = [ #(Text "shortcut")]
174
+ # # }),
175
+ # # #(Text " for you.\n"),
176
+ # # #(Element:0x398 { name = "div", children = [ #(Text "End")] }),
177
+ # # #(Text "\n")]
178
+ #
179
+ # *Example* only the elements, not the text nodes.
180
+ #
181
+ # frag.elements.deconstruct
182
+ # # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
183
+ # # #(Element:0x370 {
184
+ # # name = "a",
185
+ # # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
186
+ # # children = [ #(Text "shortcut")]
187
+ # # }),
188
+ # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
+ #
190
+ def deconstruct
191
+ children.to_a
192
+ end
193
+
151
194
  private
152
195
 
153
196
  # fix for issue 770
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
@@ -6,6 +7,47 @@ module Nokogiri
6
7
  include Nokogiri::XML::PP::Node
7
8
  attr_reader :document
8
9
 
10
+ #
11
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
12
+ #
13
+ # Returns a hash describing the Namespace, to use in pattern matching.
14
+ #
15
+ # Valid keys and their values:
16
+ # - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
17
+ # - +href+ → (String) The namespace's URI
18
+ #
19
+ # ⚡ This is an experimental feature, available since v1.14.0
20
+ #
21
+ # *Example*
22
+ #
23
+ # doc = Nokogiri::XML.parse(<<~XML)
24
+ # <?xml version="1.0"?>
25
+ # <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
26
+ # <child1 foo="abc" noko:bar="def"/>
27
+ # <noko:child2 foo="qwe" noko:bar="rty"/>
28
+ # </root>
29
+ # XML
30
+ #
31
+ # doc.root.elements.first.namespace
32
+ # # => #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })
33
+ #
34
+ # doc.root.elements.first.namespace.deconstruct_keys([:prefix, :href])
35
+ # # => {:prefix=>nil, :href=>"http://nokogiri.org/ns/default"}
36
+ #
37
+ # doc.root.elements.last.namespace
38
+ # # => #(Namespace:0x370 {
39
+ # # prefix = "noko",
40
+ # # href = "http://nokogiri.org/ns/noko"
41
+ # # })
42
+ #
43
+ # doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
44
+ # # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
45
+ #
46
+ #
47
+ def deconstruct_keys(keys)
48
+ { prefix: prefix, href: href }
49
+ end
50
+
9
51
  private
10
52
 
11
53
  def inspect_attributes
@@ -29,14 +29,16 @@ module Nokogiri
29
29
  DEFAULT_XML = AS_XML # https://github.com/sparklemotion/nokogiri/issues/#issue/415
30
30
  # the default for HTML document
31
31
  DEFAULT_HTML = NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
32
+ # the default for XHTML document
33
+ DEFAULT_XHTML = NO_DECLARATION | AS_XHTML
32
34
  else
33
35
  # the default for XML documents
34
36
  DEFAULT_XML = FORMAT | AS_XML
35
37
  # the default for HTML document
36
38
  DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
39
+ # the default for XHTML document
40
+ DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
37
41
  end
38
- # the default for XHTML document
39
- DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
40
42
 
41
43
  # Integer representation of the SaveOptions
42
44
  attr_reader :options
@@ -47,7 +49,7 @@ module Nokogiri
47
49
  end
48
50
 
49
51
  constants.each do |constant|
50
- class_eval %{
52
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
51
53
  def #{constant.downcase}
52
54
  @options |= #{constant}
53
55
  self
@@ -56,7 +58,7 @@ module Nokogiri
56
58
  def #{constant.downcase}?
57
59
  #{constant} & @options == #{constant}
58
60
  end
59
- }
61
+ RUBY
60
62
  end
61
63
 
62
64
  alias_method :to_i, :options