nokogiri 1.10.7 → 1.16.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (224) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +42 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +188 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +862 -421
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +222 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +39 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +408 -243
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +131 -61
  33. data/ext/nokogiri/xml_node.c +1343 -674
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +305 -213
  37. data/ext/nokogiri/xml_relax_ng.c +87 -78
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +149 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +65 -37
  41. data/ext/nokogiri/xml_schema.c +138 -82
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +35 -26
  44. data/ext/nokogiri/xml_xpath_context.c +363 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +126 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3464 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +5 -3
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +205 -96
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +326 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +224 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +75 -34
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -127
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1096 -419
  142. data/lib/nokogiri/xml/node_set.rb +137 -61
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +7 -5
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +39 -38
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  171. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  172. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  173. data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
  174. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  175. metadata +121 -291
  176. data/ext/nokogiri/html_document.c +0 -170
  177. data/ext/nokogiri/html_document.h +0 -10
  178. data/ext/nokogiri/html_element_description.c +0 -279
  179. data/ext/nokogiri/html_element_description.h +0 -10
  180. data/ext/nokogiri/html_entity_lookup.c +0 -32
  181. data/ext/nokogiri/html_entity_lookup.h +0 -8
  182. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  183. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  184. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  185. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  186. data/ext/nokogiri/xml_attr.h +0 -9
  187. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  188. data/ext/nokogiri/xml_cdata.h +0 -9
  189. data/ext/nokogiri/xml_comment.h +0 -9
  190. data/ext/nokogiri/xml_document.h +0 -23
  191. data/ext/nokogiri/xml_document_fragment.h +0 -10
  192. data/ext/nokogiri/xml_dtd.h +0 -10
  193. data/ext/nokogiri/xml_element_content.h +0 -10
  194. data/ext/nokogiri/xml_element_decl.h +0 -9
  195. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  196. data/ext/nokogiri/xml_entity_decl.h +0 -10
  197. data/ext/nokogiri/xml_entity_reference.h +0 -9
  198. data/ext/nokogiri/xml_io.c +0 -61
  199. data/ext/nokogiri/xml_io.h +0 -11
  200. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  201. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  202. data/ext/nokogiri/xml_namespace.h +0 -14
  203. data/ext/nokogiri/xml_node.h +0 -13
  204. data/ext/nokogiri/xml_node_set.h +0 -12
  205. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  206. data/ext/nokogiri/xml_reader.h +0 -10
  207. data/ext/nokogiri/xml_relax_ng.h +0 -9
  208. data/ext/nokogiri/xml_sax_parser.h +0 -39
  209. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  210. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  211. data/ext/nokogiri/xml_schema.h +0 -9
  212. data/ext/nokogiri/xml_syntax_error.h +0 -13
  213. data/ext/nokogiri/xml_text.h +0 -9
  214. data/ext/nokogiri/xml_xpath_context.h +0 -10
  215. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  216. data/lib/nokogiri/html/document.rb +0 -335
  217. data/lib/nokogiri/html/document_fragment.rb +0 -49
  218. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  219. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  220. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  221. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  222. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  223. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  224. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,16 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
5
+ # :nodoc: all
3
6
  module PP
4
7
  module CharacterData
5
- def pretty_print pp # :nodoc:
6
- nice_name = self.class.name.split('::').last
7
- pp.group(2, "#(#{nice_name} ", ')') do
8
- pp.pp text
8
+ def pretty_print(pp)
9
+ nice_name = self.class.name.split("::").last
10
+ pp.group(2, "#(#{nice_name} ", ")") do
11
+ pp.pp(text)
9
12
  end
10
13
  end
11
14
 
12
- def inspect # :nodoc:
13
- "#<#{self.class.name}:#{sprintf("0x%x",object_id)} #{text.inspect}>"
15
+ def inspect
16
+ "#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
14
17
  end
15
18
  end
16
19
  end
@@ -1,53 +1,65 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
5
+ # :nodoc: all
3
6
  module PP
4
7
  module Node
5
- def inspect # :nodoc:
6
- attributes = inspect_attributes.reject { |x|
7
- begin
8
- attribute = send x
9
- !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
10
- rescue NoMethodError
11
- true
12
- end
13
- }.map { |attribute|
14
- "#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
15
- }.join ' '
16
- "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
17
- end
8
+ COLLECTIONS = [:attribute_nodes, :children]
18
9
 
19
- def pretty_print pp # :nodoc:
20
- nice_name = self.class.name.split('::').last
21
- pp.group(2, "#(#{nice_name}:#{sprintf("0x%x", object_id)} {", '})') do
10
+ def inspect
11
+ attributes = inspect_attributes.reject do |x|
12
+ attribute = send(x)
13
+ !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
14
+ rescue NoMethodError
15
+ true
16
+ end
17
+ attributes = if inspect_attributes.length == 1
18
+ send(attributes.first).inspect
19
+ else
20
+ attributes.map do |attribute|
21
+ "#{attribute}=#{send(attribute).inspect}"
22
+ end.join(" ")
23
+ end
24
+ "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
25
+ end
22
26
 
27
+ def pretty_print(pp)
28
+ nice_name = self.class.name.split("::").last
29
+ pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
23
30
  pp.breakable
24
- attrs = inspect_attributes.map { |t|
31
+
32
+ attrs = inspect_attributes.filter_map do |t|
25
33
  [t, send(t)] if respond_to?(t)
26
- }.compact.find_all { |x|
34
+ end.find_all do |x|
27
35
  if x.last
28
- if [:attribute_nodes, :children].include? x.first
36
+ if COLLECTIONS.include?(x.first)
29
37
  !x.last.empty?
30
38
  else
31
39
  true
32
40
  end
33
41
  end
34
- }
42
+ end
35
43
 
36
- pp.seplist(attrs) do |v|
37
- if [:attribute_nodes, :children].include? v.first
38
- pp.group(2, "#{v.first.to_s.sub(/_\w+$/, 's')} = [", "]") do
39
- pp.breakable
40
- pp.seplist(v.last) do |item|
41
- pp.pp item
44
+ if inspect_attributes.length == 1
45
+ pp.pp(attrs.first.last)
46
+ else
47
+ pp.seplist(attrs) do |v|
48
+ if COLLECTIONS.include?(v.first)
49
+ pp.group(2, "#{v.first} = [", "]") do
50
+ pp.breakable
51
+ pp.seplist(v.last) do |item|
52
+ pp.pp(item)
53
+ end
42
54
  end
55
+ else
56
+ pp.text("#{v.first} = ")
57
+ pp.pp(v.last)
43
58
  end
44
- else
45
- pp.text "#{v.first} = "
46
- pp.pp v.last
47
59
  end
48
60
  end
49
- pp.breakable
50
61
 
62
+ pp.breakable
51
63
  end
52
64
  end
53
65
  end
@@ -1,2 +1,4 @@
1
- require 'nokogiri/xml/pp/node'
2
- require 'nokogiri/xml/pp/character_data'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "pp/node"
4
+ require_relative "pp/character_data"
@@ -1,7 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class ProcessingInstruction < Node
4
- def initialize document, name, content
6
+ def initialize(document, name, content)
7
+ super(document, name)
5
8
  end
6
9
  end
7
10
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  ###
@@ -7,18 +9,18 @@ module Nokogiri
7
9
  #
8
10
  # Here is an example of usage:
9
11
  #
10
- # reader = Nokogiri::XML::Reader(<<-eoxml)
11
- # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
12
- # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
13
- # </x>
14
- # eoxml
12
+ # reader = Nokogiri::XML::Reader(<<-eoxml)
13
+ # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
14
+ # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
15
+ # </x>
16
+ # eoxml
15
17
  #
16
- # reader.each do |node|
18
+ # reader.each do |node|
17
19
  #
18
- # # node is an instance of Nokogiri::XML::Reader
19
- # puts node.name
20
+ # # node is an instance of Nokogiri::XML::Reader
21
+ # puts node.name
20
22
  #
21
- # end
23
+ # end
22
24
  #
23
25
  # Note that Nokogiri::XML::Reader#each can only be called once!! Once
24
26
  # the cursor moves through the entire document, you must parse the
@@ -69,41 +71,32 @@ module Nokogiri
69
71
  # A list of errors encountered while parsing
70
72
  attr_accessor :errors
71
73
 
72
- # The encoding for the document
73
- attr_reader :encoding
74
-
75
74
  # The XML source
76
75
  attr_reader :source
77
76
 
78
- alias :self_closing? :empty_element?
77
+ alias_method :self_closing?, :empty_element?
79
78
 
80
- def initialize source, url = nil, encoding = nil # :nodoc:
79
+ def initialize(source, url = nil, encoding = nil) # :nodoc:
81
80
  @source = source
82
81
  @errors = []
83
82
  @encoding = encoding
84
83
  end
85
84
  private :initialize
86
85
 
87
- ###
88
- # Get a list of attributes for the current node.
86
+ # Get the attributes and namespaces of the current node as a Hash.
87
+ #
88
+ # This is the union of Reader#attribute_hash and Reader#namespaces
89
+ #
90
+ # [Returns]
91
+ # (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
89
92
  def attributes
90
- Hash[attribute_nodes.map { |node|
91
- [node.name, node.to_s]
92
- }].merge(namespaces || {})
93
- end
94
-
95
- ###
96
- # Get a list of attributes for the current node
97
- def attribute_nodes
98
- nodes = attr_nodes
99
- nodes.each { |v| v.instance_variable_set(:@_r, self) }
100
- nodes
93
+ attribute_hash.merge(namespaces)
101
94
  end
102
95
 
103
96
  ###
104
97
  # Move the cursor through the document yielding the cursor to the block
105
98
  def each
106
- while cursor = self.read
99
+ while (cursor = read)
107
100
  yield cursor
108
101
  end
109
102
  end
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class << self
4
6
  ###
5
7
  # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
6
8
  # See Nokogiri::XML::RelaxNG for an example.
7
- def RelaxNG string_or_io
8
- RelaxNG.new(string_or_io)
9
+ def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
10
+ RelaxNG.new(string_or_io, options)
9
11
  end
10
12
  end
11
13
 
@@ -26,6 +28,10 @@ module Nokogiri
26
28
  # end
27
29
  #
28
30
  # The list of errors are Nokogiri::XML::SyntaxError objects.
31
+ #
32
+ # NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
33
+ # underlying parsing libraries to access network resources. This is counter to Nokogiri's
34
+ # "untrusted by default" security policy, but is a limitation of the underlying libraries.
29
35
  class RelaxNG < Nokogiri::XML::Schema
30
36
  end
31
37
  end
@@ -1,20 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  ###
4
- # SAX Parsers are event driven parsers. Nokogiri provides two different
5
- # event based parsers when dealing with XML. If you want to do SAX style
6
- # parsing using HTML, check out Nokogiri::HTML::SAX.
6
+ # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
7
+ # dealing with XML. If you want to do SAX style parsing using HTML, check out
8
+ # Nokogiri::HTML4::SAX.
7
9
  #
8
- # The basic way a SAX style parser works is by creating a parser,
9
- # telling the parser about the events we're interested in, then giving
10
- # the parser some XML to process. The parser will notify you when
11
- # it encounters events you said you would like to know about.
10
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
11
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
12
+ # you when it encounters events you said you would like to know about.
12
13
  #
13
- # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
14
- # and implement the methods for which you would like notification.
14
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
15
+ # methods for which you would like notification.
15
16
  #
16
- # For example, if I want to be notified when a document ends, and when an
17
- # element starts, I would write a class like this:
17
+ # For example, if I want to be notified when a document ends, and when an element starts, I
18
+ # would write a class like this:
18
19
  #
19
20
  # class MyDocument < Nokogiri::XML::SAX::Document
20
21
  # def end_document
@@ -26,8 +27,7 @@ module Nokogiri
26
27
  # end
27
28
  # end
28
29
  #
29
- # Then I would instantiate a SAX parser with this document, and feed the
30
- # parser some XML
30
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
31
31
  #
32
32
  # # Create a new parser
33
33
  # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
@@ -35,25 +35,21 @@ module Nokogiri
35
35
  # # Feed the parser some XML
36
36
  # parser.parse(File.open(ARGV[0]))
37
37
  #
38
- # Now my document handler will be called when each node starts, and when
39
- # then document ends. To see what kinds of events are available, take
40
- # a look at Nokogiri::XML::SAX::Document.
38
+ # Now my document handler will be called when each node starts, and when the document ends. To
39
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
41
40
  #
42
- # Two SAX parsers for XML are available, a parser that reads from a string
43
- # or IO object as it feels necessary, and a parser that lets you spoon
44
- # feed it XML. If you want to let Nokogiri deal with reading your XML,
45
- # use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
41
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
42
+ # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
43
+ # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
46
44
  # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
47
45
  module SAX
48
46
  ###
49
- # This class is used for registering types of events you are interested
50
- # in handling. All of the methods on this class are available as
51
- # possible events while parsing an XML document. To register for any
52
- # particular event, just subclass this class and implement the methods
53
- # you are interested in knowing about.
47
+ # This class is used for registering types of events you are interested in handling. All of
48
+ # the methods on this class are available as possible events while parsing an XML document. To
49
+ # register for any particular event, just subclass this class and implement the methods you
50
+ # are interested in knowing about.
54
51
  #
55
- # To only be notified about start and end element events, write a class
56
- # like this:
52
+ # To only be notified about start and end element events, write a class like this:
57
53
  #
58
54
  # class MyDocument < Nokogiri::XML::SAX::Document
59
55
  # def start_element name, attrs = []
@@ -65,12 +61,12 @@ module Nokogiri
65
61
  # end
66
62
  # end
67
63
  #
68
- # You can use this event handler for any SAX style parser included with
69
- # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
64
+ # You can use this event handler for any SAX style parser included with Nokogiri. See
65
+ # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
70
66
  class Document
71
67
  ###
72
68
  # Called when an XML declaration is parsed
73
- def xmldecl version, encoding, standalone
69
+ def xmldecl(version, encoding, standalone)
74
70
  end
75
71
 
76
72
  ###
@@ -88,13 +84,13 @@ module Nokogiri
88
84
  # * +name+ is the name of the tag
89
85
  # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
90
86
  # [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
91
- def start_element name, attrs = []
87
+ def start_element(name, attrs = [])
92
88
  end
93
89
 
94
90
  ###
95
91
  # Called at the end of an element
96
92
  # +name+ is the tag name
97
- def end_element name
93
+ def end_element(name)
98
94
  end
99
95
 
100
96
  ###
@@ -104,16 +100,16 @@ module Nokogiri
104
100
  # +prefix+ is the namespace prefix for the element
105
101
  # +uri+ is the associated namespace URI
106
102
  # +ns+ is a hash of namespace prefix:urls associated with the element
107
- def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
103
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
108
104
  ###
109
105
  # Deal with SAX v1 interface
110
- name = [prefix, name].compact.join(':')
111
- attributes = ns.map { |ns_prefix,ns_uri|
112
- [['xmlns', ns_prefix].compact.join(':'), ns_uri]
113
- } + attrs.map { |attr|
114
- [[attr.prefix, attr.localname].compact.join(':'), attr.value]
115
- }
116
- start_element name, attributes
106
+ name = [prefix, name].compact.join(":")
107
+ attributes = ns.map do |ns_prefix, ns_uri|
108
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
109
+ end + attrs.map do |attr|
110
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
111
+ end
112
+ start_element(name, attributes)
117
113
  end
118
114
 
119
115
  ###
@@ -121,49 +117,49 @@ module Nokogiri
121
117
  # +name+ is the element's name
122
118
  # +prefix+ is the namespace prefix associated with the element
123
119
  # +uri+ is the associated namespace URI
124
- def end_element_namespace name, prefix = nil, uri = nil
120
+ def end_element_namespace(name, prefix = nil, uri = nil)
125
121
  ###
126
122
  # Deal with SAX v1 interface
127
- end_element [prefix, name].compact.join(':')
123
+ end_element([prefix, name].compact.join(":"))
128
124
  end
129
125
 
130
126
  ###
131
- # Characters read between a tag. This method might be called multiple
127
+ # Characters read between a tag. This method might be called multiple
132
128
  # times given one contiguous string of characters.
133
129
  #
134
130
  # +string+ contains the character data
135
- def characters string
131
+ def characters(string)
136
132
  end
137
133
 
138
134
  ###
139
135
  # Called when comments are encountered
140
136
  # +string+ contains the comment data
141
- def comment string
137
+ def comment(string)
142
138
  end
143
139
 
144
140
  ###
145
141
  # Called on document warnings
146
142
  # +string+ contains the warning
147
- def warning string
143
+ def warning(string)
148
144
  end
149
145
 
150
146
  ###
151
147
  # Called on document errors
152
148
  # +string+ contains the error
153
- def error string
149
+ def error(string)
154
150
  end
155
151
 
156
152
  ###
157
153
  # Called when cdata blocks are found
158
154
  # +string+ contains the cdata content
159
- def cdata_block string
155
+ def cdata_block(string)
160
156
  end
161
157
 
162
158
  ###
163
159
  # Called when processing instructions are found
164
160
  # +name+ is the target of the instruction
165
161
  # +content+ is the value of the instruction
166
- def processing_instruction name, content
162
+ def processing_instruction(name, content)
167
163
  end
168
164
  end
169
165
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  module SAX
@@ -35,29 +37,29 @@ module Nokogiri
35
37
 
36
38
  # Encodings this parser supports
37
39
  ENCODINGS = {
38
- 'NONE' => 0, # No char encoding detected
39
- 'UTF-8' => 1, # UTF-8
40
- 'UTF16LE' => 2, # UTF-16 little endian
41
- 'UTF16BE' => 3, # UTF-16 big endian
42
- 'UCS4LE' => 4, # UCS-4 little endian
43
- 'UCS4BE' => 5, # UCS-4 big endian
44
- 'EBCDIC' => 6, # EBCDIC uh!
45
- 'UCS4-2143' => 7, # UCS-4 unusual ordering
46
- 'UCS4-3412' => 8, # UCS-4 unusual ordering
47
- 'UCS2' => 9, # UCS-2
48
- 'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
49
- 'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
50
- 'ISO-8859-3' => 12, # ISO-8859-3
51
- 'ISO-8859-4' => 13, # ISO-8859-4
52
- 'ISO-8859-5' => 14, # ISO-8859-5
53
- 'ISO-8859-6' => 15, # ISO-8859-6
54
- 'ISO-8859-7' => 16, # ISO-8859-7
55
- 'ISO-8859-8' => 17, # ISO-8859-8
56
- 'ISO-8859-9' => 18, # ISO-8859-9
57
- 'ISO-2022-JP' => 19, # ISO-2022-JP
58
- 'SHIFT-JIS' => 20, # Shift_JIS
59
- 'EUC-JP' => 21, # EUC-JP
60
- 'ASCII' => 22, # pure ASCII
40
+ "NONE" => 0, # No char encoding detected
41
+ "UTF-8" => 1, # UTF-8
42
+ "UTF16LE" => 2, # UTF-16 little endian
43
+ "UTF16BE" => 3, # UTF-16 big endian
44
+ "UCS4LE" => 4, # UCS-4 little endian
45
+ "UCS4BE" => 5, # UCS-4 big endian
46
+ "EBCDIC" => 6, # EBCDIC uh!
47
+ "UCS4-2143" => 7, # UCS-4 unusual ordering
48
+ "UCS4-3412" => 8, # UCS-4 unusual ordering
49
+ "UCS2" => 9, # UCS-2
50
+ "ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
51
+ "ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
52
+ "ISO-8859-3" => 12, # ISO-8859-3
53
+ "ISO-8859-4" => 13, # ISO-8859-4
54
+ "ISO-8859-5" => 14, # ISO-8859-5
55
+ "ISO-8859-6" => 15, # ISO-8859-6
56
+ "ISO-8859-7" => 16, # ISO-8859-7
57
+ "ISO-8859-8" => 17, # ISO-8859-8
58
+ "ISO-8859-9" => 18, # ISO-8859-9
59
+ "ISO-2022-JP" => 19, # ISO-2022-JP
60
+ "SHIFT-JIS" => 20, # Shift_JIS
61
+ "EUC-JP" => 21, # EUC-JP
62
+ "ASCII" => 22, # pure ASCII
61
63
  }
62
64
 
63
65
  # The Nokogiri::XML::SAX::Document where events will be sent.
@@ -67,7 +69,7 @@ module Nokogiri
67
69
  attr_accessor :encoding
68
70
 
69
71
  # Create a new Parser with +doc+ and +encoding+
70
- def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
72
+ def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
71
73
  @encoding = check_encoding(encoding)
72
74
  @document = doc
73
75
  @warned = false
@@ -76,7 +78,7 @@ module Nokogiri
76
78
  ###
77
79
  # Parse given +thing+ which may be a string containing xml, or an
78
80
  # IO object.
79
- def parse thing, &block
81
+ def parse(thing, &block)
80
82
  if thing.respond_to?(:read) && thing.respond_to?(:close)
81
83
  parse_io(thing, &block)
82
84
  else
@@ -86,34 +88,35 @@ module Nokogiri
86
88
 
87
89
  ###
88
90
  # Parse given +io+
89
- def parse_io io, encoding = 'ASCII'
90
- @encoding = check_encoding(encoding)
91
- ctx = ParserContext.io(io, ENCODINGS[@encoding])
91
+ def parse_io(io, encoding = @encoding)
92
+ ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
92
93
  yield ctx if block_given?
93
- ctx.parse_with self
94
+ ctx.parse_with(self)
94
95
  end
95
96
 
96
97
  ###
97
98
  # Parse a file with +filename+
98
- def parse_file filename
99
+ def parse_file(filename)
99
100
  raise ArgumentError unless filename
100
101
  raise Errno::ENOENT unless File.exist?(filename)
101
102
  raise Errno::EISDIR if File.directory?(filename)
102
- ctx = ParserContext.file filename
103
+
104
+ ctx = ParserContext.file(filename)
103
105
  yield ctx if block_given?
104
- ctx.parse_with self
106
+ ctx.parse_with(self)
105
107
  end
106
108
 
107
- def parse_memory data
108
- ctx = ParserContext.memory data
109
+ def parse_memory(data)
110
+ ctx = ParserContext.memory(data)
109
111
  yield ctx if block_given?
110
- ctx.parse_with self
112
+ ctx.parse_with(self)
111
113
  end
112
114
 
113
115
  private
116
+
114
117
  def check_encoding(encoding)
115
118
  encoding.upcase.tap do |enc|
116
- raise ArgumentError.new("'#{enc}' is not a valid encoding") unless ENCODINGS[enc]
119
+ raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
117
120
  end
118
121
  end
119
122
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  module SAX
@@ -6,9 +8,12 @@ module Nokogiri
6
8
  # by the user. Instead, you should be looking at
7
9
  # Nokogiri::XML::SAX::Parser
8
10
  class ParserContext
9
- def self.new thing, encoding = 'UTF-8'
10
- [:read, :close].all? { |x| thing.respond_to?(x) } ?
11
- io(thing, Parser::ENCODINGS[encoding]) : memory(thing)
11
+ def self.new(thing, encoding = "UTF-8")
12
+ if [:read, :close].all? { |x| thing.respond_to?(x) }
13
+ io(thing, Parser::ENCODINGS[encoding])
14
+ else
15
+ memory(thing)
16
+ end
12
17
  end
13
18
  end
14
19
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  module SAX
@@ -23,7 +25,6 @@ module Nokogiri
23
25
  # parser << "/div>"
24
26
  # parser.finish
25
27
  class PushParser
26
-
27
28
  # The Nokogiri::XML::SAX::Document on which the PushParser will be
28
29
  # operating
29
30
  attr_accessor :document
@@ -31,7 +32,7 @@ module Nokogiri
31
32
  ###
32
33
  # Create a new PushParser with +doc+ as the SAX Document, providing
33
34
  # an optional +file_name+ and +encoding+
34
- def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
35
+ def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
35
36
  @document = doc
36
37
  @encoding = encoding
37
38
  @sax_parser = XML::SAX::Parser.new(doc)
@@ -43,16 +44,16 @@ module Nokogiri
43
44
  ###
44
45
  # Write a +chunk+ of XML to the PushParser. Any callback methods
45
46
  # that can be called will be called immediately.
46
- def write chunk, last_chunk = false
47
+ def write(chunk, last_chunk = false)
47
48
  native_write(chunk, last_chunk)
48
49
  end
49
- alias :<< :write
50
+ alias_method :<<, :write
50
51
 
51
52
  ###
52
53
  # Finish the parsing. This method is only necessary for
53
54
  # Nokogiri::XML::SAX::Document#end_document to be called.
54
55
  def finish
55
- write '', true
56
+ write("", true)
56
57
  end
57
58
  end
58
59
  end
@@ -1,4 +1,6 @@
1
- require 'nokogiri/xml/sax/document'
2
- require 'nokogiri/xml/sax/parser_context'
3
- require 'nokogiri/xml/sax/parser'
4
- require 'nokogiri/xml/sax/push_parser'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "sax/document"
4
+ require_relative "sax/parser_context"
5
+ require_relative "sax/parser"
6
+ require_relative "sax/push_parser"