nokogiri 1.10.9 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +12 -12
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +716 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +191 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +60 -51
  32. data/ext/nokogiri/xml_node.c +493 -407
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +96 -46
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +34 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri/css/node.rb +1 -0
  91. data/lib/nokogiri/css/parser.rb +64 -63
  92. data/lib/nokogiri/css/parser.y +3 -3
  93. data/lib/nokogiri/css/parser_extras.rb +39 -36
  94. data/lib/nokogiri/css/syntax_error.rb +2 -1
  95. data/lib/nokogiri/css/tokenizer.rb +1 -0
  96. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  97. data/lib/nokogiri/css.rb +15 -14
  98. data/lib/nokogiri/decorators/slop.rb +1 -0
  99. data/lib/nokogiri/extension.rb +31 -0
  100. data/lib/nokogiri/gumbo.rb +14 -0
  101. data/lib/nokogiri/html.rb +32 -27
  102. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  103. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  104. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  105. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  106. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  107. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  109. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  110. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  111. data/lib/nokogiri/html4.rb +40 -0
  112. data/lib/nokogiri/html5/document.rb +74 -0
  113. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  114. data/lib/nokogiri/html5/node.rb +93 -0
  115. data/lib/nokogiri/html5.rb +473 -0
  116. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  117. data/lib/nokogiri/syntax_error.rb +1 -0
  118. data/lib/nokogiri/version/constant.rb +5 -0
  119. data/lib/nokogiri/version/info.rb +215 -0
  120. data/lib/nokogiri/version.rb +3 -109
  121. data/lib/nokogiri/xml/attr.rb +1 -0
  122. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  123. data/lib/nokogiri/xml/builder.rb +41 -2
  124. data/lib/nokogiri/xml/cdata.rb +1 -0
  125. data/lib/nokogiri/xml/character_data.rb +1 -0
  126. data/lib/nokogiri/xml/document.rb +138 -41
  127. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  128. data/lib/nokogiri/xml/dtd.rb +1 -0
  129. data/lib/nokogiri/xml/element_content.rb +1 -0
  130. data/lib/nokogiri/xml/element_decl.rb +1 -0
  131. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  132. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  133. data/lib/nokogiri/xml/namespace.rb +1 -0
  134. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  135. data/lib/nokogiri/xml/node.rb +629 -293
  136. data/lib/nokogiri/xml/node_set.rb +1 -0
  137. data/lib/nokogiri/xml/notation.rb +1 -0
  138. data/lib/nokogiri/xml/parse_options.rb +12 -3
  139. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  140. data/lib/nokogiri/xml/pp/node.rb +1 -0
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  143. data/lib/nokogiri/xml/reader.rb +9 -12
  144. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  145. data/lib/nokogiri/xml/sax/document.rb +25 -30
  146. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  147. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  148. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  149. data/lib/nokogiri/xml/sax.rb +5 -4
  150. data/lib/nokogiri/xml/schema.rb +13 -4
  151. data/lib/nokogiri/xml/searchable.rb +25 -16
  152. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  153. data/lib/nokogiri/xml/text.rb +1 -0
  154. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  155. data/lib/nokogiri/xml/xpath.rb +4 -5
  156. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  157. data/lib/nokogiri/xml.rb +36 -36
  158. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri.rb +32 -51
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. metadata +139 -161
  173. data/ext/nokogiri/html_document.c +0 -170
  174. data/ext/nokogiri/html_document.h +0 -10
  175. data/ext/nokogiri/html_element_description.c +0 -279
  176. data/ext/nokogiri/html_element_description.h +0 -10
  177. data/ext/nokogiri/html_entity_lookup.c +0 -32
  178. data/ext/nokogiri/html_entity_lookup.h +0 -8
  179. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  180. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  181. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  182. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_attr.h +0 -9
  184. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  185. data/ext/nokogiri/xml_cdata.h +0 -9
  186. data/ext/nokogiri/xml_comment.h +0 -9
  187. data/ext/nokogiri/xml_document.h +0 -23
  188. data/ext/nokogiri/xml_document_fragment.h +0 -10
  189. data/ext/nokogiri/xml_dtd.h +0 -10
  190. data/ext/nokogiri/xml_element_content.h +0 -10
  191. data/ext/nokogiri/xml_element_decl.h +0 -9
  192. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  193. data/ext/nokogiri/xml_entity_decl.h +0 -10
  194. data/ext/nokogiri/xml_entity_reference.h +0 -9
  195. data/ext/nokogiri/xml_io.c +0 -61
  196. data/ext/nokogiri/xml_io.h +0 -11
  197. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  198. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  199. data/ext/nokogiri/xml_namespace.h +0 -14
  200. data/ext/nokogiri/xml_node.h +0 -13
  201. data/ext/nokogiri/xml_node_set.h +0 -12
  202. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  203. data/ext/nokogiri/xml_reader.h +0 -10
  204. data/ext/nokogiri/xml_relax_ng.h +0 -9
  205. data/ext/nokogiri/xml_sax_parser.h +0 -39
  206. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  207. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  208. data/ext/nokogiri/xml_schema.h +0 -9
  209. data/ext/nokogiri/xml_syntax_error.h +0 -13
  210. data/ext/nokogiri/xml_text.h +0 -9
  211. data/ext/nokogiri/xml_xpath_context.h +0 -10
  212. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  213. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  214. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  215. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  216. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,8 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module CSS
3
4
  class XPathVisitor # :nodoc:
4
5
  def visit_function node
5
-
6
6
  msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
7
  return self.send(msg, node) if self.respond_to?(msg)
8
8
 
@@ -12,49 +12,51 @@ module Nokogiri
12
12
  when /^self\(/
13
13
  "self::#{node.value[1]}"
14
14
  when /^eq\(/
15
- "position() = #{node.value[1]}"
15
+ "position()=#{node.value[1]}"
16
16
  when /^(nth|nth-of-type)\(/
17
17
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
18
  nth(node.value[1])
19
19
  else
20
- "position() = #{node.value[1]}"
20
+ "position()=#{node.value[1]}"
21
21
  end
22
22
  when /^nth-child\(/
23
23
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
24
  nth(node.value[1], :child => true)
25
25
  else
26
- "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
26
+ "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
27
27
  end
28
28
  when /^nth-last-of-type\(/
29
29
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
30
  nth(node.value[1], :last => true)
31
31
  else
32
32
  index = node.value[1].to_i - 1
33
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
33
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
34
  end
35
35
  when /^nth-last-child\(/
36
36
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
37
  nth(node.value[1], :last => true, :child => true)
38
38
  else
39
- "count(following-sibling::*) = #{node.value[1].to_i-1}"
39
+ "count(following-sibling::*)=#{node.value[1].to_i-1}"
40
40
  end
41
41
  when /^(first|first-of-type)\(/
42
- "position() = 1"
42
+ "position()=1"
43
43
  when /^(last|last-of-type)\(/
44
- "position() = last()"
44
+ "position()=last()"
45
45
  when /^contains\(/
46
- "contains(., #{node.value[1]})"
46
+ "contains(.,#{node.value[1]})"
47
47
  when /^gt\(/
48
- "position() > #{node.value[1]}"
48
+ "position()>#{node.value[1]}"
49
49
  when /^only-child\(/
50
- "last() = 1"
50
+ "last()=1"
51
51
  when /^comment\(/
52
52
  "comment()"
53
53
  when /^has\(/
54
- ".//#{node.value[1].accept(self)}"
54
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
55
+ ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
55
56
  else
57
+ # non-standard. this looks like a function call.
56
58
  args = ['.'] + node.value[1..-1]
57
- "#{node.value.first}#{args.join(', ')})"
59
+ "#{node.value.first}#{args.join(',')})"
58
60
  end
59
61
  end
60
62
 
@@ -69,18 +71,18 @@ module Nokogiri
69
71
 
70
72
  def visit_id node
71
73
  node.value.first =~ /^#(.*)$/
72
- "@id = '#{$1}'"
74
+ "@id='#{$1}'"
73
75
  end
74
76
 
75
77
  def visit_attribute_condition node
76
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
77
- ''
78
- else
79
- '@'
80
- end
78
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
+ ''
80
+ else
81
+ '@'
82
+ end
81
83
  attribute += node.value.first.accept(self)
82
84
 
83
- # Support non-standard css
85
+ # non-standard. attributes starting with '@'
84
86
  attribute.gsub!(/^@@/, '@')
85
87
 
86
88
  return attribute unless node.value.length == 3
@@ -88,29 +90,30 @@ module Nokogiri
88
90
  value = node.value.last
89
91
  value = "'#{value}'" if value !~ /^['"]/
90
92
 
93
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
91
94
  if (value[0]==value[-1]) && %q{"'}.include?(value[0])
92
95
  str_value = value[1..-2]
93
96
  if str_value.include?(value[0])
94
- value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
97
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
95
98
  end
96
99
  end
97
100
 
98
101
  case node.value[1]
99
102
  when :equal
100
- attribute + " = " + "#{value}"
103
+ attribute + "=" + "#{value}"
101
104
  when :not_equal
102
- attribute + " != " + "#{value}"
105
+ attribute + "!=" + "#{value}"
103
106
  when :substring_match
104
- "contains(#{attribute}, #{value})"
107
+ "contains(#{attribute},#{value})"
105
108
  when :prefix_match
106
- "starts-with(#{attribute}, #{value})"
109
+ "starts-with(#{attribute},#{value})"
107
110
  when :dash_match
108
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
111
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
109
112
  when :includes
110
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
113
+ value = value[1..-2] # strip quotes
114
+ css_class(attribute, value)
111
115
  when :suffix_match
112
- "substring(#{attribute}, string-length(#{attribute}) - " +
113
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
116
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
114
117
  else
115
118
  attribute + " #{node.value[1]} " + "#{value}"
116
119
  end
@@ -124,14 +127,14 @@ module Nokogiri
124
127
  return self.send(msg, node) if self.respond_to?(msg)
125
128
 
126
129
  case node.value.first
127
- when "first" then "position() = 1"
128
- when "first-child" then "count(preceding-sibling::*) = 0"
129
- when "last" then "position() = last()"
130
- when "last-child" then "count(following-sibling::*) = 0"
131
- when "first-of-type" then "position() = 1"
132
- when "last-of-type" then "position() = last()"
133
- when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
134
- when "only-of-type" then "last() = 1"
130
+ when "first" then "position()=1"
131
+ when "first-child" then "count(preceding-sibling::*)=0"
132
+ when "last" then "position()=last()"
133
+ when "last-child" then "count(following-sibling::*)=0"
134
+ when "first-of-type" then "position()=1"
135
+ when "last-of-type" then "position()=last()"
136
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
137
+ when "only-of-type" then "last()=1"
135
138
  when "empty" then "not(node())"
136
139
  when "parent" then "node()"
137
140
  when "root" then "not(parent::*)"
@@ -142,7 +145,7 @@ module Nokogiri
142
145
  end
143
146
 
144
147
  def visit_class_condition node
145
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
148
+ css_class("@class", node.value.first)
146
149
  end
147
150
 
148
151
  def visit_combinator node
@@ -179,25 +182,26 @@ module Nokogiri
179
182
  node.accept(self)
180
183
  end
181
184
 
182
- private
185
+ private
186
+
183
187
  def nth node, options={}
184
188
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
185
189
 
186
190
  a, b = read_a_and_positive_b node.value
187
191
  position = if options[:child]
188
- options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
192
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
189
193
  else
190
194
  options[:last] ? "(last()-position()+1)" : "position()"
191
195
  end
192
196
 
193
197
  if b.zero?
194
- "(#{position} mod #{a}) = 0"
198
+ "(#{position} mod #{a})=0"
195
199
  else
196
200
  compare = a < 0 ? "<=" : ">="
197
201
  if a.abs == 1
198
- "#{position} #{compare} #{b}"
202
+ "#{position}#{compare}#{b}"
199
203
  else
200
- "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
204
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
201
205
  end
202
206
  end
203
207
  end
@@ -225,6 +229,32 @@ module Nokogiri
225
229
  end =~ /(nth|first|last|only)-of-type(\()?/
226
230
  end
227
231
  end
232
+
233
+ # use only ordinary xpath functions
234
+ def css_class_standard(hay, needle)
235
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
236
+ end
237
+
238
+ # use the builtin implementation
239
+ def css_class_builtin(hay, needle)
240
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
241
+ end
242
+
243
+ alias_method :css_class, :css_class_standard
244
+ end
245
+
246
+ class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
+ private
248
+ alias_method :css_class, :css_class_builtin
249
+ end
250
+
251
+ class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
+ private
253
+ if Nokogiri.uses_libxml?
254
+ alias_method :css_class, :css_class_builtin
255
+ else
256
+ alias_method :css_class, :css_class_standard
257
+ end
228
258
  end
229
259
  end
230
260
  end
data/lib/nokogiri/css.rb CHANGED
@@ -1,27 +1,28 @@
1
- require 'nokogiri/css/node'
2
- require 'nokogiri/css/xpath_visitor'
3
- x = $-w
4
- $-w = false
5
- require 'nokogiri/css/parser'
6
- $-w = x
7
-
8
- require 'nokogiri/css/tokenizer'
9
- require 'nokogiri/css/syntax_error'
10
-
1
+ # frozen_string_literal: true
11
2
  module Nokogiri
12
3
  module CSS
13
4
  class << self
14
5
  ###
15
6
  # Parse this CSS selector in +selector+. Returns an AST.
16
- def parse selector
17
- Parser.new.parse selector
7
+ def parse(selector)
8
+ Parser.new.parse(selector)
18
9
  end
19
10
 
20
11
  ###
21
12
  # Get the XPath for +selector+.
22
- def xpath_for selector, options={}
23
- Parser.new(options[:ns] || {}).xpath_for selector, options
13
+ def xpath_for(selector, options = {})
14
+ Parser.new(options[:ns] || {}).xpath_for(selector, options)
24
15
  end
25
16
  end
26
17
  end
27
18
  end
19
+
20
+ require_relative "css/node"
21
+ require_relative "css/xpath_visitor"
22
+ x = $-w
23
+ $-w = false
24
+ require_relative "css/parser"
25
+ $-w = x
26
+
27
+ require_relative "css/tokenizer"
28
+ require_relative "css/syntax_error"
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module Decorators
3
4
  ###
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension
4
+ begin
5
+ # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
6
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
7
+ require_relative "#{Regexp.last_match(1)}/nokogiri"
8
+ rescue LoadError => e
9
+ if e.message =~ /GLIBC/
10
+ warn(<<~EOM)
11
+
12
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
13
+
14
+ #{e.message}
15
+
16
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
17
+ gem install nokogiri --platform=ruby
18
+ or:
19
+ bundle config set force_ruby_platform true
20
+
21
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
22
+
23
+ EOM
24
+ raise e
25
+ end
26
+
27
+ # use "require" instead of "require_relative" because non-native gems will place C extension files
28
+ # in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
29
+ # is in $LOAD_PATH but not necessarily relative to this file (see #2300)
30
+ require "nokogiri/nokogiri"
31
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module Gumbo
4
+ # The default maximum number of attributes per element.
5
+ DEFAULT_MAX_ATTRIBUTES = 400
6
+
7
+ # The default maximum number of errors for parsing a document or a fragment.
8
+ DEFAULT_MAX_ERRORS = 0
9
+
10
+ # The default maximum depth of the DOM tree produced by parsing a document
11
+ # or fragment.
12
+ DEFAULT_MAX_TREE_DEPTH = 400
13
+ end
14
+ end
data/lib/nokogiri/html.rb CHANGED
@@ -1,37 +1,42 @@
1
- require 'nokogiri/html/entity_lookup'
2
- require 'nokogiri/html/document'
3
- require 'nokogiri/html/document_fragment'
4
- require 'nokogiri/html/sax/parser_context'
5
- require 'nokogiri/html/sax/parser'
6
- require 'nokogiri/html/sax/push_parser'
7
- require 'nokogiri/html/element_description'
8
- require 'nokogiri/html/element_description_defaults'
1
+ # frozen_string_literal: true
2
+ require_relative "html4"
9
3
 
10
4
  module Nokogiri
11
- class << self
12
- ###
13
- # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
14
- def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
15
- Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
16
- end
17
- end
5
+ HTML = Nokogiri::HTML4
6
+
7
+ # @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
8
+ # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
9
+ # @!scope class
10
+ define_singleton_method(:HTML, Nokogiri.method(:HTML4))
18
11
 
12
+ # @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
13
+ # {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
14
+ # classes.
19
15
  module HTML
20
- class << self
21
- ###
22
- # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
23
- def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
24
- Document.parse(thing, url, encoding, options, &block)
16
+ # @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
17
+ class Document < Nokogiri::XML::Document
18
+ end
19
+
20
+ # @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
21
+ class DocumentFragment < Nokogiri::XML::DocumentFragment
22
+ end
23
+
24
+ # @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
25
+ class Builder < Nokogiri::XML::Builder
26
+ end
27
+
28
+ module SAX
29
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
30
+ class Parser < Nokogiri::XML::SAX::Parser
25
31
  end
26
32
 
27
- ####
28
- # Parse a fragment from +string+ in to a NodeSet.
29
- def fragment string, encoding = nil
30
- HTML::DocumentFragment.parse string, encoding
33
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
34
+ class ParserContext < Nokogiri::XML::SAX::ParserContext
31
35
  end
32
- end
33
36
 
34
- # Instance of Nokogiri::HTML::EntityLookup
35
- NamedCharacters = EntityLookup.new
37
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
38
+ class PushParser
39
+ end
40
+ end
36
41
  end
37
42
  end
@@ -1,5 +1,6 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
- module HTML
3
+ module HTML4
3
4
  ###
4
5
  # Nokogiri HTML builder is used for building HTML documents. It is very
5
6
  # similar to the Nokogiri::XML::Builder. In fact, you should go read the
@@ -11,7 +12,7 @@ module Nokogiri
11
12
  # Create an HTML document with a body that has an onload attribute, and a
12
13
  # span tag with a class of "bold" that has content of "Hello world".
13
14
  #
14
- # builder = Nokogiri::HTML::Builder.new do |doc|
15
+ # builder = Nokogiri::HTML4::Builder.new do |doc|
15
16
  # doc.html {
16
17
  # doc.body(:onload => 'some_func();') {
17
18
  # doc.span.bold {
@@ -1,5 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
1
5
  module Nokogiri
2
- module HTML
6
+ module HTML4
3
7
  class Document < Nokogiri::XML::Document
4
8
  ###
5
9
  # Get the meta tag encoding for this document. If there is no meta tag,
@@ -160,11 +164,12 @@ module Nokogiri
160
164
  # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
161
165
  # Nokogiri::XML::ParseOptions.
162
166
  def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
163
-
164
167
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
165
- # Give the options to the user
168
+
166
169
  yield options if block_given?
167
170
 
171
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
172
+
168
173
  if string_or_io.respond_to?(:encoding)
169
174
  unless string_or_io.encoding.name == "ASCII-8BIT"
170
175
  encoding ||= string_or_io.encoding.name
@@ -172,7 +177,12 @@ module Nokogiri
172
177
  end
173
178
 
174
179
  if string_or_io.respond_to?(:read)
175
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
180
+ if string_or_io.is_a?(Pathname)
181
+ # resolve the Pathname to the file and open it as an IO object, see #2110
182
+ string_or_io = string_or_io.expand_path.open
183
+ url ||= string_or_io.path
184
+ end
185
+
176
186
  unless encoding
177
187
  # Libxml2's parser has poor support for encoding
178
188
  # detection. First, it does not recognize the HTML5
@@ -251,9 +261,6 @@ module Nokogiri
251
261
  end
252
262
 
253
263
  def self.detect_encoding(chunk)
254
- if Nokogiri.jruby? && EncodingReader.is_jruby_without_fix?
255
- return EncodingReader.detect_encoding_for_jruby_without_fix(chunk)
256
- end
257
264
  m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
258
265
  return Nokogiri.XML(m[1]).encoding
259
266
 
@@ -261,37 +268,17 @@ module Nokogiri
261
268
  m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
262
269
  return m[4]
263
270
  catch(:encoding_found) {
264
- Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
271
+ Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
265
272
  nil
266
273
  }
267
274
  else
268
275
  handler = SAXHandler.new
269
- parser = Nokogiri::HTML::SAX::PushParser.new(handler)
276
+ parser = Nokogiri::HTML4::SAX::PushParser.new(handler)
270
277
  parser << chunk rescue Nokogiri::SyntaxError
271
278
  handler.encoding
272
279
  end
273
280
  end
274
281
 
275
- def self.is_jruby_without_fix?
276
- JRUBY_VERSION.split('.').join.to_i < 165
277
- end
278
-
279
- def self.detect_encoding_for_jruby_without_fix(chunk)
280
- m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
281
- return Nokogiri.XML(m[1]).encoding
282
-
283
- m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
284
- return m[4]
285
-
286
- catch(:encoding_found) {
287
- Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found.to_s)).parse(chunk)
288
- nil
289
- }
290
- rescue Nokogiri::SyntaxError, RuntimeError
291
- # Ignore parser errors that nokogiri may raise
292
- nil
293
- end
294
-
295
282
  def initialize(io)
296
283
  @io = io
297
284
  @firstchunk = nil
@@ -299,7 +286,7 @@ module Nokogiri
299
286
  end
300
287
 
301
288
  # This method is used by the C extension so that
302
- # Nokogiri::HTML::Document#read_io() does not leak memory when
289
+ # Nokogiri::HTML4::Document#read_io() does not leak memory when
303
290
  # EncodingFound is raised.
304
291
  attr_reader :encoding_found
305
292
 
@@ -1,28 +1,29 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
- module HTML
3
+ module HTML4
3
4
  class DocumentFragment < Nokogiri::XML::DocumentFragment
4
5
  ####
5
6
  # Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
6
- def self.parse tags, encoding = nil
7
- doc = HTML::Document.new
7
+ def self.parse(tags, encoding = nil)
8
+ doc = HTML4::Document.new
8
9
 
9
10
  encoding ||= if tags.respond_to?(:encoding)
10
- encoding = tags.encoding
11
- if encoding == ::Encoding::ASCII_8BIT
12
- 'UTF-8'
13
- else
14
- encoding.name
15
- end
16
- else
17
- 'UTF-8'
18
- end
11
+ encoding = tags.encoding
12
+ if encoding == ::Encoding::ASCII_8BIT
13
+ 'UTF-8'
14
+ else
15
+ encoding.name
16
+ end
17
+ else
18
+ 'UTF-8'
19
+ end
19
20
 
20
21
  doc.encoding = encoding
21
22
 
22
23
  new(doc, tags)
23
24
  end
24
25
 
25
- def initialize document, tags = nil, ctx = nil
26
+ def initialize(document, tags = nil, ctx = nil)
26
27
  return self unless tags
27
28
 
28
29
  if ctx
@@ -32,13 +33,13 @@ module Nokogiri
32
33
  self.errors = document.errors - preexisting_errors
33
34
  else
34
35
  # This is a horrible hack, but I don't care
35
- if tags.strip =~ /^<body/i
36
- path = "/html/body"
36
+ path = if /^\s*?<body/i.match?(tags)
37
+ "/html/body"
37
38
  else
38
- path = "/html/body/node()"
39
+ "/html/body/node()"
39
40
  end
40
41
 
41
- temp_doc = HTML::Document.parse "<html><body>#{tags}", nil, document.encoding
42
+ temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
42
43
  temp_doc.xpath(path).each { |child| child.parent = self }
43
44
  self.errors = temp_doc.errors
44
45
  end
@@ -1,5 +1,6 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
- module HTML
3
+ module HTML4
3
4
  class ElementDescription
4
5
  ###
5
6
  # Is this element a block element?
@@ -1,5 +1,6 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
- module HTML
3
+ module HTML4
3
4
  class ElementDescription
4
5
 
5
6
  # Methods are defined protected by method_defined? because at
@@ -1,5 +1,6 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
- module HTML
3
+ module HTML4
3
4
  class EntityDescription < Struct.new(:value, :name, :description); end
4
5
 
5
6
  class EntityLookup