nokogiri 1.11.0.rc1-java → 1.11.2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (188)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +171 -94
  6. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  7. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  8. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  9. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  10. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  11. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  12. data/ext/java/nokogiri/NokogiriService.java +597 -526
  13. data/ext/java/nokogiri/XmlAttr.java +120 -96
  14. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  15. data/ext/java/nokogiri/XmlCdata.java +35 -26
  16. data/ext/java/nokogiri/XmlComment.java +48 -37
  17. data/ext/java/nokogiri/XmlDocument.java +642 -540
  18. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  19. data/ext/java/nokogiri/XmlDtd.java +450 -384
  20. data/ext/java/nokogiri/XmlElement.java +25 -18
  21. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  22. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  23. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  24. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  25. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  26. data/ext/java/nokogiri/XmlNode.java +1843 -1590
  27. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  28. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  29. data/ext/java/nokogiri/XmlReader.java +513 -418
  30. data/ext/java/nokogiri/XmlRelaxng.java +92 -72
  31. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -280
  32. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  33. data/ext/java/nokogiri/XmlSchema.java +335 -210
  34. data/ext/java/nokogiri/XmlSyntaxError.java +113 -87
  35. data/ext/java/nokogiri/XmlText.java +57 -46
  36. data/ext/java/nokogiri/XmlXpathContext.java +242 -178
  37. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  38. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +203 -160
  40. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  41. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  42. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  43. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  44. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  45. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  46. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  48. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +81 -59
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  50. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  51. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +121 -48
  53. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -22
  54. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  55. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  56. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  57. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  58. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  59. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  60. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  61. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  62. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  63. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  64. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  65. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  66. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  67. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  81. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  82. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  83. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  84. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  85. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  86. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  87. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  88. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  89. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  90. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  92. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  93. data/ext/nokogiri/depend +37 -358
  94. data/ext/nokogiri/extconf.rb +585 -374
  95. data/ext/nokogiri/html_document.c +78 -82
  96. data/ext/nokogiri/html_element_description.c +84 -71
  97. data/ext/nokogiri/html_entity_lookup.c +21 -16
  98. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  99. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  100. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  101. data/ext/nokogiri/nokogiri.c +192 -93
  102. data/ext/nokogiri/test_global_handlers.c +40 -0
  103. data/ext/nokogiri/xml_attr.c +15 -15
  104. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  105. data/ext/nokogiri/xml_cdata.c +13 -18
  106. data/ext/nokogiri/xml_comment.c +19 -26
  107. data/ext/nokogiri/xml_document.c +225 -163
  108. data/ext/nokogiri/xml_document_fragment.c +13 -15
  109. data/ext/nokogiri/xml_dtd.c +54 -48
  110. data/ext/nokogiri/xml_element_content.c +30 -27
  111. data/ext/nokogiri/xml_element_decl.c +22 -22
  112. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  113. data/ext/nokogiri/xml_entity_decl.c +32 -30
  114. data/ext/nokogiri/xml_entity_reference.c +16 -18
  115. data/ext/nokogiri/xml_namespace.c +56 -49
  116. data/ext/nokogiri/xml_node.c +338 -286
  117. data/ext/nokogiri/xml_node_set.c +168 -156
  118. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  119. data/ext/nokogiri/xml_reader.c +195 -172
  120. data/ext/nokogiri/xml_relax_ng.c +52 -28
  121. data/ext/nokogiri/xml_sax_parser.c +118 -118
  122. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  123. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  124. data/ext/nokogiri/xml_schema.c +111 -34
  125. data/ext/nokogiri/xml_syntax_error.c +42 -21
  126. data/ext/nokogiri/xml_text.c +13 -17
  127. data/ext/nokogiri/xml_xpath_context.c +206 -123
  128. data/ext/nokogiri/xslt_stylesheet.c +158 -161
  129. data/lib/nokogiri.rb +4 -8
  130. data/lib/nokogiri/css/parser.rb +62 -62
  131. data/lib/nokogiri/css/parser.y +2 -2
  132. data/lib/nokogiri/css/parser_extras.rb +38 -36
  133. data/lib/nokogiri/css/xpath_visitor.rb +70 -42
  134. data/lib/nokogiri/extension.rb +26 -0
  135. data/lib/nokogiri/html/document.rb +12 -26
  136. data/lib/nokogiri/html/document_fragment.rb +15 -15
  137. data/lib/nokogiri/nokogiri.jar +0 -0
  138. data/lib/nokogiri/version.rb +2 -148
  139. data/lib/nokogiri/version/constant.rb +5 -0
  140. data/lib/nokogiri/version/info.rb +205 -0
  141. data/lib/nokogiri/xml/builder.rb +2 -2
  142. data/lib/nokogiri/xml/document.rb +48 -18
  143. data/lib/nokogiri/xml/document_fragment.rb +4 -6
  144. data/lib/nokogiri/xml/node.rb +599 -279
  145. data/lib/nokogiri/xml/parse_options.rb +6 -0
  146. data/lib/nokogiri/xml/reader.rb +2 -9
  147. data/lib/nokogiri/xml/relax_ng.rb +6 -2
  148. data/lib/nokogiri/xml/schema.rb +12 -4
  149. data/lib/nokogiri/xml/searchable.rb +24 -16
  150. data/lib/nokogiri/xml/xpath.rb +1 -3
  151. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  152. metadata +87 -158
  153. data/ext/nokogiri/html_document.h +0 -10
  154. data/ext/nokogiri/html_element_description.h +0 -10
  155. data/ext/nokogiri/html_entity_lookup.h +0 -8
  156. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  157. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  158. data/ext/nokogiri/nokogiri.h +0 -122
  159. data/ext/nokogiri/xml_attr.h +0 -9
  160. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  161. data/ext/nokogiri/xml_cdata.h +0 -9
  162. data/ext/nokogiri/xml_comment.h +0 -9
  163. data/ext/nokogiri/xml_document.h +0 -23
  164. data/ext/nokogiri/xml_document_fragment.h +0 -10
  165. data/ext/nokogiri/xml_dtd.h +0 -10
  166. data/ext/nokogiri/xml_element_content.h +0 -10
  167. data/ext/nokogiri/xml_element_decl.h +0 -9
  168. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  169. data/ext/nokogiri/xml_entity_decl.h +0 -10
  170. data/ext/nokogiri/xml_entity_reference.h +0 -9
  171. data/ext/nokogiri/xml_io.c +0 -61
  172. data/ext/nokogiri/xml_io.h +0 -11
  173. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  174. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  175. data/ext/nokogiri/xml_namespace.h +0 -14
  176. data/ext/nokogiri/xml_node.h +0 -13
  177. data/ext/nokogiri/xml_node_set.h +0 -12
  178. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  179. data/ext/nokogiri/xml_reader.h +0 -10
  180. data/ext/nokogiri/xml_relax_ng.h +0 -9
  181. data/ext/nokogiri/xml_sax_parser.h +0 -39
  182. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  183. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_schema.h +0 -9
  185. data/ext/nokogiri/xml_syntax_error.h +0 -13
  186. data/ext/nokogiri/xml_text.h +0 -9
  187. data/ext/nokogiri/xml_xpath_context.h +0 -10
  188. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -88,7 +88,7 @@ rule
88
88
  )
89
89
  }
90
90
  | LSQUARE NUMBER RSQUARE {
91
- # Non standard, but hpricot supports it.
91
+ # non-standard, from hpricot
92
92
  result = Node.new(:PSEUDO_CLASS,
93
93
  [Node.new(:FUNCTION, ['nth-child(', val[1]])]
94
94
  )
@@ -139,7 +139,7 @@ rule
139
139
  when 'n'
140
140
  result = Node.new(:NTH, ['1','n','+','0'])
141
141
  else
142
- # This is not CSS standard. It allows us to support this:
142
+ # non-standard to support custom functions:
143
143
  # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
144
144
  # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
145
145
  # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
@@ -1,64 +1,66 @@
1
1
  # frozen_string_literal: true
2
- require 'thread'
2
+ require "thread"
3
3
 
4
4
  module Nokogiri
5
5
  module CSS
6
6
  class Parser < Racc::Parser
7
- @cache_on = true
8
- @cache = {}
9
- @mutex = Mutex.new
7
+ CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
8
+
9
+ @cache = {}
10
+ @mutex = Mutex.new
10
11
 
11
12
  class << self
12
- # Turn on CSS parse caching
13
- attr_accessor :cache_on
14
- alias :cache_on? :cache_on
15
- alias :set_cache :cache_on=
13
+ # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
14
+ def cache_on?
15
+ !Thread.current[CACHE_SWITCH_NAME]
16
+ end
17
+
18
+ # Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
19
+ def set_cache(value)
20
+ Thread.current[CACHE_SWITCH_NAME] = !value
21
+ end
16
22
 
17
23
  # Get the css selector in +string+ from the cache
18
- def [] string
19
- return unless @cache_on
24
+ def [](string)
25
+ return unless cache_on?
20
26
  @mutex.synchronize { @cache[string] }
21
27
  end
22
28
 
23
29
  # Set the css selector in +string+ in the cache to +value+
24
- def []= string, value
25
- return value unless @cache_on
30
+ def []=(string, value)
31
+ return value unless cache_on?
26
32
  @mutex.synchronize { @cache[string] = value }
27
33
  end
28
34
 
29
35
  # Clear the cache
30
- def clear_cache
31
- @mutex.synchronize { @cache = {} }
36
+ def clear_cache(create_new_object = false)
37
+ @mutex.synchronize do
38
+ if create_new_object
39
+ @cache = {}
40
+ else
41
+ @cache.clear
42
+ end
43
+ end
32
44
  end
33
45
 
34
46
  # Execute +block+ without cache
35
- def without_cache &block
36
- tmp = @cache_on
37
- @cache_on = false
47
+ def without_cache(&block)
48
+ original_cache_setting = cache_on?
49
+ set_cache false
38
50
  block.call
39
- @cache_on = tmp
40
- end
41
-
42
- ###
43
- # Parse this CSS selector in +selector+. Returns an AST.
44
- def parse selector
45
- @warned ||= false
46
- unless @warned
47
- $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
48
- @warned = true
49
- end
50
- new.parse selector
51
+ ensure
52
+ set_cache original_cache_setting
51
53
  end
52
54
  end
53
55
 
54
56
  # Create a new CSS parser with respect to +namespaces+
55
- def initialize namespaces = {}
56
- @tokenizer = Tokenizer.new
57
+ def initialize(namespaces = {})
58
+ @tokenizer = Tokenizer.new
57
59
  @namespaces = namespaces
58
60
  super()
59
61
  end
60
62
 
61
- def parse string
63
+ def parse(string)
62
64
  @tokenizer.scan_setup string
63
65
  do_parse
64
66
  end
@@ -68,14 +70,14 @@ module Nokogiri
68
70
  end
69
71
 
70
72
  # Get the xpath for +string+ using +options+
71
- def xpath_for string, options={}
73
+ def xpath_for(string, options = {})
72
74
  key = "#{string}#{options[:ns]}#{options[:prefix]}"
73
75
  v = self.class[key]
74
76
  return v if v
75
77
 
76
78
  args = [
77
- options[:prefix] || '//',
78
- options[:visitor] || XPathVisitor.new
79
+ options[:prefix] || "//",
80
+ options[:visitor] || XPathVisitor.new,
79
81
  ]
80
82
  self.class[key] = parse(string).map { |ast|
81
83
  ast.to_xpath(*args)
@@ -83,7 +85,7 @@ module Nokogiri
83
85
  end
84
86
 
85
87
  # On CSS parser error, raise an exception
86
- def on_error error_token_id, error_value, value_stack
88
+ def on_error(error_token_id, error_value, value_stack)
87
89
  after = value_stack.compact.last
88
90
  raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
89
91
  end
@@ -3,7 +3,6 @@ module Nokogiri
3
3
  module CSS
4
4
  class XPathVisitor # :nodoc:
5
5
  def visit_function node
6
-
7
6
  msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
8
7
  return self.send(msg, node) if self.respond_to?(msg)
9
8
 
@@ -13,50 +12,51 @@ module Nokogiri
13
12
  when /^self\(/
14
13
  "self::#{node.value[1]}"
15
14
  when /^eq\(/
16
- "position() = #{node.value[1]}"
15
+ "position()=#{node.value[1]}"
17
16
  when /^(nth|nth-of-type)\(/
18
17
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
19
18
  nth(node.value[1])
20
19
  else
21
- "position() = #{node.value[1]}"
20
+ "position()=#{node.value[1]}"
22
21
  end
23
22
  when /^nth-child\(/
24
23
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
25
24
  nth(node.value[1], :child => true)
26
25
  else
27
- "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
26
+ "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
28
27
  end
29
28
  when /^nth-last-of-type\(/
30
29
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
31
30
  nth(node.value[1], :last => true)
32
31
  else
33
32
  index = node.value[1].to_i - 1
34
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
33
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
35
34
  end
36
35
  when /^nth-last-child\(/
37
36
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
38
37
  nth(node.value[1], :last => true, :child => true)
39
38
  else
40
- "count(following-sibling::*) = #{node.value[1].to_i-1}"
39
+ "count(following-sibling::*)=#{node.value[1].to_i-1}"
41
40
  end
42
41
  when /^(first|first-of-type)\(/
43
- "position() = 1"
42
+ "position()=1"
44
43
  when /^(last|last-of-type)\(/
45
- "position() = last()"
44
+ "position()=last()"
46
45
  when /^contains\(/
47
- "contains(., #{node.value[1]})"
46
+ "contains(.,#{node.value[1]})"
48
47
  when /^gt\(/
49
- "position() > #{node.value[1]}"
48
+ "position()>#{node.value[1]}"
50
49
  when /^only-child\(/
51
- "last() = 1"
50
+ "last()=1"
52
51
  when /^comment\(/
53
52
  "comment()"
54
53
  when /^has\(/
55
54
  is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
56
55
  ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
57
56
  else
57
+ # non-standard. this looks like a function call.
58
58
  args = ['.'] + node.value[1..-1]
59
- "#{node.value.first}#{args.join(', ')})"
59
+ "#{node.value.first}#{args.join(',')})"
60
60
  end
61
61
  end
62
62
 
@@ -71,18 +71,18 @@ module Nokogiri
71
71
 
72
72
  def visit_id node
73
73
  node.value.first =~ /^#(.*)$/
74
- "@id = '#{$1}'"
74
+ "@id='#{$1}'"
75
75
  end
76
76
 
77
77
  def visit_attribute_condition node
78
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
- ''
80
- else
81
- '@'
82
- end
78
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
+ ''
80
+ else
81
+ '@'
82
+ end
83
83
  attribute += node.value.first.accept(self)
84
84
 
85
- # Support non-standard css
85
+ # non-standard. attributes starting with '@'
86
86
  attribute.gsub!(/^@@/, '@')
87
87
 
88
88
  return attribute unless node.value.length == 3
@@ -90,29 +90,30 @@ module Nokogiri
90
90
  value = node.value.last
91
91
  value = "'#{value}'" if value !~ /^['"]/
92
92
 
93
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
93
94
  if (value[0]==value[-1]) && %q{"'}.include?(value[0])
94
95
  str_value = value[1..-2]
95
96
  if str_value.include?(value[0])
96
- value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
97
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
97
98
  end
98
99
  end
99
100
 
100
101
  case node.value[1]
101
102
  when :equal
102
- attribute + " = " + "#{value}"
103
+ attribute + "=" + "#{value}"
103
104
  when :not_equal
104
- attribute + " != " + "#{value}"
105
+ attribute + "!=" + "#{value}"
105
106
  when :substring_match
106
- "contains(#{attribute}, #{value})"
107
+ "contains(#{attribute},#{value})"
107
108
  when :prefix_match
108
- "starts-with(#{attribute}, #{value})"
109
+ "starts-with(#{attribute},#{value})"
109
110
  when :dash_match
110
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
111
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
111
112
  when :includes
112
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
113
+ value = value[1..-2] # strip quotes
114
+ css_class(attribute, value)
113
115
  when :suffix_match
114
- "substring(#{attribute}, string-length(#{attribute}) - " +
115
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
116
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
116
117
  else
117
118
  attribute + " #{node.value[1]} " + "#{value}"
118
119
  end
@@ -126,14 +127,14 @@ module Nokogiri
126
127
  return self.send(msg, node) if self.respond_to?(msg)
127
128
 
128
129
  case node.value.first
129
- when "first" then "position() = 1"
130
- when "first-child" then "count(preceding-sibling::*) = 0"
131
- when "last" then "position() = last()"
132
- when "last-child" then "count(following-sibling::*) = 0"
133
- when "first-of-type" then "position() = 1"
134
- when "last-of-type" then "position() = last()"
135
- when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
136
- when "only-of-type" then "last() = 1"
130
+ when "first" then "position()=1"
131
+ when "first-child" then "count(preceding-sibling::*)=0"
132
+ when "last" then "position()=last()"
133
+ when "last-child" then "count(following-sibling::*)=0"
134
+ when "first-of-type" then "position()=1"
135
+ when "last-of-type" then "position()=last()"
136
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
137
+ when "only-of-type" then "last()=1"
137
138
  when "empty" then "not(node())"
138
139
  when "parent" then "node()"
139
140
  when "root" then "not(parent::*)"
@@ -144,7 +145,7 @@ module Nokogiri
144
145
  end
145
146
 
146
147
  def visit_class_condition node
147
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
148
+ css_class("@class", node.value.first)
148
149
  end
149
150
 
150
151
  def visit_combinator node
@@ -181,25 +182,26 @@ module Nokogiri
181
182
  node.accept(self)
182
183
  end
183
184
 
184
- private
185
+ private
186
+
185
187
  def nth node, options={}
186
188
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
187
189
 
188
190
  a, b = read_a_and_positive_b node.value
189
191
  position = if options[:child]
190
- options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
192
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
191
193
  else
192
194
  options[:last] ? "(last()-position()+1)" : "position()"
193
195
  end
194
196
 
195
197
  if b.zero?
196
- "(#{position} mod #{a}) = 0"
198
+ "(#{position} mod #{a})=0"
197
199
  else
198
200
  compare = a < 0 ? "<=" : ">="
199
201
  if a.abs == 1
200
- "#{position} #{compare} #{b}"
202
+ "#{position}#{compare}#{b}"
201
203
  else
202
- "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
204
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
203
205
  end
204
206
  end
205
207
  end
@@ -227,6 +229,32 @@ module Nokogiri
227
229
  end =~ /(nth|first|last|only)-of-type(\()?/
228
230
  end
229
231
  end
232
+
233
+ # use only ordinary xpath functions
234
+ def css_class_standard(hay, needle)
235
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
236
+ end
237
+
238
+ # use the builtin implementation
239
+ def css_class_builtin(hay, needle)
240
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
241
+ end
242
+
243
+ alias_method :css_class, :css_class_standard
244
+ end
245
+
246
+ class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
+ private
248
+ alias_method :css_class, :css_class_builtin
249
+ end
250
+
251
+ class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
+ private
253
+ if Nokogiri.uses_libxml?
254
+ alias_method :css_class, :css_class_builtin
255
+ else
256
+ alias_method :css_class, :css_class_standard
257
+ end
230
258
  end
231
259
  end
232
260
  end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension
4
+ begin
5
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
6
+ require "nokogiri/#{Regexp.last_match(1)}/nokogiri"
7
+ rescue LoadError => e
8
+ if e.message =~ /GLIBC/
9
+ warn(<<~EOM)
10
+
11
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
12
+
13
+ #{e.message}
14
+
15
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
16
+ gem install nokogiri --platform=ruby
17
+ or:
18
+ bundle config set force_ruby_platform true
19
+
20
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
21
+
22
+ EOM
23
+ raise e
24
+ end
25
+ require 'nokogiri/nokogiri'
26
+ end
@@ -1,4 +1,7 @@
1
1
  # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
2
5
  module Nokogiri
3
6
  module HTML
4
7
  class Document < Nokogiri::XML::Document
@@ -161,11 +164,12 @@ module Nokogiri
161
164
  # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
162
165
  # Nokogiri::XML::ParseOptions.
163
166
  def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
164
-
165
167
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
166
- # Give the options to the user
168
+
167
169
  yield options if block_given?
168
170
 
171
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
172
+
169
173
  if string_or_io.respond_to?(:encoding)
170
174
  unless string_or_io.encoding.name == "ASCII-8BIT"
171
175
  encoding ||= string_or_io.encoding.name
@@ -173,7 +177,12 @@ module Nokogiri
173
177
  end
174
178
 
175
179
  if string_or_io.respond_to?(:read)
176
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
180
+ if string_or_io.is_a?(Pathname)
181
+ # resolve the Pathname to the file and open it as an IO object, see #2110
182
+ string_or_io = string_or_io.expand_path.open
183
+ url ||= string_or_io.path
184
+ end
185
+
177
186
  unless encoding
178
187
  # Libxml2's parser has poor support for encoding
179
188
  # detection. First, it does not recognize the HTML5
@@ -252,9 +261,6 @@ module Nokogiri
252
261
  end
253
262
 
254
263
  def self.detect_encoding(chunk)
255
- if Nokogiri.jruby? && EncodingReader.is_jruby_without_fix?
256
- return EncodingReader.detect_encoding_for_jruby_without_fix(chunk)
257
- end
258
264
  m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
259
265
  return Nokogiri.XML(m[1]).encoding
260
266
 
@@ -273,26 +279,6 @@ module Nokogiri
273
279
  end
274
280
  end
275
281
 
276
- def self.is_jruby_without_fix?
277
- JRUBY_VERSION.split('.').join.to_i < 165
278
- end
279
-
280
- def self.detect_encoding_for_jruby_without_fix(chunk)
281
- m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
282
- return Nokogiri.XML(m[1]).encoding
283
-
284
- m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
285
- return m[4]
286
-
287
- catch(:encoding_found) {
288
- Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found.to_s)).parse(chunk)
289
- nil
290
- }
291
- rescue Nokogiri::SyntaxError, RuntimeError
292
- # Ignore parser errors that nokogiri may raise
293
- nil
294
- end
295
-
296
282
  def initialize(io)
297
283
  @io = io
298
284
  @firstchunk = nil