nokogiri 1.10.10-java → 1.11.0-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (159)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/README.md +165 -91
  5. data/ext/java/nokogiri/HtmlDocument.java +34 -46
  6. data/ext/java/nokogiri/HtmlSaxParserContext.java +88 -58
  7. data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
  8. data/ext/java/nokogiri/NokogiriService.java +1 -1
  9. data/ext/java/nokogiri/XmlAttr.java +13 -20
  10. data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
  11. data/ext/java/nokogiri/XmlCdata.java +3 -4
  12. data/ext/java/nokogiri/XmlComment.java +1 -1
  13. data/ext/java/nokogiri/XmlDocument.java +148 -175
  14. data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
  15. data/ext/java/nokogiri/XmlDtd.java +5 -8
  16. data/ext/java/nokogiri/XmlElement.java +1 -20
  17. data/ext/java/nokogiri/XmlElementDecl.java +23 -28
  18. data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
  19. data/ext/java/nokogiri/XmlEntityReference.java +2 -2
  20. data/ext/java/nokogiri/XmlNamespace.java +72 -89
  21. data/ext/java/nokogiri/XmlNode.java +303 -406
  22. data/ext/java/nokogiri/XmlNodeSet.java +70 -76
  23. data/ext/java/nokogiri/XmlReader.java +12 -13
  24. data/ext/java/nokogiri/XmlRelaxng.java +10 -3
  25. data/ext/java/nokogiri/XmlSaxParserContext.java +15 -10
  26. data/ext/java/nokogiri/XmlSchema.java +87 -27
  27. data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
  28. data/ext/java/nokogiri/XmlText.java +12 -9
  29. data/ext/java/nokogiri/XmlXpathContext.java +55 -25
  30. data/ext/java/nokogiri/XsltStylesheet.java +7 -15
  31. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +52 -46
  32. data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
  33. data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
  34. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
  35. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
  36. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +67 -10
  37. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
  38. data/ext/java/nokogiri/internals/ParserContext.java +27 -73
  39. data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
  40. data/ext/java/nokogiri/internals/XmlDomParserContext.java +18 -33
  41. data/ext/nokogiri/depend +476 -357
  42. data/ext/nokogiri/extconf.rb +507 -357
  43. data/ext/nokogiri/html_document.c +79 -78
  44. data/ext/nokogiri/html_sax_parser_context.c +2 -2
  45. data/ext/nokogiri/nokogiri.c +34 -40
  46. data/ext/nokogiri/xml_document.c +18 -4
  47. data/ext/nokogiri/xml_io.c +8 -6
  48. data/ext/nokogiri/xml_node.c +21 -1
  49. data/ext/nokogiri/xml_node_set.c +1 -1
  50. data/ext/nokogiri/xml_reader.c +6 -17
  51. data/ext/nokogiri/xml_relax_ng.c +29 -11
  52. data/ext/nokogiri/xml_sax_parser.c +2 -7
  53. data/ext/nokogiri/xml_sax_parser_context.c +2 -2
  54. data/ext/nokogiri/xml_schema.c +55 -13
  55. data/ext/nokogiri/xml_xpath_context.c +80 -4
  56. data/ext/nokogiri/xslt_stylesheet.c +1 -8
  57. data/lib/nokogiri.rb +22 -22
  58. data/lib/nokogiri/css.rb +1 -0
  59. data/lib/nokogiri/css/node.rb +1 -0
  60. data/lib/nokogiri/css/parser.rb +63 -62
  61. data/lib/nokogiri/css/parser.y +2 -2
  62. data/lib/nokogiri/css/parser_extras.rb +39 -36
  63. data/lib/nokogiri/css/syntax_error.rb +1 -0
  64. data/lib/nokogiri/css/tokenizer.rb +1 -0
  65. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  66. data/lib/nokogiri/decorators/slop.rb +1 -0
  67. data/lib/nokogiri/html.rb +1 -0
  68. data/lib/nokogiri/html/builder.rb +1 -0
  69. data/lib/nokogiri/html/document.rb +13 -26
  70. data/lib/nokogiri/html/document_fragment.rb +1 -0
  71. data/lib/nokogiri/html/element_description.rb +1 -0
  72. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  73. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  74. data/lib/nokogiri/html/sax/parser.rb +1 -0
  75. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  76. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  77. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  78. data/lib/nokogiri/nokogiri.jar +0 -0
  79. data/lib/nokogiri/syntax_error.rb +1 -0
  80. data/lib/nokogiri/version.rb +3 -109
  81. data/lib/nokogiri/version/constant.rb +5 -0
  82. data/lib/nokogiri/version/info.rb +182 -0
  83. data/lib/nokogiri/xml.rb +1 -0
  84. data/lib/nokogiri/xml/attr.rb +1 -0
  85. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  86. data/lib/nokogiri/xml/builder.rb +3 -2
  87. data/lib/nokogiri/xml/cdata.rb +1 -0
  88. data/lib/nokogiri/xml/character_data.rb +1 -0
  89. data/lib/nokogiri/xml/document.rb +20 -15
  90. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  91. data/lib/nokogiri/xml/dtd.rb +1 -0
  92. data/lib/nokogiri/xml/element_content.rb +1 -0
  93. data/lib/nokogiri/xml/element_decl.rb +1 -0
  94. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  95. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  96. data/lib/nokogiri/xml/namespace.rb +1 -0
  97. data/lib/nokogiri/xml/node.rb +587 -249
  98. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  99. data/lib/nokogiri/xml/node_set.rb +1 -0
  100. data/lib/nokogiri/xml/notation.rb +1 -0
  101. data/lib/nokogiri/xml/parse_options.rb +10 -3
  102. data/lib/nokogiri/xml/pp.rb +1 -0
  103. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  104. data/lib/nokogiri/xml/pp/node.rb +1 -0
  105. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  106. data/lib/nokogiri/xml/reader.rb +7 -3
  107. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  108. data/lib/nokogiri/xml/sax.rb +1 -0
  109. data/lib/nokogiri/xml/sax/document.rb +1 -0
  110. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  111. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  112. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  113. data/lib/nokogiri/xml/schema.rb +13 -4
  114. data/lib/nokogiri/xml/searchable.rb +25 -16
  115. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  116. data/lib/nokogiri/xml/text.rb +1 -0
  117. data/lib/nokogiri/xml/xpath.rb +1 -0
  118. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
  119. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  120. data/lib/nokogiri/xslt.rb +1 -0
  121. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  122. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  123. metadata +86 -159
  124. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
  125. data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
  126. data/ext/nokogiri/html_document.h +0 -10
  127. data/ext/nokogiri/html_element_description.h +0 -10
  128. data/ext/nokogiri/html_entity_lookup.h +0 -8
  129. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  130. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  131. data/ext/nokogiri/nokogiri.h +0 -121
  132. data/ext/nokogiri/xml_attr.h +0 -9
  133. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  134. data/ext/nokogiri/xml_cdata.h +0 -9
  135. data/ext/nokogiri/xml_comment.h +0 -9
  136. data/ext/nokogiri/xml_document.h +0 -23
  137. data/ext/nokogiri/xml_document_fragment.h +0 -10
  138. data/ext/nokogiri/xml_dtd.h +0 -10
  139. data/ext/nokogiri/xml_element_content.h +0 -10
  140. data/ext/nokogiri/xml_element_decl.h +0 -9
  141. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  142. data/ext/nokogiri/xml_entity_decl.h +0 -10
  143. data/ext/nokogiri/xml_entity_reference.h +0 -9
  144. data/ext/nokogiri/xml_io.h +0 -11
  145. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  146. data/ext/nokogiri/xml_namespace.h +0 -14
  147. data/ext/nokogiri/xml_node.h +0 -13
  148. data/ext/nokogiri/xml_node_set.h +0 -12
  149. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  150. data/ext/nokogiri/xml_reader.h +0 -10
  151. data/ext/nokogiri/xml_relax_ng.h +0 -9
  152. data/ext/nokogiri/xml_sax_parser.h +0 -39
  153. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  154. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  155. data/ext/nokogiri/xml_schema.h +0 -9
  156. data/ext/nokogiri/xml_syntax_error.h +0 -13
  157. data/ext/nokogiri/xml_text.h +0 -9
  158. data/ext/nokogiri/xml_xpath_context.h +0 -10
  159. data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -88,7 +88,7 @@ rule
88
88
  )
89
89
  }
90
90
  | LSQUARE NUMBER RSQUARE {
91
- # Non standard, but hpricot supports it.
91
+ # non-standard, from hpricot
92
92
  result = Node.new(:PSEUDO_CLASS,
93
93
  [Node.new(:FUNCTION, ['nth-child(', val[1]])]
94
94
  )
@@ -139,7 +139,7 @@ rule
139
139
  when 'n'
140
140
  result = Node.new(:NTH, ['1','n','+','0'])
141
141
  else
142
- # This is not CSS standard. It allows us to support this:
142
+ # non-standard to support custom functions:
143
143
  # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
144
144
  # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
145
145
  # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
@@ -1,63 +1,66 @@
1
- require 'thread'
1
+ # frozen_string_literal: true
2
+ require "thread"
2
3
 
3
4
  module Nokogiri
4
5
  module CSS
5
6
  class Parser < Racc::Parser
6
- @cache_on = true
7
- @cache = {}
8
- @mutex = Mutex.new
7
+ CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
8
+
9
+ @cache = {}
10
+ @mutex = Mutex.new
9
11
 
10
12
  class << self
11
- # Turn on CSS parse caching
12
- attr_accessor :cache_on
13
- alias :cache_on? :cache_on
14
- alias :set_cache :cache_on=
13
+ # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
14
+ def cache_on?
15
+ !Thread.current[CACHE_SWITCH_NAME]
16
+ end
17
+
18
+ # Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
19
+ def set_cache(value)
20
+ Thread.current[CACHE_SWITCH_NAME] = !value
21
+ end
15
22
 
16
23
  # Get the css selector in +string+ from the cache
17
- def [] string
18
- return unless @cache_on
24
+ def [](string)
25
+ return unless cache_on?
19
26
  @mutex.synchronize { @cache[string] }
20
27
  end
21
28
 
22
29
  # Set the css selector in +string+ in the cache to +value+
23
- def []= string, value
24
- return value unless @cache_on
30
+ def []=(string, value)
31
+ return value unless cache_on?
25
32
  @mutex.synchronize { @cache[string] = value }
26
33
  end
27
34
 
28
35
  # Clear the cache
29
- def clear_cache
30
- @mutex.synchronize { @cache = {} }
36
+ def clear_cache(create_new_object = false)
37
+ @mutex.synchronize do
38
+ if create_new_object
39
+ @cache = {}
40
+ else
41
+ @cache.clear
42
+ end
43
+ end
31
44
  end
32
45
 
33
46
  # Execute +block+ without cache
34
- def without_cache &block
35
- tmp = @cache_on
36
- @cache_on = false
47
+ def without_cache(&block)
48
+ original_cache_setting = cache_on?
49
+ set_cache false
37
50
  block.call
38
- @cache_on = tmp
39
- end
40
-
41
- ###
42
- # Parse this CSS selector in +selector+. Returns an AST.
43
- def parse selector
44
- @warned ||= false
45
- unless @warned
46
- $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
47
- @warned = true
48
- end
49
- new.parse selector
51
+ ensure
52
+ set_cache original_cache_setting
50
53
  end
51
54
  end
52
55
 
53
56
  # Create a new CSS parser with respect to +namespaces+
54
- def initialize namespaces = {}
55
- @tokenizer = Tokenizer.new
57
+ def initialize(namespaces = {})
58
+ @tokenizer = Tokenizer.new
56
59
  @namespaces = namespaces
57
60
  super()
58
61
  end
59
62
 
60
- def parse string
63
+ def parse(string)
61
64
  @tokenizer.scan_setup string
62
65
  do_parse
63
66
  end
@@ -67,14 +70,14 @@ module Nokogiri
67
70
  end
68
71
 
69
72
  # Get the xpath for +string+ using +options+
70
- def xpath_for string, options={}
73
+ def xpath_for(string, options = {})
71
74
  key = "#{string}#{options[:ns]}#{options[:prefix]}"
72
75
  v = self.class[key]
73
76
  return v if v
74
77
 
75
78
  args = [
76
- options[:prefix] || '//',
77
- options[:visitor] || XPathVisitor.new
79
+ options[:prefix] || "//",
80
+ options[:visitor] || XPathVisitor.new,
78
81
  ]
79
82
  self.class[key] = parse(string).map { |ast|
80
83
  ast.to_xpath(*args)
@@ -82,7 +85,7 @@ module Nokogiri
82
85
  end
83
86
 
84
87
  # On CSS parser error, raise an exception
85
- def on_error error_token_id, error_value, value_stack
88
+ def on_error(error_token_id, error_value, value_stack)
86
89
  after = value_stack.compact.last
87
90
  raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
88
91
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/syntax_error'
2
3
  module Nokogiri
3
4
  module CSS
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  #--
2
3
  # DO NOT MODIFY!!!!
3
4
  # This file is automatically generated by rex 1.0.7
@@ -1,8 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module CSS
3
4
  class XPathVisitor # :nodoc:
4
5
  def visit_function node
5
-
6
6
  msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
7
  return self.send(msg, node) if self.respond_to?(msg)
8
8
 
@@ -12,49 +12,51 @@ module Nokogiri
12
12
  when /^self\(/
13
13
  "self::#{node.value[1]}"
14
14
  when /^eq\(/
15
- "position() = #{node.value[1]}"
15
+ "position()=#{node.value[1]}"
16
16
  when /^(nth|nth-of-type)\(/
17
17
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
18
  nth(node.value[1])
19
19
  else
20
- "position() = #{node.value[1]}"
20
+ "position()=#{node.value[1]}"
21
21
  end
22
22
  when /^nth-child\(/
23
23
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
24
  nth(node.value[1], :child => true)
25
25
  else
26
- "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
26
+ "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
27
27
  end
28
28
  when /^nth-last-of-type\(/
29
29
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
30
  nth(node.value[1], :last => true)
31
31
  else
32
32
  index = node.value[1].to_i - 1
33
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
33
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
34
  end
35
35
  when /^nth-last-child\(/
36
36
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
37
  nth(node.value[1], :last => true, :child => true)
38
38
  else
39
- "count(following-sibling::*) = #{node.value[1].to_i-1}"
39
+ "count(following-sibling::*)=#{node.value[1].to_i-1}"
40
40
  end
41
41
  when /^(first|first-of-type)\(/
42
- "position() = 1"
42
+ "position()=1"
43
43
  when /^(last|last-of-type)\(/
44
- "position() = last()"
44
+ "position()=last()"
45
45
  when /^contains\(/
46
- "contains(., #{node.value[1]})"
46
+ "contains(.,#{node.value[1]})"
47
47
  when /^gt\(/
48
- "position() > #{node.value[1]}"
48
+ "position()>#{node.value[1]}"
49
49
  when /^only-child\(/
50
- "last() = 1"
50
+ "last()=1"
51
51
  when /^comment\(/
52
52
  "comment()"
53
53
  when /^has\(/
54
- ".//#{node.value[1].accept(self)}"
54
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
55
+ ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
55
56
  else
57
+ # non-standard. this looks like a function call.
56
58
  args = ['.'] + node.value[1..-1]
57
- "#{node.value.first}#{args.join(', ')})"
59
+ "#{node.value.first}#{args.join(',')})"
58
60
  end
59
61
  end
60
62
 
@@ -69,18 +71,18 @@ module Nokogiri
69
71
 
70
72
  def visit_id node
71
73
  node.value.first =~ /^#(.*)$/
72
- "@id = '#{$1}'"
74
+ "@id='#{$1}'"
73
75
  end
74
76
 
75
77
  def visit_attribute_condition node
76
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
77
- ''
78
- else
79
- '@'
80
- end
78
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
+ ''
80
+ else
81
+ '@'
82
+ end
81
83
  attribute += node.value.first.accept(self)
82
84
 
83
- # Support non-standard css
85
+ # non-standard. attributes starting with '@'
84
86
  attribute.gsub!(/^@@/, '@')
85
87
 
86
88
  return attribute unless node.value.length == 3
@@ -88,29 +90,30 @@ module Nokogiri
88
90
  value = node.value.last
89
91
  value = "'#{value}'" if value !~ /^['"]/
90
92
 
93
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
91
94
  if (value[0]==value[-1]) && %q{"'}.include?(value[0])
92
95
  str_value = value[1..-2]
93
96
  if str_value.include?(value[0])
94
- value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
97
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
95
98
  end
96
99
  end
97
100
 
98
101
  case node.value[1]
99
102
  when :equal
100
- attribute + " = " + "#{value}"
103
+ attribute + "=" + "#{value}"
101
104
  when :not_equal
102
- attribute + " != " + "#{value}"
105
+ attribute + "!=" + "#{value}"
103
106
  when :substring_match
104
- "contains(#{attribute}, #{value})"
107
+ "contains(#{attribute},#{value})"
105
108
  when :prefix_match
106
- "starts-with(#{attribute}, #{value})"
109
+ "starts-with(#{attribute},#{value})"
107
110
  when :dash_match
108
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
111
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
109
112
  when :includes
110
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
113
+ value = value[1..-2] # strip quotes
114
+ css_class(attribute, value)
111
115
  when :suffix_match
112
- "substring(#{attribute}, string-length(#{attribute}) - " +
113
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
116
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
114
117
  else
115
118
  attribute + " #{node.value[1]} " + "#{value}"
116
119
  end
@@ -124,14 +127,14 @@ module Nokogiri
124
127
  return self.send(msg, node) if self.respond_to?(msg)
125
128
 
126
129
  case node.value.first
127
- when "first" then "position() = 1"
128
- when "first-child" then "count(preceding-sibling::*) = 0"
129
- when "last" then "position() = last()"
130
- when "last-child" then "count(following-sibling::*) = 0"
131
- when "first-of-type" then "position() = 1"
132
- when "last-of-type" then "position() = last()"
133
- when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
134
- when "only-of-type" then "last() = 1"
130
+ when "first" then "position()=1"
131
+ when "first-child" then "count(preceding-sibling::*)=0"
132
+ when "last" then "position()=last()"
133
+ when "last-child" then "count(following-sibling::*)=0"
134
+ when "first-of-type" then "position()=1"
135
+ when "last-of-type" then "position()=last()"
136
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
137
+ when "only-of-type" then "last()=1"
135
138
  when "empty" then "not(node())"
136
139
  when "parent" then "node()"
137
140
  when "root" then "not(parent::*)"
@@ -142,7 +145,7 @@ module Nokogiri
142
145
  end
143
146
 
144
147
  def visit_class_condition node
145
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
148
+ css_class("@class", node.value.first)
146
149
  end
147
150
 
148
151
  def visit_combinator node
@@ -179,25 +182,26 @@ module Nokogiri
179
182
  node.accept(self)
180
183
  end
181
184
 
182
- private
185
+ private
186
+
183
187
  def nth node, options={}
184
188
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
185
189
 
186
190
  a, b = read_a_and_positive_b node.value
187
191
  position = if options[:child]
188
- options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
192
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
189
193
  else
190
194
  options[:last] ? "(last()-position()+1)" : "position()"
191
195
  end
192
196
 
193
197
  if b.zero?
194
- "(#{position} mod #{a}) = 0"
198
+ "(#{position} mod #{a})=0"
195
199
  else
196
200
  compare = a < 0 ? "<=" : ">="
197
201
  if a.abs == 1
198
- "#{position} #{compare} #{b}"
202
+ "#{position}#{compare}#{b}"
199
203
  else
200
- "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
204
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
201
205
  end
202
206
  end
203
207
  end
@@ -225,6 +229,32 @@ module Nokogiri
225
229
  end =~ /(nth|first|last|only)-of-type(\()?/
226
230
  end
227
231
  end
232
+
233
+ # use only ordinary xpath functions
234
+ def css_class_standard(hay, needle)
235
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
236
+ end
237
+
238
+ # use the builtin implementation
239
+ def css_class_builtin(hay, needle)
240
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
241
+ end
242
+
243
+ alias_method :css_class, :css_class_standard
244
+ end
245
+
246
+ class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
+ private
248
+ alias_method :css_class, :css_class_builtin
249
+ end
250
+
251
+ class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
+ private
253
+ if Nokogiri.uses_libxml?
254
+ alias_method :css_class, :css_class_builtin
255
+ else
256
+ alias_method :css_class, :css_class_standard
257
+ end
228
258
  end
229
259
  end
230
260
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module Decorators
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'nokogiri/html/entity_lookup'
2
3
  require 'nokogiri/html/document'
3
4
  require 'nokogiri/html/document_fragment'
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module HTML
3
4
  ###
@@ -1,3 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
1
5
  module Nokogiri
2
6
  module HTML
3
7
  class Document < Nokogiri::XML::Document
@@ -160,11 +164,12 @@ module Nokogiri
160
164
  # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
161
165
  # Nokogiri::XML::ParseOptions.
162
166
  def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
163
-
164
167
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
165
- # Give the options to the user
168
+
166
169
  yield options if block_given?
167
170
 
171
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
172
+
168
173
  if string_or_io.respond_to?(:encoding)
169
174
  unless string_or_io.encoding.name == "ASCII-8BIT"
170
175
  encoding ||= string_or_io.encoding.name
@@ -172,7 +177,12 @@ module Nokogiri
172
177
  end
173
178
 
174
179
  if string_or_io.respond_to?(:read)
175
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
180
+ if string_or_io.is_a?(Pathname)
181
+ # resolve the Pathname to the file and open it as an IO object, see #2110
182
+ string_or_io = string_or_io.expand_path.open
183
+ url ||= string_or_io.path
184
+ end
185
+
176
186
  unless encoding
177
187
  # Libxml2's parser has poor support for encoding
178
188
  # detection. First, it does not recognize the HTML5
@@ -251,9 +261,6 @@ module Nokogiri
251
261
  end
252
262
 
253
263
  def self.detect_encoding(chunk)
254
- if Nokogiri.jruby? && EncodingReader.is_jruby_without_fix?
255
- return EncodingReader.detect_encoding_for_jruby_without_fix(chunk)
256
- end
257
264
  m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
258
265
  return Nokogiri.XML(m[1]).encoding
259
266
 
@@ -272,26 +279,6 @@ module Nokogiri
272
279
  end
273
280
  end
274
281
 
275
- def self.is_jruby_without_fix?
276
- JRUBY_VERSION.split('.').join.to_i < 165
277
- end
278
-
279
- def self.detect_encoding_for_jruby_without_fix(chunk)
280
- m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
281
- return Nokogiri.XML(m[1]).encoding
282
-
283
- m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
284
- return m[4]
285
-
286
- catch(:encoding_found) {
287
- Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found.to_s)).parse(chunk)
288
- nil
289
- }
290
- rescue Nokogiri::SyntaxError, RuntimeError
291
- # Ignore parser errors that nokogiri may raise
292
- nil
293
- end
294
-
295
282
  def initialize(io)
296
283
  @io = io
297
284
  @firstchunk = nil