nokogiri 1.10.9-java → 1.11.0.rc4-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +24 -22
- data/ext/java/nokogiri/HtmlDocument.java +34 -46
- data/ext/java/nokogiri/HtmlSaxParserContext.java +88 -58
- data/ext/java/nokogiri/HtmlSaxPushParser.java +1 -1
- data/ext/java/nokogiri/NokogiriService.java +1 -1
- data/ext/java/nokogiri/XmlAttr.java +13 -20
- data/ext/java/nokogiri/XmlAttributeDecl.java +11 -12
- data/ext/java/nokogiri/XmlCdata.java +3 -4
- data/ext/java/nokogiri/XmlComment.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +148 -175
- data/ext/java/nokogiri/XmlDocumentFragment.java +13 -31
- data/ext/java/nokogiri/XmlDtd.java +5 -8
- data/ext/java/nokogiri/XmlElement.java +1 -20
- data/ext/java/nokogiri/XmlElementDecl.java +23 -28
- data/ext/java/nokogiri/XmlEntityDecl.java +23 -27
- data/ext/java/nokogiri/XmlEntityReference.java +2 -2
- data/ext/java/nokogiri/XmlNamespace.java +72 -89
- data/ext/java/nokogiri/XmlNode.java +303 -406
- data/ext/java/nokogiri/XmlNodeSet.java +70 -76
- data/ext/java/nokogiri/XmlReader.java +12 -13
- data/ext/java/nokogiri/XmlRelaxng.java +10 -3
- data/ext/java/nokogiri/XmlSaxParserContext.java +15 -10
- data/ext/java/nokogiri/XmlSchema.java +87 -27
- data/ext/java/nokogiri/XmlSyntaxError.java +2 -6
- data/ext/java/nokogiri/XmlText.java +12 -9
- data/ext/java/nokogiri/XmlXpathContext.java +55 -25
- data/ext/java/nokogiri/XsltStylesheet.java +7 -15
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +52 -46
- data/ext/java/nokogiri/internals/NokogiriHandler.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +71 -135
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +90 -58
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +9 -2
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +67 -10
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +4 -2
- data/ext/java/nokogiri/internals/ParserContext.java +27 -73
- data/ext/java/nokogiri/internals/ReaderNode.java +2 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +18 -33
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +485 -352
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +2 -2
- data/ext/nokogiri/nokogiri.c +34 -40
- data/ext/nokogiri/xml_document.c +18 -4
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +21 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +2 -2
- data/ext/nokogiri/xml_schema.c +55 -13
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -8
- data/lib/nokogiri.rb +4 -21
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +63 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/builder.rb +1 -0
- data/lib/nokogiri/html/document.rb +13 -26
- data/lib/nokogiri/html/document_fragment.rb +1 -0
- data/lib/nokogiri/html/element_description.rb +1 -0
- data/lib/nokogiri/html/element_description_defaults.rb +1 -0
- data/lib/nokogiri/html/entity_lookup.rb +1 -0
- data/lib/nokogiri/html/sax/parser.rb +1 -0
- data/lib/nokogiri/html/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html/sax/push_parser.rb +1 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +3 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +20 -15
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +587 -249
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +10 -3
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +7 -3
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -0
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +1 -0
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- metadata +89 -96
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +0 -107
- data/ext/java/nokogiri/internals/UncloseableInputStream.java +0 -102
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/nokogiri.h +0 -121
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
data/lib/nokogiri/css/parser.y
CHANGED
@@ -88,7 +88,7 @@ rule
|
|
88
88
|
)
|
89
89
|
}
|
90
90
|
| LSQUARE NUMBER RSQUARE {
|
91
|
-
#
|
91
|
+
# non-standard, from hpricot
|
92
92
|
result = Node.new(:PSEUDO_CLASS,
|
93
93
|
[Node.new(:FUNCTION, ['nth-child(', val[1]])]
|
94
94
|
)
|
@@ -139,7 +139,7 @@ rule
|
|
139
139
|
when 'n'
|
140
140
|
result = Node.new(:NTH, ['1','n','+','0'])
|
141
141
|
else
|
142
|
-
#
|
142
|
+
# non-standard to support custom functions:
|
143
143
|
# assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
|
144
144
|
# assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
|
145
145
|
# assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
|
@@ -1,63 +1,66 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "thread"
|
2
3
|
|
3
4
|
module Nokogiri
|
4
5
|
module CSS
|
5
6
|
class Parser < Racc::Parser
|
6
|
-
|
7
|
-
|
8
|
-
@
|
7
|
+
CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
|
8
|
+
|
9
|
+
@cache = {}
|
10
|
+
@mutex = Mutex.new
|
9
11
|
|
10
12
|
class << self
|
11
|
-
#
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
# Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
|
14
|
+
def cache_on?
|
15
|
+
!Thread.current[CACHE_SWITCH_NAME]
|
16
|
+
end
|
17
|
+
|
18
|
+
# Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
|
19
|
+
def set_cache(value)
|
20
|
+
Thread.current[CACHE_SWITCH_NAME] = !value
|
21
|
+
end
|
15
22
|
|
16
23
|
# Get the css selector in +string+ from the cache
|
17
|
-
def []
|
18
|
-
return unless
|
24
|
+
def [](string)
|
25
|
+
return unless cache_on?
|
19
26
|
@mutex.synchronize { @cache[string] }
|
20
27
|
end
|
21
28
|
|
22
29
|
# Set the css selector in +string+ in the cache to +value+
|
23
|
-
def []=
|
24
|
-
return value unless
|
30
|
+
def []=(string, value)
|
31
|
+
return value unless cache_on?
|
25
32
|
@mutex.synchronize { @cache[string] = value }
|
26
33
|
end
|
27
34
|
|
28
35
|
# Clear the cache
|
29
|
-
def clear_cache
|
30
|
-
@mutex.synchronize
|
36
|
+
def clear_cache(create_new_object = false)
|
37
|
+
@mutex.synchronize do
|
38
|
+
if create_new_object
|
39
|
+
@cache = {}
|
40
|
+
else
|
41
|
+
@cache.clear
|
42
|
+
end
|
43
|
+
end
|
31
44
|
end
|
32
45
|
|
33
46
|
# Execute +block+ without cache
|
34
|
-
def without_cache
|
35
|
-
|
36
|
-
|
47
|
+
def without_cache(&block)
|
48
|
+
original_cache_setting = cache_on?
|
49
|
+
set_cache false
|
37
50
|
block.call
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
###
|
42
|
-
# Parse this CSS selector in +selector+. Returns an AST.
|
43
|
-
def parse selector
|
44
|
-
@warned ||= false
|
45
|
-
unless @warned
|
46
|
-
$stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
|
47
|
-
@warned = true
|
48
|
-
end
|
49
|
-
new.parse selector
|
51
|
+
ensure
|
52
|
+
set_cache original_cache_setting
|
50
53
|
end
|
51
54
|
end
|
52
55
|
|
53
56
|
# Create a new CSS parser with respect to +namespaces+
|
54
|
-
def initialize
|
55
|
-
@tokenizer
|
57
|
+
def initialize(namespaces = {})
|
58
|
+
@tokenizer = Tokenizer.new
|
56
59
|
@namespaces = namespaces
|
57
60
|
super()
|
58
61
|
end
|
59
62
|
|
60
|
-
def parse
|
63
|
+
def parse(string)
|
61
64
|
@tokenizer.scan_setup string
|
62
65
|
do_parse
|
63
66
|
end
|
@@ -67,14 +70,14 @@ module Nokogiri
|
|
67
70
|
end
|
68
71
|
|
69
72
|
# Get the xpath for +string+ using +options+
|
70
|
-
def xpath_for
|
73
|
+
def xpath_for(string, options = {})
|
71
74
|
key = "#{string}#{options[:ns]}#{options[:prefix]}"
|
72
75
|
v = self.class[key]
|
73
76
|
return v if v
|
74
77
|
|
75
78
|
args = [
|
76
|
-
options[:prefix] ||
|
77
|
-
options[:visitor] || XPathVisitor.new
|
79
|
+
options[:prefix] || "//",
|
80
|
+
options[:visitor] || XPathVisitor.new,
|
78
81
|
]
|
79
82
|
self.class[key] = parse(string).map { |ast|
|
80
83
|
ast.to_xpath(*args)
|
@@ -82,7 +85,7 @@ module Nokogiri
|
|
82
85
|
end
|
83
86
|
|
84
87
|
# On CSS parser error, raise an exception
|
85
|
-
def on_error
|
88
|
+
def on_error(error_token_id, error_value, value_stack)
|
86
89
|
after = value_stack.compact.last
|
87
90
|
raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
|
88
91
|
end
|
@@ -1,8 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module CSS
|
3
4
|
class XPathVisitor # :nodoc:
|
4
5
|
def visit_function node
|
5
|
-
|
6
6
|
msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
|
7
7
|
return self.send(msg, node) if self.respond_to?(msg)
|
8
8
|
|
@@ -12,49 +12,51 @@ module Nokogiri
|
|
12
12
|
when /^self\(/
|
13
13
|
"self::#{node.value[1]}"
|
14
14
|
when /^eq\(/
|
15
|
-
"position()
|
15
|
+
"position()=#{node.value[1]}"
|
16
16
|
when /^(nth|nth-of-type)\(/
|
17
17
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
18
18
|
nth(node.value[1])
|
19
19
|
else
|
20
|
-
"position()
|
20
|
+
"position()=#{node.value[1]}"
|
21
21
|
end
|
22
22
|
when /^nth-child\(/
|
23
23
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
24
24
|
nth(node.value[1], :child => true)
|
25
25
|
else
|
26
|
-
"count(preceding-sibling::*)
|
26
|
+
"count(preceding-sibling::*)=#{node.value[1].to_i-1}"
|
27
27
|
end
|
28
28
|
when /^nth-last-of-type\(/
|
29
29
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
30
30
|
nth(node.value[1], :last => true)
|
31
31
|
else
|
32
32
|
index = node.value[1].to_i - 1
|
33
|
-
index == 0 ? "position()
|
33
|
+
index == 0 ? "position()=last()" : "position()=last()-#{index}"
|
34
34
|
end
|
35
35
|
when /^nth-last-child\(/
|
36
36
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
37
37
|
nth(node.value[1], :last => true, :child => true)
|
38
38
|
else
|
39
|
-
"count(following-sibling::*)
|
39
|
+
"count(following-sibling::*)=#{node.value[1].to_i-1}"
|
40
40
|
end
|
41
41
|
when /^(first|first-of-type)\(/
|
42
|
-
"position()
|
42
|
+
"position()=1"
|
43
43
|
when /^(last|last-of-type)\(/
|
44
|
-
"position()
|
44
|
+
"position()=last()"
|
45
45
|
when /^contains\(/
|
46
|
-
"contains(
|
46
|
+
"contains(.,#{node.value[1]})"
|
47
47
|
when /^gt\(/
|
48
|
-
"position()
|
48
|
+
"position()>#{node.value[1]}"
|
49
49
|
when /^only-child\(/
|
50
|
-
"last()
|
50
|
+
"last()=1"
|
51
51
|
when /^comment\(/
|
52
52
|
"comment()"
|
53
53
|
when /^has\(/
|
54
|
-
|
54
|
+
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
55
|
+
".#{"//" if !is_direct}#{node.value[1].accept(self)}"
|
55
56
|
else
|
57
|
+
# non-standard. this looks like a function call.
|
56
58
|
args = ['.'] + node.value[1..-1]
|
57
|
-
"#{node.value.first}#{args.join(',
|
59
|
+
"#{node.value.first}#{args.join(',')})"
|
58
60
|
end
|
59
61
|
end
|
60
62
|
|
@@ -69,18 +71,18 @@ module Nokogiri
|
|
69
71
|
|
70
72
|
def visit_id node
|
71
73
|
node.value.first =~ /^#(.*)$/
|
72
|
-
"@id
|
74
|
+
"@id='#{$1}'"
|
73
75
|
end
|
74
76
|
|
75
77
|
def visit_attribute_condition node
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
78
|
+
attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
|
79
|
+
''
|
80
|
+
else
|
81
|
+
'@'
|
82
|
+
end
|
81
83
|
attribute += node.value.first.accept(self)
|
82
84
|
|
83
|
-
#
|
85
|
+
# non-standard. attributes starting with '@'
|
84
86
|
attribute.gsub!(/^@@/, '@')
|
85
87
|
|
86
88
|
return attribute unless node.value.length == 3
|
@@ -88,29 +90,30 @@ module Nokogiri
|
|
88
90
|
value = node.value.last
|
89
91
|
value = "'#{value}'" if value !~ /^['"]/
|
90
92
|
|
93
|
+
# quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
|
91
94
|
if (value[0]==value[-1]) && %q{"'}.include?(value[0])
|
92
95
|
str_value = value[1..-2]
|
93
96
|
if str_value.include?(value[0])
|
94
|
-
value = 'concat("' + str_value.split('"', -1).join(%q{",
|
97
|
+
value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
|
95
98
|
end
|
96
99
|
end
|
97
100
|
|
98
101
|
case node.value[1]
|
99
102
|
when :equal
|
100
|
-
attribute + "
|
103
|
+
attribute + "=" + "#{value}"
|
101
104
|
when :not_equal
|
102
|
-
attribute + "
|
105
|
+
attribute + "!=" + "#{value}"
|
103
106
|
when :substring_match
|
104
|
-
"contains(#{attribute}
|
107
|
+
"contains(#{attribute},#{value})"
|
105
108
|
when :prefix_match
|
106
|
-
"starts-with(#{attribute}
|
109
|
+
"starts-with(#{attribute},#{value})"
|
107
110
|
when :dash_match
|
108
|
-
"#{attribute}
|
111
|
+
"#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
|
109
112
|
when :includes
|
110
|
-
|
113
|
+
value = value[1..-2] # strip quotes
|
114
|
+
css_class(attribute, value)
|
111
115
|
when :suffix_match
|
112
|
-
"substring(#{attribute},
|
113
|
-
"string-length(#{value}) + 1, string-length(#{value})) = #{value}"
|
116
|
+
"substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
|
114
117
|
else
|
115
118
|
attribute + " #{node.value[1]} " + "#{value}"
|
116
119
|
end
|
@@ -124,14 +127,14 @@ module Nokogiri
|
|
124
127
|
return self.send(msg, node) if self.respond_to?(msg)
|
125
128
|
|
126
129
|
case node.value.first
|
127
|
-
when "first" then "position()
|
128
|
-
when "first-child" then "count(preceding-sibling::*)
|
129
|
-
when "last" then "position()
|
130
|
-
when "last-child" then "count(following-sibling::*)
|
131
|
-
when "first-of-type" then "position()
|
132
|
-
when "last-of-type" then "position()
|
133
|
-
when "only-child" then "count(preceding-sibling::*)
|
134
|
-
when "only-of-type" then "last()
|
130
|
+
when "first" then "position()=1"
|
131
|
+
when "first-child" then "count(preceding-sibling::*)=0"
|
132
|
+
when "last" then "position()=last()"
|
133
|
+
when "last-child" then "count(following-sibling::*)=0"
|
134
|
+
when "first-of-type" then "position()=1"
|
135
|
+
when "last-of-type" then "position()=last()"
|
136
|
+
when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
|
137
|
+
when "only-of-type" then "last()=1"
|
135
138
|
when "empty" then "not(node())"
|
136
139
|
when "parent" then "node()"
|
137
140
|
when "root" then "not(parent::*)"
|
@@ -142,7 +145,7 @@ module Nokogiri
|
|
142
145
|
end
|
143
146
|
|
144
147
|
def visit_class_condition node
|
145
|
-
"
|
148
|
+
css_class("@class", node.value.first)
|
146
149
|
end
|
147
150
|
|
148
151
|
def visit_combinator node
|
@@ -179,25 +182,26 @@ module Nokogiri
|
|
179
182
|
node.accept(self)
|
180
183
|
end
|
181
184
|
|
182
|
-
|
185
|
+
private
|
186
|
+
|
183
187
|
def nth node, options={}
|
184
188
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
185
189
|
|
186
190
|
a, b = read_a_and_positive_b node.value
|
187
191
|
position = if options[:child]
|
188
|
-
options[:last] ? "(count(following-sibling::*)
|
192
|
+
options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
|
189
193
|
else
|
190
194
|
options[:last] ? "(last()-position()+1)" : "position()"
|
191
195
|
end
|
192
196
|
|
193
197
|
if b.zero?
|
194
|
-
"(#{position} mod #{a})
|
198
|
+
"(#{position} mod #{a})=0"
|
195
199
|
else
|
196
200
|
compare = a < 0 ? "<=" : ">="
|
197
201
|
if a.abs == 1
|
198
|
-
"#{position}
|
202
|
+
"#{position}#{compare}#{b}"
|
199
203
|
else
|
200
|
-
"(#{position}
|
204
|
+
"(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
|
201
205
|
end
|
202
206
|
end
|
203
207
|
end
|
@@ -225,6 +229,32 @@ module Nokogiri
|
|
225
229
|
end =~ /(nth|first|last|only)-of-type(\()?/
|
226
230
|
end
|
227
231
|
end
|
232
|
+
|
233
|
+
# use only ordinary xpath functions
|
234
|
+
def css_class_standard(hay, needle)
|
235
|
+
"contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
|
236
|
+
end
|
237
|
+
|
238
|
+
# use the builtin implementation
|
239
|
+
def css_class_builtin(hay, needle)
|
240
|
+
"nokogiri-builtin:css-class(#{hay},'#{needle}')"
|
241
|
+
end
|
242
|
+
|
243
|
+
alias_method :css_class, :css_class_standard
|
244
|
+
end
|
245
|
+
|
246
|
+
class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
|
247
|
+
private
|
248
|
+
alias_method :css_class, :css_class_builtin
|
249
|
+
end
|
250
|
+
|
251
|
+
class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
|
252
|
+
private
|
253
|
+
if Nokogiri.uses_libxml?
|
254
|
+
alias_method :css_class, :css_class_builtin
|
255
|
+
else
|
256
|
+
alias_method :css_class, :css_class_standard
|
257
|
+
end
|
228
258
|
end
|
229
259
|
end
|
230
260
|
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
|
1
5
|
module Nokogiri
|
2
6
|
module HTML
|
3
7
|
class Document < Nokogiri::XML::Document
|
@@ -160,11 +164,12 @@ module Nokogiri
|
|
160
164
|
# Nokogiri::XML::ParseOptions::RECOVER. See the constants in
|
161
165
|
# Nokogiri::XML::ParseOptions.
|
162
166
|
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
|
163
|
-
|
164
167
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
165
|
-
|
168
|
+
|
166
169
|
yield options if block_given?
|
167
170
|
|
171
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
172
|
+
|
168
173
|
if string_or_io.respond_to?(:encoding)
|
169
174
|
unless string_or_io.encoding.name == "ASCII-8BIT"
|
170
175
|
encoding ||= string_or_io.encoding.name
|
@@ -172,7 +177,12 @@ module Nokogiri
|
|
172
177
|
end
|
173
178
|
|
174
179
|
if string_or_io.respond_to?(:read)
|
175
|
-
|
180
|
+
if string_or_io.is_a?(Pathname)
|
181
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
182
|
+
string_or_io = string_or_io.expand_path.open
|
183
|
+
url ||= string_or_io.path
|
184
|
+
end
|
185
|
+
|
176
186
|
unless encoding
|
177
187
|
# Libxml2's parser has poor support for encoding
|
178
188
|
# detection. First, it does not recognize the HTML5
|
@@ -251,9 +261,6 @@ module Nokogiri
|
|
251
261
|
end
|
252
262
|
|
253
263
|
def self.detect_encoding(chunk)
|
254
|
-
if Nokogiri.jruby? && EncodingReader.is_jruby_without_fix?
|
255
|
-
return EncodingReader.detect_encoding_for_jruby_without_fix(chunk)
|
256
|
-
end
|
257
264
|
m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
|
258
265
|
return Nokogiri.XML(m[1]).encoding
|
259
266
|
|
@@ -272,26 +279,6 @@ module Nokogiri
|
|
272
279
|
end
|
273
280
|
end
|
274
281
|
|
275
|
-
def self.is_jruby_without_fix?
|
276
|
-
JRUBY_VERSION.split('.').join.to_i < 165
|
277
|
-
end
|
278
|
-
|
279
|
-
def self.detect_encoding_for_jruby_without_fix(chunk)
|
280
|
-
m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/) and
|
281
|
-
return Nokogiri.XML(m[1]).encoding
|
282
|
-
|
283
|
-
m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
|
284
|
-
return m[4]
|
285
|
-
|
286
|
-
catch(:encoding_found) {
|
287
|
-
Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found.to_s)).parse(chunk)
|
288
|
-
nil
|
289
|
-
}
|
290
|
-
rescue Nokogiri::SyntaxError, RuntimeError
|
291
|
-
# Ignore parser errors that nokogiri may raise
|
292
|
-
nil
|
293
|
-
end
|
294
|
-
|
295
282
|
def initialize(io)
|
296
283
|
@io = io
|
297
284
|
@firstchunk = nil
|