nokogiri-backport 1.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1682 -0
- data/LICENSE.md +9 -0
- data/README.md +272 -0
- data/bin/nokogiri +118 -0
- data/dependencies.yml +74 -0
- data/ext/java/nokogiri/EncodingHandler.java +124 -0
- data/ext/java/nokogiri/HtmlDocument.java +178 -0
- data/ext/java/nokogiri/HtmlElementDescription.java +148 -0
- data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
- data/ext/java/nokogiri/HtmlSaxParserContext.java +282 -0
- data/ext/java/nokogiri/HtmlSaxPushParser.java +222 -0
- data/ext/java/nokogiri/NokogiriService.java +597 -0
- data/ext/java/nokogiri/XmlAttr.java +162 -0
- data/ext/java/nokogiri/XmlAttributeDecl.java +129 -0
- data/ext/java/nokogiri/XmlCdata.java +82 -0
- data/ext/java/nokogiri/XmlComment.java +97 -0
- data/ext/java/nokogiri/XmlDocument.java +633 -0
- data/ext/java/nokogiri/XmlDocumentFragment.java +185 -0
- data/ext/java/nokogiri/XmlDtd.java +481 -0
- data/ext/java/nokogiri/XmlElement.java +68 -0
- data/ext/java/nokogiri/XmlElementContent.java +382 -0
- data/ext/java/nokogiri/XmlElementDecl.java +147 -0
- data/ext/java/nokogiri/XmlEntityDecl.java +157 -0
- data/ext/java/nokogiri/XmlEntityReference.java +101 -0
- data/ext/java/nokogiri/XmlNamespace.java +199 -0
- data/ext/java/nokogiri/XmlNode.java +1684 -0
- data/ext/java/nokogiri/XmlNodeSet.java +434 -0
- data/ext/java/nokogiri/XmlProcessingInstruction.java +100 -0
- data/ext/java/nokogiri/XmlReader.java +531 -0
- data/ext/java/nokogiri/XmlRelaxng.java +151 -0
- data/ext/java/nokogiri/XmlSaxParserContext.java +374 -0
- data/ext/java/nokogiri/XmlSaxPushParser.java +286 -0
- data/ext/java/nokogiri/XmlSchema.java +388 -0
- data/ext/java/nokogiri/XmlSyntaxError.java +138 -0
- data/ext/java/nokogiri/XmlText.java +110 -0
- data/ext/java/nokogiri/XmlXpathContext.java +301 -0
- data/ext/java/nokogiri/XsltStylesheet.java +347 -0
- data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +116 -0
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +121 -0
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +69 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +734 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +217 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +127 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +100 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +180 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +72 -0
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +60 -0
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +87 -0
- data/ext/java/nokogiri/internals/ParserContext.java +259 -0
- data/ext/java/nokogiri/internals/ReaderNode.java +488 -0
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +778 -0
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +73 -0
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +168 -0
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +274 -0
- data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +367 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
- data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +166 -0
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1745 -0
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +685 -0
- data/ext/nokogiri/depend +477 -0
- data/ext/nokogiri/extconf.rb +836 -0
- data/ext/nokogiri/html_document.c +171 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +135 -0
- data/ext/nokogiri/nokogiri.h +130 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +69 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +622 -0
- data/ext/nokogiri/xml_document.h +23 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +202 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +63 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +111 -0
- data/ext/nokogiri/xml_namespace.h +14 -0
- data/ext/nokogiri/xml_node.c +1773 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +486 -0
- data/ext/nokogiri/xml_node_set.h +12 -0
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +657 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +179 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +305 -0
- data/ext/nokogiri/xml_sax_parser.h +39 -0
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +159 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +276 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +64 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +52 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath_context.c +374 -0
- data/ext/nokogiri/xml_xpath_context.h +10 -0
- data/ext/nokogiri/xslt_stylesheet.c +263 -0
- data/ext/nokogiri/xslt_stylesheet.h +14 -0
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri/css/node.rb +53 -0
- data/lib/nokogiri/css/parser.rb +751 -0
- data/lib/nokogiri/css/parser.y +272 -0
- data/lib/nokogiri/css/parser_extras.rb +94 -0
- data/lib/nokogiri/css/syntax_error.rb +8 -0
- data/lib/nokogiri/css/tokenizer.rb +154 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +260 -0
- data/lib/nokogiri/css.rb +28 -0
- data/lib/nokogiri/decorators/slop.rb +43 -0
- data/lib/nokogiri/html/builder.rb +36 -0
- data/lib/nokogiri/html/document.rb +322 -0
- data/lib/nokogiri/html/document_fragment.rb +50 -0
- data/lib/nokogiri/html/element_description.rb +24 -0
- data/lib/nokogiri/html/element_description_defaults.rb +672 -0
- data/lib/nokogiri/html/entity_lookup.rb +14 -0
- data/lib/nokogiri/html/sax/parser.rb +63 -0
- data/lib/nokogiri/html/sax/parser_context.rb +17 -0
- data/lib/nokogiri/html/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html.rb +38 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +5 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml/attr.rb +15 -0
- data/lib/nokogiri/xml/attribute_decl.rb +19 -0
- data/lib/nokogiri/xml/builder.rb +447 -0
- data/lib/nokogiri/xml/cdata.rb +12 -0
- data/lib/nokogiri/xml/character_data.rb +8 -0
- data/lib/nokogiri/xml/document.rb +290 -0
- data/lib/nokogiri/xml/document_fragment.rb +159 -0
- data/lib/nokogiri/xml/dtd.rb +33 -0
- data/lib/nokogiri/xml/element_content.rb +37 -0
- data/lib/nokogiri/xml/element_decl.rb +14 -0
- data/lib/nokogiri/xml/entity_decl.rb +20 -0
- data/lib/nokogiri/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +14 -0
- data/lib/nokogiri/xml/node/save_options.rb +62 -0
- data/lib/nokogiri/xml/node.rb +1240 -0
- data/lib/nokogiri/xml/node_set.rb +372 -0
- data/lib/nokogiri/xml/notation.rb +7 -0
- data/lib/nokogiri/xml/parse_options.rb +127 -0
- data/lib/nokogiri/xml/pp/character_data.rb +19 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +3 -0
- data/lib/nokogiri/xml/processing_instruction.rb +9 -0
- data/lib/nokogiri/xml/reader.rb +116 -0
- data/lib/nokogiri/xml/relax_ng.rb +37 -0
- data/lib/nokogiri/xml/sax/document.rb +172 -0
- data/lib/nokogiri/xml/sax/parser.rb +123 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +5 -0
- data/lib/nokogiri/xml/schema.rb +72 -0
- data/lib/nokogiri/xml/searchable.rb +239 -0
- data/lib/nokogiri/xml/syntax_error.rb +71 -0
- data/lib/nokogiri/xml/text.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
- data/lib/nokogiri/xml/xpath.rb +11 -0
- data/lib/nokogiri/xml/xpath_context.rb +17 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +26 -0
- data/lib/nokogiri/xslt.rb +57 -0
- data/lib/nokogiri.rb +144 -0
- data/lib/serializer.jar +0 -0
- data/lib/xalan.jar +0 -0
- data/lib/xercesImpl.jar +0 -0
- data/lib/xml-apis.jar +0 -0
- data/lib/xsd/xmlparser/nokogiri.rb +103 -0
- metadata +531 -0
@@ -0,0 +1,260 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module CSS
|
4
|
+
class XPathVisitor # :nodoc:
|
5
|
+
# Translate a CSS FUNCTION node into an XPath expression fragment.
#
# +node.value.first+ holds the function name together with its opening
# parenthesis (e.g. "nth-child("); the remaining entries of +node.value+
# are the arguments. When the visitor responds to
# visit_function_<name>, that method is used instead of the
# translations below.
def visit_function(node)
  name = node.value.first
  handler = :"visit_function_#{name.gsub(/[(]/, '')}"
  return send(handler, node) if respond_to?(handler)

  arg = node.value[1]
  # true when the argument is an NTH node rather than a plain literal
  an_plus_b = lambda { arg.is_a?(Nokogiri::CSS::Node) && arg.type == :NTH }

  case name
  when /^text\(/
    'child::text()'
  when /^self\(/
    "self::#{arg}"
  when /^eq\(/
    "position()=#{arg}"
  when /^(nth|nth-of-type)\(/
    an_plus_b.call ? nth(arg) : "position()=#{arg}"
  when /^nth-child\(/
    if an_plus_b.call
      nth(arg, :child => true)
    else
      "count(preceding-sibling::*)=#{arg.to_i - 1}"
    end
  when /^nth-last-of-type\(/
    if an_plus_b.call
      nth(arg, :last => true)
    else
      index = arg.to_i - 1
      index == 0 ? "position()=last()" : "position()=last()-#{index}"
    end
  when /^nth-last-child\(/
    if an_plus_b.call
      nth(arg, :last => true, :child => true)
    else
      "count(following-sibling::*)=#{arg.to_i - 1}"
    end
  when /^(first|first-of-type)\(/
    "position()=1"
  when /^(last|last-of-type)\(/
    "position()=last()"
  when /^contains\(/
    "contains(.,#{arg})"
  when /^gt\(/
    "position()>#{arg}"
  when /^only-child\(/
    "last()=1"
  when /^comment\(/
    "comment()"
  when /^has\(/
    # a nil first value marks e.g. "has(> a)", "has(~ a)", "has(+ a)"
    is_direct = arg.value[0].nil?
    ".#{"//" unless is_direct}#{arg.accept(self)}"
  else
    # non-standard. this looks like a function call.
    args = ['.'] + node.value[1..-1]
    "#{name}#{args.join(',')})"
  end
end
|
62
|
+
|
63
|
+
# Translate a :not(...) node into an XPath not() expression.
#
# A bare element name is tested against the context node itself
# (self::name); any other child is negated as-is.
def visit_not(node)
  child = node.value.first
  axis = child.type == :ELEMENT_NAME ? 'self::' : ''
  "not(#{axis}#{child.accept(self)})"
end
|
71
|
+
|
72
|
+
# Translate an ID node ("#foo") into an XPath @id comparison.
#
# The text after the leading "#" is used as the id; when the token does
# not start with "#" the id is empty.
def visit_id(node)
  # String#[] with a capture index avoids depending on the global $1
  id = node.value.first[/^#(.*)$/, 1]
  "@id='#{id}'"
end
|
76
|
+
|
77
|
+
# Translate an attribute condition ([attr], [attr=value], [attr^=value],
# ...) into an XPath predicate.
#
# +node.value+ is either [target] or [target, operator, value], where
# +operator+ is a symbol such as :equal or :prefix_match.
def visit_attribute_condition(node)
  target = node.value.first

  # function calls and explicit axes ("ns::name") are used verbatim;
  # everything else is treated as an attribute name
  verbatim = target.type == :FUNCTION || target.value.first =~ /::/
  prefix = verbatim ? '' : '@'

  # non-standard. attributes starting with '@'
  attribute = "#{prefix}#{target.accept(self)}".gsub(/^@@/, '@')

  return attribute unless node.value.length == 3

  operator = node.value[1]
  value = node.value.last
  value = "'#{value}'" if value !~ /^['"]/

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  quote = value[0]
  if quote == value[-1] && %q{"'}.include?(quote)
    unquoted = value[1..-2]
    if unquoted.include?(quote)
      value = 'concat("' + unquoted.split('"', -1).join(%q{",'"',"}) + '","")'
    end
  end

  case operator
  when :equal
    "#{attribute}=#{value}"
  when :not_equal
    "#{attribute}!=#{value}"
  when :substring_match
    "contains(#{attribute},#{value})"
  when :prefix_match
    "starts-with(#{attribute},#{value})"
  when :dash_match
    "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
  when :includes
    css_class(attribute, value[1..-2]) # strip quotes
  when :suffix_match
    "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
  else
    "#{attribute} #{operator} #{value}"
  end
end
|
121
|
+
|
122
|
+
# Translate a pseudo-class (":first-child", ":nth-child(2)", ...) into an
# XPath predicate.
#
# Functional pseudo-classes carry a FUNCTION node and are delegated to
# it. For plain names, a visit_pseudo_class_<name> method on the visitor
# takes precedence over the built-in table; unknown names are emitted as
# a function call on the context node.
def visit_pseudo_class(node)
  head = node.value.first
  if head.is_a?(Nokogiri::CSS::Node) && head.type == :FUNCTION
    return head.accept(self)
  end

  handler = :"visit_pseudo_class_#{head.gsub(/[(]/, '')}"
  return send(handler, node) if respond_to?(handler)

  case head
  when "first"         then "position()=1"
  when "first-child"   then "count(preceding-sibling::*)=0"
  when "last"          then "position()=last()"
  when "last-child"    then "count(following-sibling::*)=0"
  when "first-of-type" then "position()=1"
  when "last-of-type"  then "position()=last()"
  when "only-child"    then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
  when "only-of-type"  then "last()=1"
  when "empty"         then "not(node())"
  when "parent"        then "node()"
  when "root"          then "not(parent::*)"
  else
    head + "(.)"
  end
end
|
146
|
+
|
147
|
+
# Translate a class condition (".foo") into a predicate on @class, using
# whichever css_class implementation this visitor is configured with.
def visit_class_condition(node)
  css_class("@class", node.value.first)
end
|
150
|
+
|
151
|
+
# Join the two sides of a condition combinator.
#
# *-of-type pseudo-classes on the right-hand side are emitted with "]["
# between the two sides; every other pair is joined with "and". A
# missing left-hand side contributes an empty string.
def visit_combinator(node)
  left = node.value.first
  right = node.value.last

  joiner = is_of_type_pseudo_class?(right) ? '][' : ' and '
  lhs = left ? left.accept(self) : nil
  "#{lhs}#{joiner}#{right.accept(self)}"
end
|
158
|
+
|
159
|
+
# Generate visit_<combinator> methods. Each one joins the translation of
# the left-hand node (if any) and of the right-hand node with the XPath
# step that corresponds to the CSS combinator.
{
  'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
  'following_selector'       => "/following-sibling::",
  'descendant_selector'      => '//',
  'child_selector'           => '/',
}.each do |selector, axis|
  # define_method closes over +axis+, so no source string needs to be
  # assembled and evaluated
  define_method(:"visit_#{selector}") do |node|
    left = node.value.first
    "#{left.accept(self) if left}#{axis}#{node.value.last.accept(self)}"
  end
end
|
171
|
+
|
172
|
+
# Translate "subject[condition]": the first value is the subject, the
# last value is rendered inside an XPath predicate.
def visit_conditional_selector(node)
  subject = node.value.first.accept(self)
  predicate = node.value.last.accept(self)
  "#{subject}[#{predicate}]"
end
|
176
|
+
|
177
|
+
# An element name translates to itself: return the node's first value.
def visit_element_name(node)
  node.value.first
end
|
180
|
+
|
181
|
+
# Entry point of the visitor: ask +node+ to dispatch back to the
# matching visit_* method on this visitor and return its result.
def accept(node)
  node.accept(self)
end
|
184
|
+
|
185
|
+
private
|
186
|
+
|
187
|
+
# Translate an an+b node into an XPath position test.
#
# +node.value+ must hold exactly four tokens; they are handed to
# read_a_and_positive_b to obtain +a+ and +b+.
#
# Options:
# [:child] count siblings instead of using position()
# [:last]  count from the end instead of from the start
#
# Raises ArgumentError when the node does not hold four tokens.
def nth(node, options = {})
  unless node.value.size == 4
    raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}"
  end

  a, b = read_a_and_positive_b(node.value)

  position =
    if options[:child]
      options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
    else
      options[:last] ? "(last()-position()+1)" : "position()"
    end

  return "(#{position} mod #{a})=0" if b.zero?

  compare = a < 0 ? "<=" : ">="
  return "#{position}#{compare}#{b}" if a.abs == 1

  "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
end
|
208
|
+
|
209
|
+
# Read +a+ and +b+ from the four tokens of an an+b expression
# (values[0] is a, values[2] is the operator, values[3] is b).
#
# For "an-b" the offset is rewritten as a - (b mod a), so the returned
# pair can be used as if the expression had been written with "+".
#
# Returns [a, b]. Raises ArgumentError for any operator other than "+"
# or "-".
def read_a_and_positive_b(values)
  op = values[2]
  unless op == "+" || op == "-"
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
  end

  a = values[0].to_i
  b = values[3].to_i
  b = a - (b % a) if op == "-"
  [a, b]
end
|
222
|
+
|
223
|
+
# Tell whether +node+ is a nth/first/last/only "-of-type" pseudo-class,
# written either as a plain name or as a function call.
#
# Returns the match index (truthy) on a match and nil otherwise,
# including when +node+ is not a PSEUDO_CLASS node.
def is_of_type_pseudo_class?(node)
  return unless node.type == :PSEUDO_CLASS

  head = node.value[0]
  is_function = head.is_a?(Nokogiri::CSS::Node) && head.type == :FUNCTION
  name = is_function ? head.value[0] : head
  name =~ /(nth|first|last|only)-of-type(\()?/
end
|
232
|
+
|
233
|
+
# use only ordinary xpath functions
|
234
|
+
# Build a class-membership test from standard XPath 1.0 functions:
# the whitespace-normalized +hay+ is padded with spaces and searched
# for +needle+ surrounded by spaces.
def css_class_standard(hay, needle)
  "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
end
|
237
|
+
|
238
|
+
# use the builtin implementation
|
239
|
+
# Build a class-membership test that calls the
# nokogiri-builtin:css-class XPath function with +hay+ and the quoted
# +needle+.
def css_class_builtin(hay, needle)
  "nokogiri-builtin:css-class(#{hay},'#{needle}')"
end
|
242
|
+
|
243
|
+
alias_method :css_class, :css_class_standard
|
244
|
+
end
|
245
|
+
|
246
|
+
# Visitor variant whose css_class is always the builtin implementation.
class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
  private

  alias_method :css_class, :css_class_builtin
end
|
250
|
+
|
251
|
+
# Visitor variant that picks the css_class implementation once, when the
# class is defined: the builtin one when Nokogiri.uses_libxml? is true,
# the standard XPath one otherwise.
class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
  private

  if Nokogiri.uses_libxml?
    alias_method :css_class, :css_class_builtin
  else
    alias_method :css_class, :css_class_standard
  end
end
|
259
|
+
end
|
260
|
+
end
|
data/lib/nokogiri/css.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'nokogiri/css/node'
|
3
|
+
require 'nokogiri/css/xpath_visitor'
|
4
|
+
x = $-w
|
5
|
+
$-w = false
|
6
|
+
require 'nokogiri/css/parser'
|
7
|
+
$-w = x
|
8
|
+
|
9
|
+
require 'nokogiri/css/tokenizer'
|
10
|
+
require 'nokogiri/css/syntax_error'
|
11
|
+
|
12
|
+
module Nokogiri
|
13
|
+
module CSS
|
14
|
+
class << self
  ###
  # Parse this CSS selector in +selector+.  Returns an AST.
  def parse(selector)
    Parser.new.parse(selector)
  end

  ###
  # Get the XPath for +selector+.
  #
  # +options+ is handed to Parser#xpath_for unchanged; its :ns entry
  # (default: an empty Hash) is additionally passed to Parser.new.
  def xpath_for(selector, options = {})
    namespaces = options[:ns] || {}
    Parser.new(namespaces).xpath_for(selector, options)
  end
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
  module Decorators
    ###
    # The Slop decorator implements method_missing such that methods may be
    # used instead of XPath or CSS.  See Nokogiri.Slop
    module Slop
      # The default XPath search context for Slop
      XPATH_PREFIX = "./"

      ###
      # look for node with +name+.  See Nokogiri.Slop
      #
      # * without arguments, children named +name+ are searched (one
      #   leading underscore is stripped from +name+);
      # * with a Hash, its :css entry is appended to +name+ as a CSS
      #   selector, or its :xpath entries become predicates on +name+;
      # * with any other argument, it is appended to +name+ and the
      #   result is treated as a CSS selector.
      #
      # Returns the single matching node, or the list when several match.
      # Falls through to +super+ (NoMethodError) when nothing matches.
      def method_missing(name, *args, &block)
        list = nil
        if args.empty?
          list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
        elsif args.first.is_a?(Hash)
          hash = args.first
          if hash[:css]
            list = css("#{name}#{hash[:css]}")
          elsif hash[:xpath]
            conds = Array(hash[:xpath]).join(' and ')
            list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
          end
        else
          CSS::Parser.without_cache do
            list = xpath(
              *CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
            )
          end
        end

        # +list+ stays nil when a Hash carries neither :css nor :xpath;
        # report that as a missing method instead of failing on nil.empty?
        super if list.nil? || list.empty?
        list.length == 1 ? list.first : list
      end

      ###
      # A name is responded to when a child of that name exists (one
      # leading underscore is stripped from +name+).
      def respond_to_missing?(name, include_private = false)
        !xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}").empty? || super
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module HTML
|
4
|
+
###
|
5
|
+
# Nokogiri HTML builder is used for building HTML documents. It is very
|
6
|
+
# similar to the Nokogiri::XML::Builder. In fact, you should go read the
|
7
|
+
# documentation for Nokogiri::XML::Builder before reading this
|
8
|
+
# documentation.
|
9
|
+
#
|
10
|
+
# == Synopsis:
|
11
|
+
#
|
12
|
+
# Create an HTML document with a body that has an onload attribute, and a
|
13
|
+
# span tag with a class of "bold" that has content of "Hello world".
|
14
|
+
#
|
15
|
+
# builder = Nokogiri::HTML::Builder.new do |doc|
|
16
|
+
# doc.html {
|
17
|
+
# doc.body(:onload => 'some_func();') {
|
18
|
+
# doc.span.bold {
|
19
|
+
# doc.text "Hello world"
|
20
|
+
# }
|
21
|
+
# }
|
22
|
+
# }
|
23
|
+
# end
|
24
|
+
# puts builder.to_html
|
25
|
+
#
|
26
|
+
# The HTML builder inherits from the XML builder, so make sure to read the
|
27
|
+
# Nokogiri::XML::Builder documentation.
|
28
|
+
class Builder < Nokogiri::XML::Builder
  ###
  # Convert the builder to HTML
  def to_html
    @doc.to_html
  end
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,322 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
module HTML
|
7
|
+
class Document < Nokogiri::XML::Document
|
8
|
+
###
|
9
|
+
# Get the meta tag encoding for this document. If there is no meta tag,
|
10
|
+
# then nil is returned.
|
11
|
+
# Returns the encoding named by a <meta charset> element, or else the
# charset found in the content of a Content-Type <meta> element, or nil
# when neither is present.
def meta_encoding
  if (meta = at('//meta[@charset]'))
    meta[:charset]
  elsif (meta = meta_content_type)
    meta['content'][/charset\s*=\s*([\w-]+)/i, 1]
  end
end
|
19
|
+
|
20
|
+
###
|
21
|
+
# Set the meta tag encoding for this document.
|
22
|
+
#
|
23
|
+
# If a meta encoding tag is already present, its content is
|
24
|
+
# replaced with the given text.
|
25
|
+
#
|
26
|
+
# Otherwise, this method tries to create one at an appropriate
|
27
|
+
# place supplying head and/or html elements as necessary, which
|
28
|
+
# is inside a head element if any, and before any text node or
|
29
|
+
# content element (typically <body>) if any.
|
30
|
+
#
|
31
|
+
# The result when trying to set an encoding that is different
|
32
|
+
# from the document encoding is undefined.
|
33
|
+
#
|
34
|
+
# Beware in CRuby, that libxml2 automatically inserts a meta tag
|
35
|
+
# into a head element.
|
36
|
+
# Lookup order: an existing Content-Type <meta> is updated first, then
# an existing <meta charset>; otherwise a new <meta> element is created
# (charset style when the internal subset is an HTML5 DTD, http-equiv
# style otherwise) and placed at the start of <head>, or wherever
# set_metadata_element puts it when there is no <head>.
def meta_encoding=(encoding)
  if (meta = meta_content_type)
    meta['content'] = 'text/html; charset=%s' % encoding
    return encoding
  end

  if (meta = at('//meta[@charset]'))
    meta['charset'] = encoding
    return encoding
  end

  meta = XML::Node.new('meta', self)
  dtd = internal_subset
  if dtd && dtd.html5_dtd?
    meta['charset'] = encoding
  else
    meta['http-equiv'] = 'Content-Type'
    meta['content'] = 'text/html; charset=%s' % encoding
  end

  if (head = at('//head'))
    head.prepend_child(meta)
  else
    set_metadata_element(meta)
  end
  encoding
end
|
61
|
+
|
62
|
+
# Returns the first <meta> element that has a non-empty content
# attribute and whose http-equiv attribute equals "Content-Type"
# (compared case-insensitively), or nil.
def meta_content_type
  candidates = xpath('//meta[@http-equiv and boolean(@content)]')
  candidates.find { |node| node['http-equiv'] =~ /\AContent-Type\z/i }
end
private :meta_content_type
|
68
|
+
|
69
|
+
###
|
70
|
+
# Get the title string of this document. Return nil if there is
|
71
|
+
# no title tag.
|
72
|
+
# Returns the inner text of the first <title> element, or nil when the
# document has none.
def title
  node = at('//title')
  node && node.inner_text
end
|
75
|
+
|
76
|
+
###
|
77
|
+
# Set the title string of this document.
|
78
|
+
#
|
79
|
+
# If a title element is already present, its content is replaced
|
80
|
+
# with the given text.
|
81
|
+
#
|
82
|
+
# Otherwise, this method tries to create one at an appropriate
|
83
|
+
# place supplying head and/or html elements as necessary, which
|
84
|
+
# is inside a head element if any, right after a meta
|
85
|
+
# encoding/charset tag if any, and before any text node or
|
86
|
+
# content element (typically <body>) if any.
|
87
|
+
# An existing <title> gets its children replaced by a text node holding
# +text+. Otherwise a new <title> is appended to <head>, or inserted
# right after a charset/Content-Type <meta>, or placed by
# set_metadata_element when neither exists.
def title=(text)
  tnode = XML::Text.new(text, self)
  if (title = at('//title'))
    title.children = tnode
    return text
  end

  title = XML::Node.new('title', self) << tnode
  if (head = at('//head'))
    head << title
  elsif (meta = at('//meta[@charset]') || meta_content_type)
    # better put after charset declaration
    meta.add_next_sibling(title)
  else
    set_metadata_element(title)
  end
  text
end
|
106
|
+
|
107
|
+
# Insert +element+ (a <meta> or <title>) at a suitable place:
# appended to an existing <head>; else inside a new <head> prepended
# to an existing <html>; else just before the first element or text
# child of the document; else inside a newly created <html><head>.
def set_metadata_element(element)
  if (head = at('//head'))
    head << element
  elsif (html = at('//html'))
    head = html.prepend_child(XML::Node.new('head', self))
    head.prepend_child(element)
  elsif (first = children.find { |node| XML::Element === node || XML::Text === node })
    # We reach here only if the underlying document model
    # allows <html>/<head> elements to be omitted and does not
    # automatically supply them.
    first.add_previous_sibling(element)
  else
    html = add_child(XML::Node.new('html', self))
    head = html.add_child(XML::Node.new('head', self))
    head.prepend_child(element)
  end
end
private :set_metadata_element
|
131
|
+
|
132
|
+
####
|
133
|
+
# Serialize Node using +options+. Save options can also be set using a
|
134
|
+
# block. See SaveOptions.
|
135
|
+
#
|
136
|
+
# These two statements are equivalent:
|
137
|
+
#
|
138
|
+
# node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
|
139
|
+
#
|
140
|
+
# or
|
141
|
+
#
|
142
|
+
# node.serialize(:encoding => 'UTF-8') do |config|
|
143
|
+
# config.format.as_xml
|
144
|
+
# end
|
145
|
+
#
|
146
|
+
# Defaults :save_with to SaveOptions::DEFAULT_HTML and delegates to the
# superclass implementation.
def serialize(options = {})
  # work on a copy so the caller's hash is not modified; the implicit
  # super below forwards the reassigned +options+
  options = options.dup
  options[:save_with] ||= XML::Node::SaveOptions::DEFAULT_HTML
  super
end
|
150
|
+
|
151
|
+
####
|
152
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
153
|
+
# Builds a DocumentFragment for this document from +tags+ (may be nil),
# passing the document root as the third constructor argument.
def fragment(tags = nil)
  DocumentFragment.new(self, tags, root)
end
|
156
|
+
|
157
|
+
class << self
|
158
|
+
###
|
159
|
+
# Parse HTML. +string_or_io+ may be a String, or any object that
|
160
|
+
# responds to _read_ and _close_ such as an IO, or StringIO.
|
161
|
+
# +url+ is resource where this document is located. +encoding+ is the
|
162
|
+
# encoding that should be used when processing the document. +options+
|
163
|
+
# is a number that sets options in the parser, such as
|
164
|
+
# Nokogiri::XML::ParseOptions::RECOVER. See the constants in
|
165
|
+
# Nokogiri::XML::ParseOptions.
|
166
|
+
# Yields the ParseOptions object when a block is given, so callers can
# adjust it before parsing.
#
# IO-like input (anything responding to +read+) goes through read_io;
# Strings go through read_memory. When no encoding is known, an
# EncodingReader / EncodingReader.detect_encoding is used to find one.
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  yield options if block_given?

  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil

  # take over the input's own encoding unless it is binary
  if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != "ASCII-8BIT"
    encoding ||= string_or_io.encoding.name
  end

  if string_or_io.respond_to?(:read)
    if string_or_io.is_a?(Pathname)
      # resolve the Pathname to the file and open it as an IO object, see #2110
      # NOTE(review): the File opened here is not closed by this method.
      string_or_io = string_or_io.expand_path.open
      url ||= string_or_io.path
    end

    unless encoding
      # Libxml2's parser has poor support for encoding
      # detection.  First, it does not recognize the HTML5
      # style meta charset declaration.  Secondly, even if it
      # successfully detects an encoding hint, it does not
      # re-decode or re-parse the preceding part which may be
      # garbled.
      #
      # EncodingReader aims to perform advanced encoding
      # detection beyond what Libxml2 does, and to emulate
      # rewinding of a stream and make Libxml2 redo parsing
      # from the start when an encoding hint is found.
      string_or_io = EncodingReader.new(string_or_io)
      begin
        return read_io(string_or_io, url, encoding, options.to_i)
      rescue EncodingFound => e
        # parse again below, this time with the detected encoding
        encoding = e.found_encoding
      end
    end
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  if string_or_io.nil? || string_or_io.empty?
    return encoding ? new.tap { |i| i.encoding = encoding } : new
  end

  encoding ||= EncodingReader.detect_encoding(string_or_io)

  read_memory(string_or_io, url, encoding, options.to_i)
end
|
217
|
+
end
|
218
|
+
|
219
|
+
# Raised by EncodingReader#read to report the encoding detected in the
# first chunk of input; the encoding is available as #found_encoding.
class EncodingFound < StandardError # :nodoc:
  attr_reader :found_encoding

  def initialize(encoding)
    @found_encoding = encoding
    super("encoding found: %s" % encoding)
  end
end
|
227
|
+
|
228
|
+
class EncodingReader # :nodoc:
|
229
|
+
# SAX handler that records the encoding declared by <meta> elements in
# #encoding (nil until one is seen).
class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
  attr_reader :encoding

  def initialize
    @encoding = nil
    super()
  end

  # Looks only at <meta> elements. A charset attribute sets the
  # encoding; a Content-Type http-equiv whose content carries
  # "; charset=..." then sets (and may override) it.
  def start_element(name, attrs = [])
    return unless name == 'meta'

    attr = Hash[attrs]

    charset = attr['charset']
    @encoding = charset if charset

    http_equiv = attr['http-equiv']
    return unless http_equiv && http_equiv.match(/\AContent-Type\z/i)

    content = attr['content']
    return unless content

    # NOTE(review): unlike the http-equiv test above, this match is
    # case-sensitive -- confirm whether "CHARSET=" should be accepted.
    m = content.match(/;\s*charset\s*=\s*([\w-]+)/)
    @encoding = m[1] if m
  end
end
|
249
|
+
|
250
|
+
# SAXHandler that leaves the parse early by throwing to +jumptag+:
# with the encoding as soon as one has been recorded, or with nil when
# a div, h1, img, p or br element is reached first.
class JumpSAXHandler < SAXHandler
  def initialize(jumptag)
    @jumptag = jumptag
    super()
  end

  def start_element(name, attrs = [])
    super
    throw @jumptag, @encoding if @encoding
    throw @jumptag, nil if name =~ /\A(?:div|h1|img|p|br)\z/
  end
end
|
262
|
+
|
263
|
+
# Try to detect the encoding declared in +chunk+ (a String).
#
# A leading XML declaration is parsed on its own and its encoding is
# returned. Otherwise <meta> declarations are searched: on JRuby with
# a regexp first and then a SAX parse that stops early, elsewhere by
# feeding the chunk to a push parser. Returns the encoding name or nil.
def self.detect_encoding(chunk)
  xml_decl = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)
  return Nokogiri.XML(xml_decl[1]).encoding if xml_decl

  if Nokogiri.jruby?
    meta = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i)
    return meta[4] if meta

    catch(:encoding_found) do
      Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
      nil
    end
  else
    handler = SAXHandler.new
    parser = Nokogiri::HTML::SAX::PushParser.new(handler)
    begin
      parser << chunk
    rescue StandardError
      # best effort: whatever the handler saw before the failure is
      # still used
    end
    handler.encoding
  end
end
|
281
|
+
|
282
|
+
# Wrap +io+, the object that #read pulls data from. No chunk has been
# read and no encoding has been reported yet.
def initialize(io)
  @io = io
  @firstchunk = nil
  @encoding_found = nil
end
|
287
|
+
|
288
|
+
# This method is used by the C extension so that
|
289
|
+
# Nokogiri::HTML::Document#read_io() does not leak memory when
|
290
|
+
# EncodingFound is raised.
|
291
|
+
attr_reader :encoding_found
|
292
|
+
|
293
|
+
# Read up to +len+ bytes (+len+ is required).
#
# The first call reads one chunk from the wrapped IO and runs encoding
# detection on it; when an encoding is found, EncodingFound is raised
# and the chunk is kept, so the next call returns the same data again
# from the start. Returns nil at end of input.
def read(len)
  # no support for a call without len

  unless @firstchunk
    @firstchunk = @io.read(len) or return nil

    # This implementation expects that the first call from
    # htmlReadIO() is made with a length long enough (~1KB) to
    # achieve advanced encoding detection.
    if (encoding = EncodingReader.detect_encoding(@firstchunk))
      # The first chunk is stored for the next read in retry.
      @encoding_found = EncodingFound.new(encoding)
      raise @encoding_found
    end
  end
  @encoding_found = nil

  # serve what is left of the stored first chunk, then top up from the IO
  ret = @firstchunk.slice!(0, len)
  remaining = len - ret.length
  if remaining > 0
    rest = @io.read(remaining)
    ret << rest if rest
  end

  ret.empty? ? nil : ret
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
end
|
322
|
+
end
|