nokogiri-backport 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (239) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1682 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +272 -0
  6. data/bin/nokogiri +118 -0
  7. data/dependencies.yml +74 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +178 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +148 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +282 -0
  13. data/ext/java/nokogiri/HtmlSaxPushParser.java +222 -0
  14. data/ext/java/nokogiri/NokogiriService.java +597 -0
  15. data/ext/java/nokogiri/XmlAttr.java +162 -0
  16. data/ext/java/nokogiri/XmlAttributeDecl.java +129 -0
  17. data/ext/java/nokogiri/XmlCdata.java +82 -0
  18. data/ext/java/nokogiri/XmlComment.java +97 -0
  19. data/ext/java/nokogiri/XmlDocument.java +633 -0
  20. data/ext/java/nokogiri/XmlDocumentFragment.java +185 -0
  21. data/ext/java/nokogiri/XmlDtd.java +481 -0
  22. data/ext/java/nokogiri/XmlElement.java +68 -0
  23. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  24. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  25. data/ext/java/nokogiri/XmlEntityDecl.java +157 -0
  26. data/ext/java/nokogiri/XmlEntityReference.java +101 -0
  27. data/ext/java/nokogiri/XmlNamespace.java +199 -0
  28. data/ext/java/nokogiri/XmlNode.java +1684 -0
  29. data/ext/java/nokogiri/XmlNodeSet.java +434 -0
  30. data/ext/java/nokogiri/XmlProcessingInstruction.java +100 -0
  31. data/ext/java/nokogiri/XmlReader.java +531 -0
  32. data/ext/java/nokogiri/XmlRelaxng.java +151 -0
  33. data/ext/java/nokogiri/XmlSaxParserContext.java +374 -0
  34. data/ext/java/nokogiri/XmlSaxPushParser.java +286 -0
  35. data/ext/java/nokogiri/XmlSchema.java +388 -0
  36. data/ext/java/nokogiri/XmlSyntaxError.java +138 -0
  37. data/ext/java/nokogiri/XmlText.java +110 -0
  38. data/ext/java/nokogiri/XmlXpathContext.java +301 -0
  39. data/ext/java/nokogiri/XsltStylesheet.java +347 -0
  40. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
  42. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
  43. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  44. data/ext/java/nokogiri/internals/NokogiriDomParser.java +116 -0
  45. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +121 -0
  46. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +69 -0
  47. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  48. data/ext/java/nokogiri/internals/NokogiriHelpers.java +734 -0
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +217 -0
  50. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +127 -0
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +100 -0
  52. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  53. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +180 -0
  55. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +72 -0
  56. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +60 -0
  57. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +87 -0
  58. data/ext/java/nokogiri/internals/ParserContext.java +259 -0
  59. data/ext/java/nokogiri/internals/ReaderNode.java +488 -0
  60. data/ext/java/nokogiri/internals/SaveContextVisitor.java +778 -0
  61. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +73 -0
  62. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +168 -0
  63. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  64. data/ext/java/nokogiri/internals/XmlDomParserContext.java +274 -0
  65. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  66. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
  67. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
  68. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
  69. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +367 -0
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
  79. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
  82. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
  83. data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
  84. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
  85. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
  86. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
  87. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +166 -0
  88. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
  89. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
  90. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
  91. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
  92. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1745 -0
  94. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +685 -0
  95. data/ext/nokogiri/depend +477 -0
  96. data/ext/nokogiri/extconf.rb +836 -0
  97. data/ext/nokogiri/html_document.c +171 -0
  98. data/ext/nokogiri/html_document.h +10 -0
  99. data/ext/nokogiri/html_element_description.c +279 -0
  100. data/ext/nokogiri/html_element_description.h +10 -0
  101. data/ext/nokogiri/html_entity_lookup.c +32 -0
  102. data/ext/nokogiri/html_entity_lookup.h +8 -0
  103. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  104. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  105. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  106. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  107. data/ext/nokogiri/nokogiri.c +135 -0
  108. data/ext/nokogiri/nokogiri.h +130 -0
  109. data/ext/nokogiri/xml_attr.c +103 -0
  110. data/ext/nokogiri/xml_attr.h +9 -0
  111. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  112. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  113. data/ext/nokogiri/xml_cdata.c +62 -0
  114. data/ext/nokogiri/xml_cdata.h +9 -0
  115. data/ext/nokogiri/xml_comment.c +69 -0
  116. data/ext/nokogiri/xml_comment.h +9 -0
  117. data/ext/nokogiri/xml_document.c +622 -0
  118. data/ext/nokogiri/xml_document.h +23 -0
  119. data/ext/nokogiri/xml_document_fragment.c +48 -0
  120. data/ext/nokogiri/xml_document_fragment.h +10 -0
  121. data/ext/nokogiri/xml_dtd.c +202 -0
  122. data/ext/nokogiri/xml_dtd.h +10 -0
  123. data/ext/nokogiri/xml_element_content.c +123 -0
  124. data/ext/nokogiri/xml_element_content.h +10 -0
  125. data/ext/nokogiri/xml_element_decl.c +69 -0
  126. data/ext/nokogiri/xml_element_decl.h +9 -0
  127. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  128. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  129. data/ext/nokogiri/xml_entity_decl.c +110 -0
  130. data/ext/nokogiri/xml_entity_decl.h +10 -0
  131. data/ext/nokogiri/xml_entity_reference.c +52 -0
  132. data/ext/nokogiri/xml_entity_reference.h +9 -0
  133. data/ext/nokogiri/xml_io.c +63 -0
  134. data/ext/nokogiri/xml_io.h +11 -0
  135. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  136. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  137. data/ext/nokogiri/xml_namespace.c +111 -0
  138. data/ext/nokogiri/xml_namespace.h +14 -0
  139. data/ext/nokogiri/xml_node.c +1773 -0
  140. data/ext/nokogiri/xml_node.h +13 -0
  141. data/ext/nokogiri/xml_node_set.c +486 -0
  142. data/ext/nokogiri/xml_node_set.h +12 -0
  143. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  144. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  145. data/ext/nokogiri/xml_reader.c +657 -0
  146. data/ext/nokogiri/xml_reader.h +10 -0
  147. data/ext/nokogiri/xml_relax_ng.c +179 -0
  148. data/ext/nokogiri/xml_relax_ng.h +9 -0
  149. data/ext/nokogiri/xml_sax_parser.c +305 -0
  150. data/ext/nokogiri/xml_sax_parser.h +39 -0
  151. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  152. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  153. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  154. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  155. data/ext/nokogiri/xml_schema.c +276 -0
  156. data/ext/nokogiri/xml_schema.h +9 -0
  157. data/ext/nokogiri/xml_syntax_error.c +64 -0
  158. data/ext/nokogiri/xml_syntax_error.h +13 -0
  159. data/ext/nokogiri/xml_text.c +52 -0
  160. data/ext/nokogiri/xml_text.h +9 -0
  161. data/ext/nokogiri/xml_xpath_context.c +374 -0
  162. data/ext/nokogiri/xml_xpath_context.h +10 -0
  163. data/ext/nokogiri/xslt_stylesheet.c +263 -0
  164. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  165. data/lib/isorelax.jar +0 -0
  166. data/lib/jing.jar +0 -0
  167. data/lib/nekodtd.jar +0 -0
  168. data/lib/nekohtml.jar +0 -0
  169. data/lib/nokogiri/css/node.rb +53 -0
  170. data/lib/nokogiri/css/parser.rb +751 -0
  171. data/lib/nokogiri/css/parser.y +272 -0
  172. data/lib/nokogiri/css/parser_extras.rb +94 -0
  173. data/lib/nokogiri/css/syntax_error.rb +8 -0
  174. data/lib/nokogiri/css/tokenizer.rb +154 -0
  175. data/lib/nokogiri/css/tokenizer.rex +55 -0
  176. data/lib/nokogiri/css/xpath_visitor.rb +260 -0
  177. data/lib/nokogiri/css.rb +28 -0
  178. data/lib/nokogiri/decorators/slop.rb +43 -0
  179. data/lib/nokogiri/html/builder.rb +36 -0
  180. data/lib/nokogiri/html/document.rb +322 -0
  181. data/lib/nokogiri/html/document_fragment.rb +50 -0
  182. data/lib/nokogiri/html/element_description.rb +24 -0
  183. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  184. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  185. data/lib/nokogiri/html/sax/parser.rb +63 -0
  186. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  187. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  188. data/lib/nokogiri/html.rb +38 -0
  189. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  190. data/lib/nokogiri/syntax_error.rb +5 -0
  191. data/lib/nokogiri/version/constant.rb +5 -0
  192. data/lib/nokogiri/version/info.rb +182 -0
  193. data/lib/nokogiri/version.rb +3 -0
  194. data/lib/nokogiri/xml/attr.rb +15 -0
  195. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  196. data/lib/nokogiri/xml/builder.rb +447 -0
  197. data/lib/nokogiri/xml/cdata.rb +12 -0
  198. data/lib/nokogiri/xml/character_data.rb +8 -0
  199. data/lib/nokogiri/xml/document.rb +290 -0
  200. data/lib/nokogiri/xml/document_fragment.rb +159 -0
  201. data/lib/nokogiri/xml/dtd.rb +33 -0
  202. data/lib/nokogiri/xml/element_content.rb +37 -0
  203. data/lib/nokogiri/xml/element_decl.rb +14 -0
  204. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  205. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  206. data/lib/nokogiri/xml/namespace.rb +14 -0
  207. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  208. data/lib/nokogiri/xml/node.rb +1240 -0
  209. data/lib/nokogiri/xml/node_set.rb +372 -0
  210. data/lib/nokogiri/xml/notation.rb +7 -0
  211. data/lib/nokogiri/xml/parse_options.rb +127 -0
  212. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  213. data/lib/nokogiri/xml/pp/node.rb +57 -0
  214. data/lib/nokogiri/xml/pp.rb +3 -0
  215. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  216. data/lib/nokogiri/xml/reader.rb +116 -0
  217. data/lib/nokogiri/xml/relax_ng.rb +37 -0
  218. data/lib/nokogiri/xml/sax/document.rb +172 -0
  219. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  220. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  221. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  222. data/lib/nokogiri/xml/sax.rb +5 -0
  223. data/lib/nokogiri/xml/schema.rb +72 -0
  224. data/lib/nokogiri/xml/searchable.rb +239 -0
  225. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  226. data/lib/nokogiri/xml/text.rb +10 -0
  227. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  228. data/lib/nokogiri/xml/xpath.rb +11 -0
  229. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  230. data/lib/nokogiri/xml.rb +76 -0
  231. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  232. data/lib/nokogiri/xslt.rb +57 -0
  233. data/lib/nokogiri.rb +144 -0
  234. data/lib/serializer.jar +0 -0
  235. data/lib/xalan.jar +0 -0
  236. data/lib/xercesImpl.jar +0 -0
  237. data/lib/xml-apis.jar +0 -0
  238. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  239. metadata +531 -0
@@ -0,0 +1,260 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module CSS
4
+ class XPathVisitor # :nodoc:
5
# Translates a CSS function node (for example "nth-child(" or "has(") into an
# XPath fragment. If this visitor publicly responds to
# visit_function_<name>, that method is used instead of the table below.
def visit_function node
  fn_name = node.value.first
  fn_arg = node.value[1]

  override = :"visit_function_#{fn_name.gsub(/[(]/, '')}"
  if self.respond_to?(override)
    return self.send(override, node)
  end

  # True when the argument was parsed as an an+b expression node.
  an_plus_b = lambda { fn_arg.is_a?(Nokogiri::CSS::Node) && fn_arg.type == :NTH }

  case fn_name
  when /^text\(/ then 'child::text()'
  when /^self\(/ then "self::#{fn_arg}"
  when /^eq\(/ then "position()=#{fn_arg}"
  when /^(nth|nth-of-type)\(/
    an_plus_b.call ? nth(fn_arg) : "position()=#{fn_arg}"
  when /^nth-child\(/
    if an_plus_b.call
      nth(fn_arg, :child => true)
    else
      "count(preceding-sibling::*)=#{fn_arg.to_i-1}"
    end
  when /^nth-last-of-type\(/
    if an_plus_b.call
      nth(fn_arg, :last => true)
    else
      offset = fn_arg.to_i - 1
      offset == 0 ? "position()=last()" : "position()=last()-#{offset}"
    end
  when /^nth-last-child\(/
    if an_plus_b.call
      nth(fn_arg, :last => true, :child => true)
    else
      "count(following-sibling::*)=#{fn_arg.to_i-1}"
    end
  when /^(first|first-of-type)\(/ then "position()=1"
  when /^(last|last-of-type)\(/ then "position()=last()"
  when /^contains\(/ then "contains(.,#{fn_arg})"
  when /^gt\(/ then "position()>#{fn_arg}"
  when /^only-child\(/ then "last()=1"
  when /^comment\(/ then "comment()"
  when /^has\(/
    # A nil left-hand side means the argument starts with an explicit
    # combinator, e.g. "has(> a)", "has(~ a)", "has(+ a)".
    descend = fn_arg.value[0].nil? ? "" : "//"
    ".#{descend}#{fn_arg.accept(self)}"
  else
    # non-standard. this looks like a function call.
    call_args = ['.'] + node.value[1..-1]
    "#{fn_name}#{call_args.join(',')})"
  end
end
62
+
63
# Wraps the translated operand in not(...). A bare element name is tested
# against the context node itself via the self:: axis.
def visit_not node
  operand = node.value.first
  element_name = (:ELEMENT_NAME == operand.type)
  inner = operand.accept(self)
  element_name ? "not(self::#{inner})" : "not(#{inner})"
end
71
+
72
# Turns an id token such as "#main" into an @id comparison. When the token
# does not match the "#..." pattern the compared value is the empty string.
def visit_id node
  matched = /^#(.*)$/.match(node.value.first)
  id_value = matched && matched[1]
  "@id='#{id_value}'"
end
76
+
77
# Translates an attribute condition such as [href], [href="x"] or [href^="x"].
#
# node.value is either [attribute] or [attribute, operator, value]. The
# attribute is prefixed with "@" unless it is a function node or its first
# value contains "::".
def visit_attribute_condition node
  lhs = node.value.first
  bare = (lhs.type == :FUNCTION) || (lhs.value.first =~ /::/)
  attribute = (bare ? '' : '@') + lhs.accept(self)

  # non-standard. attributes starting with '@'
  attribute = attribute.gsub(/^@@/, '@')

  return attribute unless node.value.length == 3

  value = node.value.last
  value = "'#{value}'" if value !~ /^['"]/

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  quote = value[0]
  if (quote == value[-1]) && %q{"'}.include?(quote)
    inner = value[1..-2]
    if inner.include?(quote)
      # The quote character also occurs inside the value, so build the
      # literal with concat(), splicing every double quote in as '"'.
      value = 'concat("' + inner.split('"', -1).join(%q{",'"',"}) + '","")'
    end
  end

  operator = node.value[1]
  case operator
  when :equal then attribute + "=" + "#{value}"
  when :not_equal then attribute + "!=" + "#{value}"
  when :substring_match then "contains(#{attribute},#{value})"
  when :prefix_match then "starts-with(#{attribute},#{value})"
  when :dash_match
    "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
  when :includes
    css_class(attribute, value[1..-2]) # strip quotes
  when :suffix_match
    "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
  else
    attribute + " #{operator} " + "#{value}"
  end
end
121
+
122
# Translates a pseudo-class node. A FUNCTION child is delegated to its own
# accept; otherwise a public visit_pseudo_class_<name> method on this visitor
# wins, then the built-in table, and finally the name is emitted as an XPath
# function call on the context node.
def visit_pseudo_class node
  head = node.value.first
  if head.is_a?(Nokogiri::CSS::Node) && head.type == :FUNCTION
    return head.accept(self)
  end

  override = :"visit_pseudo_class_#{head.gsub(/[(]/, '')}"
  return self.send(override, node) if self.respond_to?(override)

  builtin = {
    "first" => "position()=1",
    "first-child" => "count(preceding-sibling::*)=0",
    "last" => "position()=last()",
    "last-child" => "count(following-sibling::*)=0",
    "first-of-type" => "position()=1",
    "last-of-type" => "position()=last()",
    "only-child" => "count(preceding-sibling::*)=0 and count(following-sibling::*)=0",
    "only-of-type" => "last()=1",
    "empty" => "not(node())",
    "parent" => "node()",
    "root" => "not(parent::*)",
  }
  builtin.fetch(head) { head + "(.)" }
end
146
+
147
# Translates a ".name" class condition by delegating to css_class with the
# @class attribute as the haystack.
def visit_class_condition node
  class_name = node.value.first
  css_class("@class", class_name)
end
150
+
151
# Joins two conditions. An *-of-type pseudo class on the right closes the
# current predicate and opens a new one ("]["); everything else is and-ed
# inside the same predicate.
def visit_combinator node
  left = node.value.first
  right = node.value.last
  joiner = is_of_type_pseudo_class?(right) ? "][" : " and "
  "#{left.accept(self) if left}#{joiner}#{right.accept(self)}"
end
158
+
159
# Defines one visit_<selector> method per combinator. Each renders the
# optional left operand, the XPath axis step for the combinator, and the
# right operand.
{
  'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
  'following_selector' => "/following-sibling::",
  'descendant_selector' => '//',
  'child_selector' => '/',
}.each do |selector_kind, axis|
  define_method(:"visit_#{selector_kind}") do |node|
    left = node.value.first
    "#{left.accept(self) if left}#{axis}#{node.value.last.accept(self)}"
  end
end
171
+
172
# Renders "<subject>[<condition>]" from the first and last child nodes.
def visit_conditional_selector node
  subject = node.value.first.accept(self)
  condition = node.value.last.accept(self)
  subject + '[' + condition + ']'
end
176
+
177
# An element-name node renders as its first stored value, unchanged.
def visit_element_name node
  element_name = node.value.first
  element_name
end
180
+
181
# Entry point: hands this visitor to +node+ and returns whatever the node's
# own accept returns.
def accept node
  visitor = self
  node.accept(visitor)
end
184
+
185
+ private
186
+
187
# Builds the XPath test for an an+b expression node.
#
# options[:child] counts siblings instead of using position();
# options[:last] counts from the end. Raises ArgumentError unless the node
# holds exactly four tokens.
def nth node, options={}
  tokens = node.value
  unless tokens.size == 4
    raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}"
  end

  a, b = read_a_and_positive_b tokens

  position =
    if options[:child] && options[:last]
      "(count(following-sibling::*)+1)"
    elsif options[:child]
      "(count(preceding-sibling::*)+1)"
    elsif options[:last]
      "(last()-position()+1)"
    else
      "position()"
    end

  return "(#{position} mod #{a})=0" if b.zero?

  compare = a < 0 ? "<=" : ">="
  return "#{position}#{compare}#{b}" if a.abs == 1

  "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
end
208
+
209
# Extracts [a, b] from the four an+b tokens (a at index 0, operator at 2,
# b at 3). For the "-" operator b is rewritten as a - (b mod a). Raises
# ArgumentError for any other operator.
def read_a_and_positive_b values
  op = values[2]
  unless op == "+" || op == "-"
    raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
  end

  a = values[0].to_i
  b = op == "+" ? values[3].to_i : a - (values[3].to_i % a)
  [a, b]
end
222
+
223
# Returns the match index (truthy) when +node+ is a PSEUDO_CLASS whose name,
# or whose FUNCTION child's name, matches one of the *-of-type pseudo
# classes; nil otherwise.
def is_of_type_pseudo_class? node
  return nil unless node.type==:PSEUDO_CLASS

  head = node.value[0]
  label =
    if head.is_a?(Nokogiri::CSS::Node) && head.type == :FUNCTION
      head.value[0]
    else
      head
    end
  label =~ /(nth|first|last|only)-of-type(\()?/
end
232
+
233
# Class-membership test written with ordinary XPath functions only: pads
# the whitespace-normalized +hay+ with spaces and looks for " needle ".
def css_class_standard(hay, needle)
  padded = "concat(' ',normalize-space(#{hay}),' ')"
  "contains(#{padded},' #{needle} ')"
end
237
+
238
# Class-membership test that calls the nokogiri-builtin:css-class XPath
# function with +hay+ and the quoted +needle+.
def css_class_builtin(hay, needle)
  quoted_needle = "'#{needle}'"
  "nokogiri-builtin:css-class(#{hay},#{quoted_needle})"
end
242
+
243
# By default css_class is the portable, standard-XPath implementation.
alias css_class css_class_standard
244
+ end
245
+
246
# Visitor variant whose css_class always resolves to css_class_builtin.
class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
  private

  alias css_class css_class_builtin
end
250
+
251
# Visitor variant that picks css_class_builtin when Nokogiri.uses_libxml?
# is true and css_class_standard otherwise. The choice is made once, when
# this class body is evaluated.
class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
  private

  alias_method :css_class, (Nokogiri.uses_libxml? ? :css_class_builtin : :css_class_standard)
end
259
+ end
260
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+ require 'nokogiri/css/node'
3
+ require 'nokogiri/css/xpath_visitor'
4
+ x = $-w
5
+ $-w = false
6
+ require 'nokogiri/css/parser'
7
+ $-w = x
8
+
9
+ require 'nokogiri/css/tokenizer'
10
+ require 'nokogiri/css/syntax_error'
11
+
12
module Nokogiri
  module CSS
    ###
    # Parse this CSS selector in +selector+. Returns an AST.
    def self.parse(selector)
      parser = Parser.new
      parser.parse(selector)
    end

    ###
    # Get the XPath for +selector+. options[:ns], when present, is passed to
    # the Parser constructor (an empty Hash otherwise); the whole +options+
    # hash is forwarded to Parser#xpath_for.
    def self.xpath_for(selector, options = {})
      namespaces = options[:ns] || {}
      Parser.new(namespaces).xpath_for(selector, options)
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module Decorators
4
+ ###
5
+ # The Slop decorator implements method_missing such that methods may be
6
+ # used instead of XPath or CSS. See Nokogiri.Slop
7
module Slop
  # The default XPath search context for Slop
  XPATH_PREFIX = "./"

  ###
  # look for node with +name+. See Nokogiri.Slop
  #
  # * no arguments: XPath search for child elements called +name+ (one
  #   leading underscore is stripped from +name+).
  # * Hash argument: <tt>:css</tt> is appended to +name+ as a CSS selector;
  #   <tt>:xpath</tt> (a String or Array of conditions) is and-ed into an
  #   XPath predicate. <tt>:css</tt> takes precedence.
  # * any other argument: appended to +name+ and treated as a CSS selector.
  #
  # Returns the single matching node, or the whole list when several match.
  # Falls through to +super+ (normally raising NoMethodError for +name+)
  # when nothing matches.
  def method_missing name, *args, &block
    list = nil

    if args.empty?
      list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
    elsif args.first.is_a? Hash
      hash = args.first
      if hash[:css]
        list = css("#{name}#{hash[:css]}")
      elsif hash[:xpath]
        conds = Array(hash[:xpath]).join(' and ')
        list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
      end
      # A Hash with neither key performs no search and leaves list nil.
    else
      CSS::Parser.without_cache do
        list = xpath(
          *CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
        )
      end
    end

    # nil (no search performed) is handled like an empty result, so the
    # caller gets a NoMethodError about +name+ rather than about nil.empty?.
    super if list.nil? || list.empty?
    list.length == 1 ? list.first : list
  end

  # True when an XPath search for +name+ (one leading underscore stripped)
  # finds at least one node.
  def respond_to_missing? name, include_private = false
    list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")

    !list.empty?
  end
end
42
+ end
43
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module HTML
4
+ ###
5
+ # Nokogiri HTML builder is used for building HTML documents. It is very
6
+ # similar to the Nokogiri::XML::Builder. In fact, you should go read the
7
+ # documentation for Nokogiri::XML::Builder before reading this
8
+ # documentation.
9
+ #
10
+ # == Synopsis:
11
+ #
12
+ # Create an HTML document with a body that has an onload attribute, and a
13
+ # span tag with a class of "bold" that has content of "Hello world".
14
+ #
15
+ # builder = Nokogiri::HTML::Builder.new do |doc|
16
+ # doc.html {
17
+ # doc.body(:onload => 'some_func();') {
18
+ # doc.span.bold {
19
+ # doc.text "Hello world"
20
+ # }
21
+ # }
22
+ # }
23
+ # end
24
+ # puts builder.to_html
25
+ #
26
+ # The HTML builder inherits from the XML builder, so make sure to read the
27
+ # Nokogiri::XML::Builder documentation.
28
class Builder < Nokogiri::XML::Builder
  ###
  # Convert the builder to HTML by serializing the document stored in @doc.
  def to_html
    html_document = @doc
    html_document.to_html
  end
end
35
+ end
36
+ end
@@ -0,0 +1,322 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ class Document < Nokogiri::XML::Document
8
+ ###
9
+ # Get the meta tag encoding for this document. If there is no meta tag,
10
+ # then nil is returned.
11
def meta_encoding
  # A <meta charset> tag is preferred over a Content-Type meta tag.
  if (charset_meta = at('//meta[@charset]'))
    charset_meta[:charset]
  elsif (content_type_meta = meta_content_type)
    # Pull the charset parameter out of e.g. "text/html; charset=utf-8".
    content_type_meta['content'][/charset\s*=\s*([\w-]+)/i, 1]
  end
end
19
+
20
+ ###
21
+ # Set the meta tag encoding for this document.
22
+ #
23
+ # If a meta encoding tag is already present, its content is
24
+ # replaced with the given text.
25
+ #
26
+ # Otherwise, this method tries to create one at an appropriate
27
+ # place supplying head and/or html elements as necessary, which
28
+ # is inside a head element if any, and before any text node or
29
+ # content element (typically <body>) if any.
30
+ #
31
+ # The result when trying to set an encoding that is different
32
+ # from the document encoding is undefined.
33
+ #
34
+ # Beware in CRuby, that libxml2 automatically inserts a meta tag
35
+ # into a head element.
36
def meta_encoding= encoding
  # An existing Content-Type meta tag is rewritten first ...
  if (existing = meta_content_type)
    existing['content'] = 'text/html; charset=%s' % encoding
    return encoding
  end

  # ... then an existing <meta charset> tag.
  if (existing = at('//meta[@charset]'))
    existing['charset'] = encoding
    return encoding
  end

  # Neither exists: build a new tag, in charset form when the internal
  # subset reports itself as an HTML5 DTD and in http-equiv form otherwise.
  meta = XML::Node.new('meta', self)
  dtd = internal_subset
  if dtd && dtd.html5_dtd?
    meta['charset'] = encoding
  else
    meta['http-equiv'] = 'Content-Type'
    meta['content'] = 'text/html; charset=%s' % encoding
  end

  head = at('//head')
  if head
    head.prepend_child(meta)
  else
    set_metadata_element(meta)
  end
  encoding
end
61
+
62
# Finds the first <meta> element that has a non-empty content attribute
# and whose http-equiv attribute is "Content-Type" (case-insensitive).
# Returns nil when there is none.
def meta_content_type
  candidates = xpath('//meta[@http-equiv and boolean(@content)]')
  candidates.find do |meta|
    meta['http-equiv'] =~ /\AContent-Type\z/i
  end
end
private :meta_content_type
68
+
69
+ ###
70
+ # Get the title string of this document. Return nil if there is
71
+ # no title tag.
72
def title
  # Falls through to nil when the document has no <title> element.
  title_node = at('//title')
  title_node && title_node.inner_text
end
75
+
76
+ ###
77
+ # Set the title string of this document.
78
+ #
79
+ # If a title element is already present, its content is replaced
80
+ # with the given text.
81
+ #
82
+ # Otherwise, this method tries to create one at an appropriate
83
+ # place supplying head and/or html elements as necessary, which
84
+ # is inside a head element if any, right after a meta
85
+ # encoding/charset tag if any, and before any text node or
86
+ # content element (typically <body>) if any.
87
def title=(text)
  tnode = XML::Text.new(text, self)

  # An existing <title> just gets its children replaced.
  existing = at('//title')
  if existing
    existing.children = tnode
    return text
  end

  title = XML::Node.new('title', self) << tnode
  if (head = at('//head'))
    head << title
  elsif (meta = at('//meta[@charset]') || meta_content_type)
    # better put after charset declaration
    meta.add_next_sibling(title)
  else
    set_metadata_element(title)
  end
  text
end
106
+
107
# Inserts +element+ (a meta or title node) into the head of the document,
# creating <head> and/or <html> when they are missing.
def set_metadata_element(element)
  if (head = at('//head'))
    head << element
  elsif (html = at('//html'))
    head = html.prepend_child(XML::Node.new('head', self))
    head.prepend_child(element)
  elsif (first = children.find { |child| XML::Element === child || XML::Text === child })
    # We reach here only if the underlying document model
    # allows <html>/<head> elements to be omitted and does not
    # automatically supply them.
    first.add_previous_sibling(element)
  else
    html = add_child(XML::Node.new('html', self))
    head = html.add_child(XML::Node.new('head', self))
    head.prepend_child(element)
  end
end
private :set_metadata_element
131
+
132
+ ####
133
+ # Serialize Node using +options+. Save options can also be set using a
134
+ # block. See SaveOptions.
135
+ #
136
+ # These two statements are equivalent:
137
+ #
138
+ # node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
139
+ #
140
+ # or
141
+ #
142
+ # node.serialize(:encoding => 'UTF-8') do |config|
143
+ # config.format.as_xml
144
+ # end
145
+ #
146
def serialize options = {}
  # Default to HTML save options; the (possibly updated) hash is what the
  # bare super call below forwards to the parent implementation.
  unless options[:save_with]
    options[:save_with] = XML::Node::SaveOptions::DEFAULT_HTML
  end
  super
end
150
+
151
+ ####
152
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
153
def fragment tags = nil
  # The document root is passed as the third DocumentFragment argument.
  context_node = self.root
  DocumentFragment.new(self, tags, context_node)
end
156
+
157
+ class << self
158
+ ###
159
+ # Parse HTML. +string_or_io+ may be a String, or any object that
160
+ # responds to _read_ and _close_ such as an IO, or StringIO.
161
+ # +url+ is the resource where this document is located. +encoding+ is the
162
+ # encoding that should be used when processing the document. +options+
163
+ # is a number that sets options in the parser, such as
164
+ # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
165
+ # Nokogiri::XML::ParseOptions.
166
# Parses +string_or_io+ into an HTML document.
#
# +string_or_io+:: a String, a Pathname, or any object responding to +read+.
# +url+::          location of the document; defaults to the input's +path+
#                  when it has one.
# +encoding+::     encoding name; when nil it is taken from the input's own
#                  encoding (unless that is ASCII-8BIT) or detected from
#                  the content.
# +options+::      an Integer bit mask or a ParseOptions object; yielded to
#                  the block, if one is given, before parsing.
#
# A Pathname is opened here and the resulting file handle is closed again
# before returning, whether parsing succeeds or raises.
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  yield options if block_given?

  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil

  if string_or_io.respond_to?(:encoding)
    unless string_or_io.encoding.name == "ASCII-8BIT"
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    # Only a handle opened by this method is closed by this method;
    # caller-supplied IO objects are left open for the caller.
    opened_io = nil
    begin
      if string_or_io.is_a?(Pathname)
        # resolve the Pathname to the file and open it as an IO object, see #2110
        string_or_io = opened_io = string_or_io.expand_path.open
        url ||= string_or_io.path
      end

      unless encoding
        # Libxml2's parser has poor support for encoding
        # detection.  First, it does not recognize the HTML5
        # style meta charset declaration.  Secondly, even if it
        # successfully detects an encoding hint, it does not
        # re-decode or re-parse the preceding part which may be
        # garbled.
        #
        # EncodingReader aims to perform advanced encoding
        # detection beyond what Libxml2 does, and to emulate
        # rewinding of a stream and make Libxml2 redo parsing
        # from the start when an encoding hint is found.
        string_or_io = EncodingReader.new(string_or_io)
        begin
          return read_io(string_or_io, url, encoding, options.to_i)
        rescue EncodingFound => e
          encoding = e.found_encoding
        end
      end
      return read_io(string_or_io, url, encoding, options.to_i)
    ensure
      opened_io.close if opened_io && !opened_io.closed?
    end
  end

  # read_memory pukes on empty docs
  if string_or_io.nil? or string_or_io.empty?
    return encoding ? new.tap { |i| i.encoding = encoding } : new
  end

  encoding ||= EncodingReader.detect_encoding(string_or_io)

  read_memory(string_or_io, url, encoding, options.to_i)
end
217
+ end
218
+
219
# Raised to abort a parse once an encoding hint has been found; the
# encoding is available through #found_encoding.
class EncodingFound < StandardError # :nodoc:
  attr_reader :found_encoding

  def initialize(encoding)
    @found_encoding = encoding
    message = "encoding found: %s" % encoding
    super(message)
  end
end
227
+
228
+ class EncodingReader # :nodoc:
229
+ class SAXHandler < Nokogiri::XML::SAX::Document # :nodoc:
230
+ attr_reader :encoding
231
+
232
# Starts with no encoding detected.
def initialize()
  @encoding = nil
  super()
end
236
+
237
# Records the encoding declared by a <meta> element in @encoding.
#
# Both <meta charset="..."> and
# <meta http-equiv="Content-Type" content="...; charset=..."> are
# recognized; when one element carries both, the Content-Type value wins.
# The "charset" parameter name is matched case-insensitively, in line with
# Document#meta_encoding. Elements other than "meta" are ignored.
def start_element(name, attrs = [])
  return unless name == 'meta'

  attr = Hash[attrs]

  charset = attr['charset']
  @encoding = charset if charset

  http_equiv = attr['http-equiv']
  content = attr['content']
  if http_equiv && http_equiv.match(/\AContent-Type\z/i) && content
    m = content.match(/;\s*charset\s*=\s*([\w-]+)/i)
    @encoding = m[1] if m
  end
end
248
+ end
249
+
250
# SAXHandler variant that leaves the parse early by throwing to the tag
# given at construction time.
class JumpSAXHandler < SAXHandler
  def initialize(jumptag)
    @jumptag = jumptag
    super()
  end

  # Throws the detected encoding as soon as one is known, or nil once one
  # of the listed content elements is reached.
  def start_element(name, attrs = [])
    super
    if @encoding
      throw @jumptag, @encoding
    elsif name =~ /\A(?:div|h1|img|p|br)\z/
      throw @jumptag, nil
    end
  end
end
262
+
263
# Returns the encoding name declared at the start of +chunk+, or nil.
def self.detect_encoding(chunk)
  # A leading XML declaration wins: parse just that declaration as XML
  # and report the resulting document's encoding.
  xml_decl = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)
  return Nokogiri.XML(xml_decl[1]).encoding if xml_decl

  unless Nokogiri.jruby?
    handler = SAXHandler.new
    parser = Nokogiri::HTML::SAX::PushParser.new(handler)
    begin
      parser << chunk
    rescue StandardError
      # Best effort: any StandardError raised while pushing the chunk is
      # swallowed and whatever the handler saw so far is used.
    end
    return handler.encoding
  end

  meta = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i)
  return meta[4] if meta

  catch(:encoding_found) do
    Nokogiri::HTML::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
    nil
  end
end
281
+
282
# Wraps +io+; no chunk has been read and no encoding has been found yet.
def initialize(io)
  @io = io
  @firstchunk = @encoding_found = nil
end
287
+
288
+ # This method is used by the C extension so that
289
+ # Nokogiri::HTML::Document#read_io() does not leak memory when
290
+ # EncodingFound is raised.
291
+ attr_reader :encoding_found
292
+
293
# Reads up to +len+ bytes (a call without +len+ is not supported).
#
# The first call reads and keeps a chunk for encoding detection. If an
# encoding is detected, EncodingFound is raised and the chunk stays
# buffered, so the retried parse reads it again from the start. Returns
# nil when nothing is left.
def read(len)
  unless @firstchunk
    @firstchunk = @io.read(len)
    return nil unless @firstchunk

    # This implementation expects that the first call from
    # htmlReadIO() is made with a length long enough (~1KB) to
    # achieve advanced encoding detection.
    detected = EncodingReader.detect_encoding(@firstchunk)
    if detected
      # The first chunk is stored for the next read in retry.
      @encoding_found = EncodingFound.new(detected)
      raise @encoding_found
    end
  end
  @encoding_found = nil

  # Serve from the buffered first chunk, then top up from the stream.
  buffer = @firstchunk.slice!(0, len)
  remaining = len - buffer.length
  if remaining > 0
    tail = @io.read(remaining)
    buffer << tail if tail
  end
  buffer.empty? ? nil : buffer
end
319
+ end
320
+ end
321
+ end
322
+ end