nokogiri 1.5.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (328)
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +278 -0
  6. data/bin/nokogiri +50 -10
  7. data/dependencies.yml +74 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +944 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +305 -201
  25. data/ext/nokogiri/xml_document_fragment.c +13 -15
  26. data/ext/nokogiri/xml_dtd.c +54 -48
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +30 -19
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +808 -503
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +198 -186
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +162 -98
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4886 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/css/node.rb +1 -50
  92. data/lib/nokogiri/css/parser.rb +317 -286
  93. data/lib/nokogiri/css/parser.y +57 -43
  94. data/lib/nokogiri/css/parser_extras.rb +39 -36
  95. data/lib/nokogiri/css/syntax_error.rb +2 -1
  96. data/lib/nokogiri/css/tokenizer.rb +105 -103
  97. data/lib/nokogiri/css/tokenizer.rex +5 -5
  98. data/lib/nokogiri/css/xpath_visitor.rb +137 -48
  99. data/lib/nokogiri/css.rb +15 -14
  100. data/lib/nokogiri/decorators/slop.rb +13 -5
  101. data/lib/nokogiri/extension.rb +31 -0
  102. data/lib/nokogiri/gumbo.rb +14 -0
  103. data/lib/nokogiri/html.rb +32 -27
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +118 -50
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  113. data/lib/nokogiri/html4.rb +40 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/html5.rb +473 -0
  118. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  119. data/lib/nokogiri/syntax_error.rb +1 -0
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/version.rb +3 -91
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +75 -33
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +157 -54
  129. data/lib/nokogiri/xml/document_fragment.rb +55 -8
  130. data/lib/nokogiri/xml/dtd.rb +15 -4
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  137. data/lib/nokogiri/xml/node.rb +712 -431
  138. data/lib/nokogiri/xml/node_set.rb +140 -123
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +31 -0
  141. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  142. data/lib/nokogiri/xml/pp/node.rb +1 -0
  143. data/lib/nokogiri/xml/pp.rb +3 -2
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax/document.rb +25 -30
  148. data/lib/nokogiri/xml/sax/parser.rb +8 -8
  149. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  150. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  151. data/lib/nokogiri/xml/sax.rb +5 -4
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +239 -0
  154. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  157. data/lib/nokogiri/xml/xpath.rb +4 -5
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xml.rb +37 -35
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/nokogiri/xslt.rb +17 -16
  162. data/lib/nokogiri.rb +55 -58
  163. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  164. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  165. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  166. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  167. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  171. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  172. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  173. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  174. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  175. metadata +307 -459
  176. data/.autotest +0 -26
  177. data/.gemtest +0 -0
  178. data/CHANGELOG.ja.rdoc +0 -785
  179. data/CHANGELOG.rdoc +0 -783
  180. data/C_CODING_STYLE.rdoc +0 -33
  181. data/Manifest.txt +0 -303
  182. data/README.ja.rdoc +0 -106
  183. data/README.rdoc +0 -175
  184. data/ROADMAP.md +0 -90
  185. data/Rakefile +0 -228
  186. data/STANDARD_RESPONSES.md +0 -47
  187. data/Y_U_NO_GEMSPEC.md +0 -155
  188. data/build_all +0 -105
  189. data/ext/nokogiri/html_document.c +0 -170
  190. data/ext/nokogiri/html_document.h +0 -10
  191. data/ext/nokogiri/html_element_description.c +0 -279
  192. data/ext/nokogiri/html_element_description.h +0 -10
  193. data/ext/nokogiri/html_entity_lookup.c +0 -32
  194. data/ext/nokogiri/html_entity_lookup.h +0 -8
  195. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  196. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  197. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  198. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  199. data/ext/nokogiri/xml_attr.h +0 -9
  200. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  201. data/ext/nokogiri/xml_cdata.h +0 -9
  202. data/ext/nokogiri/xml_comment.h +0 -9
  203. data/ext/nokogiri/xml_document.h +0 -23
  204. data/ext/nokogiri/xml_document_fragment.h +0 -10
  205. data/ext/nokogiri/xml_dtd.h +0 -10
  206. data/ext/nokogiri/xml_element_content.h +0 -10
  207. data/ext/nokogiri/xml_element_decl.h +0 -9
  208. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  209. data/ext/nokogiri/xml_entity_decl.h +0 -10
  210. data/ext/nokogiri/xml_entity_reference.h +0 -9
  211. data/ext/nokogiri/xml_io.c +0 -56
  212. data/ext/nokogiri/xml_io.h +0 -11
  213. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  214. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  215. data/ext/nokogiri/xml_namespace.h +0 -13
  216. data/ext/nokogiri/xml_node.h +0 -13
  217. data/ext/nokogiri/xml_node_set.h +0 -14
  218. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  219. data/ext/nokogiri/xml_reader.h +0 -10
  220. data/ext/nokogiri/xml_relax_ng.h +0 -9
  221. data/ext/nokogiri/xml_sax_parser.h +0 -39
  222. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  223. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  224. data/ext/nokogiri/xml_schema.h +0 -9
  225. data/ext/nokogiri/xml_syntax_error.h +0 -13
  226. data/ext/nokogiri/xml_text.h +0 -9
  227. data/ext/nokogiri/xml_xpath_context.h +0 -10
  228. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  229. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  230. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  231. data/tasks/cross_compile.rb +0 -150
  232. data/tasks/nokogiri.org.rb +0 -24
  233. data/tasks/test.rb +0 -95
  234. data/test/css/test_nthiness.rb +0 -159
  235. data/test/css/test_parser.rb +0 -341
  236. data/test/css/test_tokenizer.rb +0 -198
  237. data/test/css/test_xpath_visitor.rb +0 -91
  238. data/test/decorators/test_slop.rb +0 -16
  239. data/test/files/2ch.html +0 -108
  240. data/test/files/address_book.rlx +0 -12
  241. data/test/files/address_book.xml +0 -10
  242. data/test/files/bar/bar.xsd +0 -4
  243. data/test/files/dont_hurt_em_why.xml +0 -422
  244. data/test/files/encoding.html +0 -82
  245. data/test/files/encoding.xhtml +0 -84
  246. data/test/files/exslt.xml +0 -8
  247. data/test/files/exslt.xslt +0 -35
  248. data/test/files/foo/foo.xsd +0 -4
  249. data/test/files/metacharset.html +0 -10
  250. data/test/files/noencoding.html +0 -47
  251. data/test/files/po.xml +0 -32
  252. data/test/files/po.xsd +0 -66
  253. data/test/files/shift_jis.html +0 -10
  254. data/test/files/shift_jis.xml +0 -5
  255. data/test/files/snuggles.xml +0 -3
  256. data/test/files/staff.dtd +0 -10
  257. data/test/files/staff.xml +0 -59
  258. data/test/files/staff.xslt +0 -32
  259. data/test/files/test_document_url/bar.xml +0 -2
  260. data/test/files/test_document_url/document.dtd +0 -4
  261. data/test/files/test_document_url/document.xml +0 -6
  262. data/test/files/tlm.html +0 -850
  263. data/test/files/to_be_xincluded.xml +0 -2
  264. data/test/files/valid_bar.xml +0 -2
  265. data/test/files/xinclude.xml +0 -4
  266. data/test/helper.rb +0 -154
  267. data/test/html/sax/test_parser.rb +0 -141
  268. data/test/html/sax/test_parser_context.rb +0 -46
  269. data/test/html/test_builder.rb +0 -164
  270. data/test/html/test_document.rb +0 -552
  271. data/test/html/test_document_encoding.rb +0 -138
  272. data/test/html/test_document_fragment.rb +0 -261
  273. data/test/html/test_element_description.rb +0 -105
  274. data/test/html/test_named_characters.rb +0 -14
  275. data/test/html/test_node.rb +0 -196
  276. data/test/html/test_node_encoding.rb +0 -27
  277. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  278. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  279. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  280. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  281. data/test/test_convert_xpath.rb +0 -135
  282. data/test/test_css_cache.rb +0 -45
  283. data/test/test_encoding_handler.rb +0 -46
  284. data/test/test_memory_leak.rb +0 -156
  285. data/test/test_nokogiri.rb +0 -132
  286. data/test/test_reader.rb +0 -555
  287. data/test/test_soap4r_sax.rb +0 -52
  288. data/test/test_xslt_transforms.rb +0 -254
  289. data/test/xml/node/test_save_options.rb +0 -28
  290. data/test/xml/node/test_subclass.rb +0 -44
  291. data/test/xml/sax/test_parser.rb +0 -366
  292. data/test/xml/sax/test_parser_context.rb +0 -106
  293. data/test/xml/sax/test_push_parser.rb +0 -157
  294. data/test/xml/test_attr.rb +0 -64
  295. data/test/xml/test_attribute_decl.rb +0 -86
  296. data/test/xml/test_builder.rb +0 -306
  297. data/test/xml/test_c14n.rb +0 -151
  298. data/test/xml/test_cdata.rb +0 -48
  299. data/test/xml/test_comment.rb +0 -29
  300. data/test/xml/test_document.rb +0 -828
  301. data/test/xml/test_document_encoding.rb +0 -28
  302. data/test/xml/test_document_fragment.rb +0 -223
  303. data/test/xml/test_dtd.rb +0 -103
  304. data/test/xml/test_dtd_encoding.rb +0 -33
  305. data/test/xml/test_element_content.rb +0 -56
  306. data/test/xml/test_element_decl.rb +0 -73
  307. data/test/xml/test_entity_decl.rb +0 -122
  308. data/test/xml/test_entity_reference.rb +0 -245
  309. data/test/xml/test_namespace.rb +0 -95
  310. data/test/xml/test_node.rb +0 -1137
  311. data/test/xml/test_node_attributes.rb +0 -96
  312. data/test/xml/test_node_encoding.rb +0 -107
  313. data/test/xml/test_node_inheritance.rb +0 -32
  314. data/test/xml/test_node_reparenting.rb +0 -374
  315. data/test/xml/test_node_set.rb +0 -755
  316. data/test/xml/test_parse_options.rb +0 -64
  317. data/test/xml/test_processing_instruction.rb +0 -30
  318. data/test/xml/test_reader_encoding.rb +0 -142
  319. data/test/xml/test_relax_ng.rb +0 -60
  320. data/test/xml/test_schema.rb +0 -103
  321. data/test/xml/test_syntax_error.rb +0 -12
  322. data/test/xml/test_text.rb +0 -45
  323. data/test/xml/test_unparented_node.rb +0 -422
  324. data/test/xml/test_xinclude.rb +0 -83
  325. data/test/xml/test_xpath.rb +0 -295
  326. data/test/xslt/test_custom_functions.rb +0 -133
  327. data/test/xslt/test_exception_handling.rb +0 -37
  328. data/test_all +0 -81
@@ -1,8 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module CSS
3
4
  class XPathVisitor # :nodoc:
4
5
  def visit_function node
5
- # note that nth-child and nth-last-child are preprocessed in css/node.rb.
6
6
  msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
7
  return self.send(msg, node) if self.respond_to?(msg)
8
8
 
@@ -12,37 +12,51 @@ module Nokogiri
12
12
  when /^self\(/
13
13
  "self::#{node.value[1]}"
14
14
  when /^eq\(/
15
- "position() = #{node.value[1]}"
16
- when /^(nth|nth-of-type|nth-child)\(/
17
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
18
- an_plus_b(node.value[1])
15
+ "position()=#{node.value[1]}"
16
+ when /^(nth|nth-of-type)\(/
17
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
+ nth(node.value[1])
19
19
  else
20
- "position() = #{node.value[1]}"
20
+ "position()=#{node.value[1]}"
21
21
  end
22
- when /^(nth-last-child|nth-last-of-type)\(/
23
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
24
- an_plus_b(node.value[1], :last => true)
22
+ when /^nth-child\(/
23
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
+ nth(node.value[1], :child => true)
25
+ else
26
+ "count(preceding-sibling::*)=#{node.value[1].to_i-1}"
27
+ end
28
+ when /^nth-last-of-type\(/
29
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
+ nth(node.value[1], :last => true)
25
31
  else
26
32
  index = node.value[1].to_i - 1
27
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
33
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
+ end
35
+ when /^nth-last-child\(/
36
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
+ nth(node.value[1], :last => true, :child => true)
38
+ else
39
+ "count(following-sibling::*)=#{node.value[1].to_i-1}"
28
40
  end
29
41
  when /^(first|first-of-type)\(/
30
- "position() = 1"
42
+ "position()=1"
31
43
  when /^(last|last-of-type)\(/
32
- "position() = last()"
44
+ "position()=last()"
33
45
  when /^contains\(/
34
- "contains(., #{node.value[1]})"
46
+ "contains(.,#{node.value[1]})"
35
47
  when /^gt\(/
36
- "position() > #{node.value[1]}"
48
+ "position()>#{node.value[1]}"
37
49
  when /^only-child\(/
38
- "last() = 1"
50
+ "last()=1"
39
51
  when /^comment\(/
40
52
  "comment()"
41
53
  when /^has\(/
42
- node.value[1].accept(self)
54
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
55
+ ".#{"//" if !is_direct}#{node.value[1].accept(self)}"
43
56
  else
57
+ # non-standard. this looks like a function call.
44
58
  args = ['.'] + node.value[1..-1]
45
- "#{node.value.first}#{args.join(', ')})"
59
+ "#{node.value.first}#{args.join(',')})"
46
60
  end
47
61
  end
48
62
 
@@ -57,18 +71,18 @@ module Nokogiri
57
71
 
58
72
  def visit_id node
59
73
  node.value.first =~ /^#(.*)$/
60
- "@id = '#{$1}'"
74
+ "@id='#{$1}'"
61
75
  end
62
76
 
63
77
  def visit_attribute_condition node
64
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
65
- ''
66
- else
67
- '@'
68
- end
78
+ attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
79
+ ''
80
+ else
81
+ '@'
82
+ end
69
83
  attribute += node.value.first.accept(self)
70
84
 
71
- # Support non-standard css
85
+ # non-standard. attributes starting with '@'
72
86
  attribute.gsub!(/^@@/, '@')
73
87
 
74
88
  return attribute unless node.value.length == 3
@@ -76,22 +90,30 @@ module Nokogiri
76
90
  value = node.value.last
77
91
  value = "'#{value}'" if value !~ /^['"]/
78
92
 
93
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
94
+ if (value[0]==value[-1]) && %q{"'}.include?(value[0])
95
+ str_value = value[1..-2]
96
+ if str_value.include?(value[0])
97
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
98
+ end
99
+ end
100
+
79
101
  case node.value[1]
80
102
  when :equal
81
- attribute + " = " + "#{value}"
103
+ attribute + "=" + "#{value}"
82
104
  when :not_equal
83
- attribute + " != " + "#{value}"
105
+ attribute + "!=" + "#{value}"
84
106
  when :substring_match
85
- "contains(#{attribute}, #{value})"
107
+ "contains(#{attribute},#{value})"
86
108
  when :prefix_match
87
- "starts-with(#{attribute}, #{value})"
109
+ "starts-with(#{attribute},#{value})"
88
110
  when :dash_match
89
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
111
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
90
112
  when :includes
91
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
113
+ value = value[1..-2] # strip quotes
114
+ css_class(attribute, value)
92
115
  when :suffix_match
93
- "substring(#{attribute}, string-length(#{attribute}) - " +
94
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
116
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
95
117
  else
96
118
  attribute + " #{node.value[1]} " + "#{value}"
97
119
  end
@@ -105,11 +127,14 @@ module Nokogiri
105
127
  return self.send(msg, node) if self.respond_to?(msg)
106
128
 
107
129
  case node.value.first
108
- when "first", "first-child" then "position() = 1"
109
- when "last", "last-child" then "position() = last()"
110
- when "first-of-type" then "position() = 1"
111
- when "last-of-type" then "position() = last()"
112
- when "only-of-type" then "last() = 1"
130
+ when "first" then "position()=1"
131
+ when "first-child" then "count(preceding-sibling::*)=0"
132
+ when "last" then "position()=last()"
133
+ when "last-child" then "count(following-sibling::*)=0"
134
+ when "first-of-type" then "position()=1"
135
+ when "last-of-type" then "position()=last()"
136
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
137
+ when "only-of-type" then "last()=1"
113
138
  when "empty" then "not(node())"
114
139
  when "parent" then "node()"
115
140
  when "root" then "not(parent::*)"
@@ -120,11 +145,18 @@ module Nokogiri
120
145
  end
121
146
 
122
147
  def visit_class_condition node
123
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
148
+ css_class("@class", node.value.first)
149
+ end
150
+
151
+ def visit_combinator node
152
+ if is_of_type_pseudo_class?(node.value.last)
153
+ "#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
154
+ else
155
+ "#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
156
+ end
124
157
  end
125
158
 
126
159
  {
127
- 'combinator' => ' and ',
128
160
  'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
129
161
  'following_selector' => "/following-sibling::",
130
162
  'descendant_selector' => '//',
@@ -150,22 +182,79 @@ module Nokogiri
150
182
  node.accept(self)
151
183
  end
152
184
 
153
- private
154
- def an_plus_b node, options={}
185
+ private
186
+
187
+ def nth node, options={}
155
188
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
156
189
 
157
- a = node.value[0].to_i
158
- b = node.value[3].to_i
159
- position = options[:last] ? "(last()-position()+1)" : "position()"
190
+ a, b = read_a_and_positive_b node.value
191
+ position = if options[:child]
192
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
193
+ else
194
+ options[:last] ? "(last()-position()+1)" : "position()"
195
+ end
160
196
 
161
- if (b == 0)
162
- return "(#{position} mod #{a}) = 0"
197
+ if b.zero?
198
+ "(#{position} mod #{a})=0"
163
199
  else
164
- compare = (a < 0) ? "<=" : ">="
165
- return "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
200
+ compare = a < 0 ? "<=" : ">="
201
+ if a.abs == 1
202
+ "#{position}#{compare}#{b}"
203
+ else
204
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
205
+ end
166
206
  end
167
207
  end
168
208
 
209
+ def read_a_and_positive_b values
210
+ op = values[2]
211
+ if op == "+"
212
+ a = values[0].to_i
213
+ b = values[3].to_i
214
+ elsif op == "-"
215
+ a = values[0].to_i
216
+ b = a - (values[3].to_i % a)
217
+ else
218
+ raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
219
+ end
220
+ [a, b]
221
+ end
222
+
223
+ def is_of_type_pseudo_class? node
224
+ if node.type==:PSEUDO_CLASS
225
+ if node.value[0].is_a?(Nokogiri::CSS::Node) and node.value[0].type == :FUNCTION
226
+ node.value[0].value[0]
227
+ else
228
+ node.value[0]
229
+ end =~ /(nth|first|last|only)-of-type(\()?/
230
+ end
231
+ end
232
+
233
+ # use only ordinary xpath functions
234
+ def css_class_standard(hay, needle)
235
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
236
+ end
237
+
238
+ # use the builtin implementation
239
+ def css_class_builtin(hay, needle)
240
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
241
+ end
242
+
243
+ alias_method :css_class, :css_class_standard
244
+ end
245
+
246
+ class XPathVisitorAlwaysUseBuiltins < XPathVisitor # :nodoc:
247
+ private
248
+ alias_method :css_class, :css_class_builtin
249
+ end
250
+
251
+ class XPathVisitorOptimallyUseBuiltins < XPathVisitor # :nodoc:
252
+ private
253
+ if Nokogiri.uses_libxml?
254
+ alias_method :css_class, :css_class_builtin
255
+ else
256
+ alias_method :css_class, :css_class_standard
257
+ end
169
258
  end
170
259
  end
171
260
  end
data/lib/nokogiri/css.rb CHANGED
@@ -1,27 +1,28 @@
1
- require 'nokogiri/css/node'
2
- require 'nokogiri/css/xpath_visitor'
3
- x = $-w
4
- $-w = false
5
- require 'nokogiri/css/parser'
6
- $-w = x
7
-
8
- require 'nokogiri/css/tokenizer'
9
- require 'nokogiri/css/syntax_error'
10
-
1
+ # frozen_string_literal: true
11
2
  module Nokogiri
12
3
  module CSS
13
4
  class << self
14
5
  ###
15
6
  # Parse this CSS selector in +selector+. Returns an AST.
16
- def parse selector
17
- Parser.new.parse selector
7
+ def parse(selector)
8
+ Parser.new.parse(selector)
18
9
  end
19
10
 
20
11
  ###
21
12
  # Get the XPath for +selector+.
22
- def xpath_for selector, options={}
23
- Parser.new(options[:ns] || {}).xpath_for selector, options
13
+ def xpath_for(selector, options = {})
14
+ Parser.new(options[:ns] || {}).xpath_for(selector, options)
24
15
  end
25
16
  end
26
17
  end
27
18
  end
19
+
20
+ require_relative "css/node"
21
+ require_relative "css/xpath_visitor"
22
+ x = $-w
23
+ $-w = false
24
+ require_relative "css/parser"
25
+ $-w = x
26
+
27
+ require_relative "css/tokenizer"
28
+ require_relative "css/syntax_error"
@@ -1,28 +1,30 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module Decorators
3
4
  ###
4
5
  # The Slop decorator implements method_missing such that methods may be
5
6
  # used instead of XPath or CSS. See Nokogiri.Slop
6
7
  module Slop
8
+ # The default XPath search context for Slop
9
+ XPATH_PREFIX = "./"
10
+
7
11
  ###
8
12
  # look for node with +name+. See Nokogiri.Slop
9
13
  def method_missing name, *args, &block
10
- prefix = implied_xpath_context
11
-
12
14
  if args.empty?
13
- list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
15
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
14
16
  elsif args.first.is_a? Hash
15
17
  hash = args.first
16
18
  if hash[:css]
17
19
  list = css("#{name}#{hash[:css]}")
18
20
  elsif hash[:xpath]
19
21
  conds = Array(hash[:xpath]).join(' and ')
20
- list = xpath("#{prefix}#{name}[#{conds}]")
22
+ list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
21
23
  end
22
24
  else
23
25
  CSS::Parser.without_cache do
24
26
  list = xpath(
25
- *CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
27
+ *CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
26
28
  )
27
29
  end
28
30
  end
@@ -30,6 +32,12 @@ module Nokogiri
30
32
  super if list.empty?
31
33
  list.length == 1 ? list.first : list
32
34
  end
35
+
36
+ def respond_to_missing? name, include_private = false
37
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
38
+
39
+ !list.empty?
40
+ end
33
41
  end
34
42
  end
35
43
  end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # load the C or Java extension
4
+ begin
5
+ # native precompiled gems package shared libraries in <gem_dir>/lib/nokogiri/<ruby_version>
6
+ ::RUBY_VERSION =~ /(\d+\.\d+)/
7
+ require_relative "#{Regexp.last_match(1)}/nokogiri"
8
+ rescue LoadError => e
9
+ if e.message =~ /GLIBC/
10
+ warn(<<~EOM)
11
+
12
+ ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
13
+
14
+ #{e.message}
15
+
16
+ If that's the case, then please install Nokogiri via the `ruby` platform gem:
17
+ gem install nokogiri --platform=ruby
18
+ or:
19
+ bundle config set force_ruby_platform true
20
+
21
+ Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
22
+
23
+ EOM
24
+ raise e
25
+ end
26
+
27
+ # use "require" instead of "require_relative" because non-native gems will place C extension files
28
+ # in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
29
+ # is in $LOAD_PATH but not necessarily relative to this file (see #2300)
30
+ require "nokogiri/nokogiri"
31
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module Gumbo
4
+ # The default maximum number of attributes per element.
5
+ DEFAULT_MAX_ATTRIBUTES = 400
6
+
7
+ # The default maximum number of errors for parsing a document or a fragment.
8
+ DEFAULT_MAX_ERRORS = 0
9
+
10
+ # The default maximum depth of the DOM tree produced by parsing a document
11
+ # or fragment.
12
+ DEFAULT_MAX_TREE_DEPTH = 400
13
+ end
14
+ end
data/lib/nokogiri/html.rb CHANGED
@@ -1,37 +1,42 @@
1
- require 'nokogiri/html/entity_lookup'
2
- require 'nokogiri/html/document'
3
- require 'nokogiri/html/document_fragment'
4
- require 'nokogiri/html/sax/parser_context'
5
- require 'nokogiri/html/sax/parser'
6
- require 'nokogiri/html/sax/push_parser'
7
- require 'nokogiri/html/element_description'
8
- require 'nokogiri/html/element_description_defaults'
1
+ # frozen_string_literal: true
2
+ require_relative "html4"
9
3
 
10
4
  module Nokogiri
11
- class << self
12
- ###
13
- # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
14
- def HTML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
15
- Nokogiri::HTML::Document.parse(thing, url, encoding, options, &block)
16
- end
17
- end
5
+ HTML = Nokogiri::HTML4
6
+
7
+ # @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
8
+ # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
9
+ # @!scope class
10
+ define_singleton_method(:HTML, Nokogiri.method(:HTML4))
18
11
 
12
+ # @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
13
+ # {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
14
+ # classes.
19
15
  module HTML
20
- class << self
21
- ###
22
- # Parse HTML. Convenience method for Nokogiri::HTML::Document.parse
23
- def parse thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
24
- Document.parse(thing, url, encoding, options, &block)
16
+ # @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
17
+ class Document < Nokogiri::XML::Document
18
+ end
19
+
20
+ # @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
21
+ class DocumentFragment < Nokogiri::XML::DocumentFragment
22
+ end
23
+
24
+ # @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
25
+ class Builder < Nokogiri::XML::Builder
26
+ end
27
+
28
+ module SAX
29
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
30
+ class Parser < Nokogiri::XML::SAX::Parser
25
31
  end
26
32
 
27
- ####
28
- # Parse a fragment from +string+ in to a NodeSet.
29
- def fragment string, encoding = nil
30
- HTML::DocumentFragment.parse string, encoding
33
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
34
+ class ParserContext < Nokogiri::XML::SAX::ParserContext
31
35
  end
32
- end
33
36
 
34
- # Instance of Nokogiri::HTML::EntityLookup
35
- NamedCharacters = EntityLookup.new
37
+ # @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
38
+ class PushParser
39
+ end
40
+ end
36
41
  end
37
42
  end
@@ -1,5 +1,6 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
- module HTML
3
+ module HTML4
3
4
  ###
4
5
  # Nokogiri HTML builder is used for building HTML documents. It is very
5
6
  # similar to the Nokogiri::XML::Builder. In fact, you should go read the
@@ -11,7 +12,7 @@ module Nokogiri
11
12
  # Create an HTML document with a body that has an onload attribute, and a
12
13
  # span tag with a class of "bold" that has content of "Hello world".
13
14
  #
14
- # builder = Nokogiri::HTML::Builder.new do |doc|
15
+ # builder = Nokogiri::HTML4::Builder.new do |doc|
15
16
  # doc.html {
16
17
  # doc.body(:onload => 'some_func();') {
17
18
  # doc.span.bold {