nokogiri 1.6.0 → 1.13.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (340) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -19
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +23 -4
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +952 -132
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +231 -96
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +269 -177
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +407 -357
  94. data/lib/nokogiri/css/parser.y +265 -246
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +8 -7
  99. data/lib/nokogiri/css/xpath_visitor.rb +266 -80
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -105
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +270 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +278 -362
  177. data/.autotest +0 -26
  178. data/.gemtest +0 -0
  179. data/.travis.yml +0 -27
  180. data/CHANGELOG.ja.rdoc +0 -819
  181. data/CHANGELOG.rdoc +0 -819
  182. data/C_CODING_STYLE.rdoc +0 -33
  183. data/Manifest.txt +0 -315
  184. data/README.ja.rdoc +0 -106
  185. data/README.rdoc +0 -175
  186. data/ROADMAP.md +0 -90
  187. data/Rakefile +0 -246
  188. data/STANDARD_RESPONSES.md +0 -47
  189. data/Y_U_NO_GEMSPEC.md +0 -155
  190. data/build_all +0 -105
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -56
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -13
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -14
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document.rb +0 -254
  232. data/lib/nokogiri/html/document_fragment.rb +0 -41
  233. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  234. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  235. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  236. data/ports/archives/libxml2-2.8.0.tar.gz +0 -0
  237. data/ports/archives/libxslt-1.1.26.tar.gz +0 -0
  238. data/tasks/cross_compile.rb +0 -132
  239. data/tasks/nokogiri.org.rb +0 -24
  240. data/tasks/test.rb +0 -95
  241. data/test/css/test_nthiness.rb +0 -159
  242. data/test/css/test_parser.rb +0 -341
  243. data/test/css/test_tokenizer.rb +0 -198
  244. data/test/css/test_xpath_visitor.rb +0 -91
  245. data/test/decorators/test_slop.rb +0 -16
  246. data/test/files/2ch.html +0 -108
  247. data/test/files/address_book.rlx +0 -12
  248. data/test/files/address_book.xml +0 -10
  249. data/test/files/bar/bar.xsd +0 -4
  250. data/test/files/bogus.xml +0 -0
  251. data/test/files/dont_hurt_em_why.xml +0 -422
  252. data/test/files/encoding.html +0 -82
  253. data/test/files/encoding.xhtml +0 -84
  254. data/test/files/exslt.xml +0 -8
  255. data/test/files/exslt.xslt +0 -35
  256. data/test/files/foo/foo.xsd +0 -4
  257. data/test/files/metacharset.html +0 -10
  258. data/test/files/noencoding.html +0 -47
  259. data/test/files/po.xml +0 -32
  260. data/test/files/po.xsd +0 -66
  261. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  262. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  263. data/test/files/saml/xenc_schema.xsd +0 -146
  264. data/test/files/saml/xmldsig_schema.xsd +0 -318
  265. data/test/files/shift_jis.html +0 -10
  266. data/test/files/shift_jis.xml +0 -5
  267. data/test/files/snuggles.xml +0 -3
  268. data/test/files/staff.dtd +0 -10
  269. data/test/files/staff.xml +0 -59
  270. data/test/files/staff.xslt +0 -32
  271. data/test/files/test_document_url/bar.xml +0 -2
  272. data/test/files/test_document_url/document.dtd +0 -4
  273. data/test/files/test_document_url/document.xml +0 -6
  274. data/test/files/tlm.html +0 -850
  275. data/test/files/to_be_xincluded.xml +0 -2
  276. data/test/files/valid_bar.xml +0 -2
  277. data/test/files/xinclude.xml +0 -4
  278. data/test/helper.rb +0 -154
  279. data/test/html/sax/test_parser.rb +0 -141
  280. data/test/html/sax/test_parser_context.rb +0 -46
  281. data/test/html/test_builder.rb +0 -164
  282. data/test/html/test_document.rb +0 -552
  283. data/test/html/test_document_encoding.rb +0 -138
  284. data/test/html/test_document_fragment.rb +0 -261
  285. data/test/html/test_element_description.rb +0 -105
  286. data/test/html/test_named_characters.rb +0 -14
  287. data/test/html/test_node.rb +0 -196
  288. data/test/html/test_node_encoding.rb +0 -27
  289. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  290. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  291. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  292. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  293. data/test/test_convert_xpath.rb +0 -135
  294. data/test/test_css_cache.rb +0 -45
  295. data/test/test_encoding_handler.rb +0 -46
  296. data/test/test_memory_leak.rb +0 -156
  297. data/test/test_nokogiri.rb +0 -132
  298. data/test/test_reader.rb +0 -555
  299. data/test/test_soap4r_sax.rb +0 -52
  300. data/test/test_xslt_transforms.rb +0 -254
  301. data/test/xml/node/test_save_options.rb +0 -28
  302. data/test/xml/node/test_subclass.rb +0 -44
  303. data/test/xml/sax/test_parser.rb +0 -366
  304. data/test/xml/sax/test_parser_context.rb +0 -106
  305. data/test/xml/sax/test_push_parser.rb +0 -157
  306. data/test/xml/test_attr.rb +0 -64
  307. data/test/xml/test_attribute_decl.rb +0 -86
  308. data/test/xml/test_builder.rb +0 -306
  309. data/test/xml/test_c14n.rb +0 -151
  310. data/test/xml/test_cdata.rb +0 -48
  311. data/test/xml/test_comment.rb +0 -29
  312. data/test/xml/test_document.rb +0 -828
  313. data/test/xml/test_document_encoding.rb +0 -28
  314. data/test/xml/test_document_fragment.rb +0 -223
  315. data/test/xml/test_dtd.rb +0 -103
  316. data/test/xml/test_dtd_encoding.rb +0 -33
  317. data/test/xml/test_element_content.rb +0 -56
  318. data/test/xml/test_element_decl.rb +0 -73
  319. data/test/xml/test_entity_decl.rb +0 -122
  320. data/test/xml/test_entity_reference.rb +0 -245
  321. data/test/xml/test_namespace.rb +0 -95
  322. data/test/xml/test_node.rb +0 -1137
  323. data/test/xml/test_node_attributes.rb +0 -96
  324. data/test/xml/test_node_encoding.rb +0 -107
  325. data/test/xml/test_node_inheritance.rb +0 -32
  326. data/test/xml/test_node_reparenting.rb +0 -374
  327. data/test/xml/test_node_set.rb +0 -755
  328. data/test/xml/test_parse_options.rb +0 -64
  329. data/test/xml/test_processing_instruction.rb +0 -30
  330. data/test/xml/test_reader_encoding.rb +0 -142
  331. data/test/xml/test_relax_ng.rb +0 -60
  332. data/test/xml/test_schema.rb +0 -103
  333. data/test/xml/test_syntax_error.rb +0 -12
  334. data/test/xml/test_text.rb +0 -45
  335. data/test/xml/test_unparented_node.rb +0 -422
  336. data/test/xml/test_xinclude.rb +0 -83
  337. data/test/xml/test_xpath.rb +0 -295
  338. data/test/xslt/test_custom_functions.rb +0 -133
  339. data/test/xslt/test_exception_handling.rb +0 -37
  340. data/test_all +0 -81
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class << self
4
6
  ###
5
7
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
6
8
  # object.
7
- def Schema string_or_io
8
- Schema.new(string_or_io)
9
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
10
+ Schema.new(string_or_io, options)
9
11
  end
10
12
  end
11
13
 
@@ -26,15 +28,23 @@ module Nokogiri
26
28
  # end
27
29
  #
28
30
  # The errors in the list are Nokogiri::XML::SyntaxError objects.
31
+ #
32
+ # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
33
+ # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
34
+ # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
35
+ # security policy. If a document is trusted, then the caller may turn off the NONET option via
36
+ # the ParseOptions to re-enable external entity resolution over a network connection.
29
37
  class Schema
30
38
  # Errors while parsing the schema file
31
39
  attr_accessor :errors
40
+ # The Nokogiri::XML::ParseOptions used to parse the schema
41
+ attr_accessor :parse_options
32
42
 
33
43
  ###
34
44
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
35
45
  # object.
36
- def self.new string_or_io
37
- from_document Nokogiri::XML(string_or_io)
46
+ def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
47
+ from_document(Nokogiri::XML(string_or_io), options)
38
48
  end
39
49
 
40
50
  ###
@@ -42,9 +52,9 @@ module Nokogiri
42
52
  # Nokogiri::XML::Document object, or a filename. An Array of
43
53
  # Nokogiri::XML::SyntaxError objects found while validating the
44
54
  # +thing+ is returned.
45
- def validate thing
46
- if thing.is_a?(Nokogiri::XML::Document)
47
- validate_document(thing)
55
+ def validate(thing)
56
+ if thing.is_a?(Nokogiri::XML::Document)
57
+ validate_document(thing)
48
58
  elsif File.file?(thing)
49
59
  validate_file(thing)
50
60
  else
@@ -55,8 +65,8 @@ module Nokogiri
55
65
  ###
56
66
  # Returns true if +thing+ is a valid Nokogiri::XML::Document or
57
67
  # file.
58
- def valid? thing
59
- validate(thing).length == 0
68
+ def valid?(thing)
69
+ validate(thing).empty?
60
70
  end
61
71
  end
62
72
  end
@@ -0,0 +1,270 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ module XML
6
+ #
7
+ # The Searchable module declares the interface used for searching your DOM.
8
+ #
9
+ # It implements the public methods #search, #css, and #xpath,
10
+ # as well as allowing specific implementations to specialize some
11
+ # of the important behaviors.
12
+ #
13
+ module Searchable
14
+ # Regular expression used by Searchable#search to determine if a query
15
+ # string is CSS or XPath
16
+ LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
17
+
18
+ # :section: Searching via XPath or CSS Queries
19
+
20
+ ###
21
+ # call-seq:
22
+ # search(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
23
+ #
24
+ # Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
25
+ #
26
+ # node.search("div.employee", ".//title")
27
+ #
28
+ # A hash of namespace bindings may be appended:
29
+ #
30
+ # node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
31
+ # node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
32
+ #
33
+ # For XPath queries, a hash of variable bindings may also be appended to the namespace
34
+ # bindings. For example:
35
+ #
36
+ # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
37
+ #
38
+ # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
39
+ # functions create a class and implement the function you want to define. The first argument
40
+ # to the method will be the current matching NodeSet. Any other arguments are ones that you
41
+ # pass in. Note that this class may appear anywhere in the argument list. For example:
42
+ #
43
+ # handler = Class.new {
44
+ # def regex node_set, regex
45
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
+ # end
47
+ # }.new
48
+ # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
49
+ #
50
+ # See Searchable#xpath and Searchable#css for further usage help.
51
+ def search(*args)
52
+ paths, handler, ns, binds = extract_params(args)
53
+
54
+ xpaths = paths.map(&:to_s).map do |path|
55
+ LOOKS_LIKE_XPATH.match?(path) ? path : xpath_query_from_css_rule(path, ns)
56
+ end.flatten.uniq
57
+
58
+ xpath(*(xpaths + [ns, handler, binds].compact))
59
+ end
60
+
61
+ alias_method :/, :search
62
+
63
+ ###
64
+ # call-seq:
65
+ # at(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
66
+ #
67
+ # Search this object for +paths+, and return only the first
68
+ # result. +paths+ must be one or more XPath or CSS queries.
69
+ #
70
+ # See Searchable#search for more information.
71
+ def at(*args)
72
+ search(*args).first
73
+ end
74
+
75
+ alias_method :%, :at
76
+
77
+ ###
78
+ # call-seq:
79
+ # css(*rules, [namespace-bindings, custom-pseudo-class])
80
+ #
81
+ # Search this object for CSS +rules+. +rules+ must be one or more CSS
82
+ # selectors. For example:
83
+ #
84
+ # node.css('title')
85
+ # node.css('body h1.bold')
86
+ # node.css('div + p.green', 'div#one')
87
+ #
88
+ # A hash of namespace bindings may be appended. For example:
89
+ #
90
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
91
+ #
92
+ # 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath
93
+ # function. To define custom pseudo classes, create a class and implement the custom pseudo
94
+ # class you want defined. The first argument to the method will be the matching context
95
+ # NodeSet. Any other arguments are ones that you pass in. For example:
96
+ #
97
+ # handler = Class.new {
98
+ # def regex(node_set, regex)
99
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
100
+ # end
101
+ # }.new
102
+ # node.css('title:regex("\w+")', handler)
103
+ #
104
+ # 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
105
+ #
106
+ # node.css('img > @href') # returns all +href+ attributes on an +img+ element
107
+ # node.css('img / @href') # same
108
+ #
109
+ # # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN!
110
+ # node.css('div @class')
111
+ #
112
+ # node.css
113
+ #
114
+ # 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
115
+ #
116
+ # ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
117
+ # example:
118
+ #
119
+ # # equivalent to 'li:nth-child(2)'
120
+ # node.css('li[2]') # retrieve the second li element in a list
121
+ #
122
+ # ⚠ NOTE that the CSS query string is case-sensitive with regards to your document type. HTML
123
+ # tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
124
+ # you'll never find anything. However, "H1" might be found in an XML document, where tag
125
+ # names are case-sensitive (e.g., "H1" is distinct from "h1").
126
+ def css(*args)
127
+ rules, handler, ns, _ = extract_params(args)
128
+
129
+ css_internal(self, rules, handler, ns)
130
+ end
131
+
132
+ ##
133
+ # call-seq:
134
+ # at_css(*rules, [namespace-bindings, custom-pseudo-class])
135
+ #
136
+ # Search this object for CSS +rules+, and return only the first
137
+ # match. +rules+ must be one or more CSS selectors.
138
+ #
139
+ # See Searchable#css for more information.
140
+ def at_css(*args)
141
+ css(*args).first
142
+ end
143
+
144
+ ###
145
+ # call-seq:
146
+ # xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
147
+ #
148
+ # Search this node for XPath +paths+. +paths+ must be one or more XPath
149
+ # queries.
150
+ #
151
+ # node.xpath('.//title')
152
+ #
153
+ # A hash of namespace bindings may be appended. For example:
154
+ #
155
+ # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
156
+ # node.xpath('.//xmlns:name', node.root.namespaces)
157
+ #
158
+ # A hash of variable bindings may also be appended to the namespace bindings. For example:
159
+ #
160
+ # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
161
+ #
162
+ # 💡 Custom XPath functions may also be defined. To define custom functions create a class and
163
+ # implement the function you want to define. The first argument to the method will be the
164
+ # current matching NodeSet. Any other arguments are ones that you pass in. Note that this
165
+ # class may appear anywhere in the argument list. For example:
166
+ #
167
+ # handler = Class.new {
168
+ # def regex(node_set, regex)
169
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
170
+ # end
171
+ # }.new
172
+ # node.xpath('.//title[regex(., "\w+")]', handler)
173
+ #
174
+ def xpath(*args)
175
+ paths, handler, ns, binds = extract_params(args)
176
+
177
+ xpath_internal(self, paths, handler, ns, binds)
178
+ end
179
+
180
+ ##
181
+ # call-seq:
182
+ # at_xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
183
+ #
184
+ # Search this node for XPath +paths+, and return only the first
185
+ # match. +paths+ must be one or more XPath queries.
186
+ #
187
+ # See Searchable#xpath for more information.
188
+ def at_xpath(*args)
189
+ xpath(*args).first
190
+ end
191
+
192
+ # :call-seq:
193
+ # >(selector) → NodeSet
194
+ #
195
+ # Search this node's immediate children using CSS selector +selector+
196
+ def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
197
+ ns = (document.root&.namespaces || {})
198
+ xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
199
+ end
200
+
201
+ # :section:
202
+
203
+ private
204
+
205
+ def css_internal(node, rules, handler, ns)
206
+ xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
207
+ end
208
+
209
+ def xpath_internal(node, paths, handler, ns, binds)
210
+ document = node.document
211
+ return NodeSet.new(document) unless document
212
+
213
+ if paths.length == 1
214
+ return xpath_impl(node, paths.first, handler, ns, binds)
215
+ end
216
+
217
+ NodeSet.new(document) do |combined|
218
+ paths.each do |path|
219
+ xpath_impl(node, path, handler, ns, binds).each { |set| combined << set }
220
+ end
221
+ end
222
+ end
223
+
224
+ def xpath_impl(node, path, handler, ns, binds)
225
+ ctx = XPathContext.new(node)
226
+ ctx.register_namespaces(ns)
227
+ path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
228
+
229
+ binds&.each do |key, value|
230
+ ctx.register_variable(key.to_s, value)
231
+ end
232
+
233
+ ctx.evaluate(path, handler)
234
+ end
235
+
236
+ def css_rules_to_xpath(rules, ns)
237
+ rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
238
+ end
239
+
240
+ def xpath_query_from_css_rule(rule, ns)
241
+ visitor = Nokogiri::CSS::XPathVisitor.new(
242
+ builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
243
+ doctype: document.xpath_doctype,
244
+ )
245
+ self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
246
+ CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
247
+ visitor: visitor, })
248
+ end.join(" | ")
249
+ end
250
+
251
+ def extract_params(params) # :nodoc:
252
+ handler = params.find do |param|
253
+ ![Hash, String, Symbol].include?(param.class)
254
+ end
255
+ params -= [handler] if handler
256
+
257
+ hashes = []
258
+ while Hash === params.last || params.last.nil?
259
+ hashes << params.pop
260
+ break if params.empty?
261
+ end
262
+ ns, binds = hashes.reverse
263
+
264
+ ns ||= (document.root&.namespaces || {})
265
+
266
+ [params, handler, ns, binds]
267
+ end
268
+ end
269
+ end
270
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  ###
@@ -40,7 +42,29 @@ module Nokogiri
40
42
  end
41
43
 
42
44
  def to_s
43
- super.chomp
45
+ message = super.chomp
46
+ [location_to_s, level_to_s, message]
47
+ .compact.join(": ")
48
+ .force_encoding(message.encoding)
49
+ end
50
+
51
+ private
52
+
53
+ def level_to_s
54
+ case level
55
+ when 3 then "FATAL"
56
+ when 2 then "ERROR"
57
+ when 1 then "WARNING"
58
+ end
59
+ end
60
+
61
+ def nil_or_zero?(attribute)
62
+ attribute.nil? || attribute.zero?
63
+ end
64
+
65
+ def location_to_s
66
+ return nil if nil_or_zero?(line) && nil_or_zero?(column)
67
+ "#{line}:#{column}"
44
68
  end
45
69
  end
46
70
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class Text < Nokogiri::XML::CharacterData
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
- class XPath
5
+ module XPath
4
6
  class SyntaxError < XML::SyntaxError
5
7
  def to_s
6
- [super.chomp, str1].compact.join(': ')
8
+ [super.chomp, str1].compact.join(": ")
7
9
  end
8
10
  end
9
11
  end
@@ -1,10 +1,21 @@
1
- require 'nokogiri/xml/xpath/syntax_error'
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Nokogiri
4
4
  module XML
5
- class XPath
6
- # The Nokogiri::XML::Document tied to this XPath instance
7
- attr_accessor :document
5
+ module XPath
6
+ # The XPath search prefix to search globally, +//+
7
+ GLOBAL_SEARCH_PREFIX = "//"
8
+
9
+ # The XPath search prefix to search direct descendants of the root element, +/+
10
+ ROOT_SEARCH_PREFIX = "/"
11
+
12
+ # The XPath search prefix to search direct descendants of the current element, +./+
13
+ CURRENT_SEARCH_PREFIX = "./"
14
+
15
+ # The XPath search prefix to search anywhere in the current element's subtree, +.//+
16
+ SUBTREE_SEARCH_PREFIX = ".//"
8
17
  end
9
18
  end
10
19
  end
20
+
21
+ require_relative "xpath/syntax_error"
@@ -1,16 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class XPathContext
4
-
5
6
  ###
6
7
  # Register namespaces in +namespaces+
7
8
  def register_namespaces(namespaces)
8
9
  namespaces.each do |k, v|
9
- k = k.to_s.gsub(/.*:/,'') # strip off 'xmlns:' or 'xml:'
10
+ k = k.to_s.gsub(/.*:/, "") # strip off 'xmlns:' or 'xml:'
10
11
  register_ns(k, v)
11
12
  end
12
13
  end
13
-
14
14
  end
15
15
  end
16
16
  end
data/lib/nokogiri/xml.rb CHANGED
@@ -1,35 +1,10 @@
1
- require 'nokogiri/xml/pp'
2
- require 'nokogiri/xml/parse_options'
3
- require 'nokogiri/xml/sax'
4
- require 'nokogiri/xml/node'
5
- require 'nokogiri/xml/attribute_decl'
6
- require 'nokogiri/xml/element_decl'
7
- require 'nokogiri/xml/element_content'
8
- require 'nokogiri/xml/character_data'
9
- require 'nokogiri/xml/namespace'
10
- require 'nokogiri/xml/attr'
11
- require 'nokogiri/xml/dtd'
12
- require 'nokogiri/xml/cdata'
13
- require 'nokogiri/xml/text'
14
- require 'nokogiri/xml/document'
15
- require 'nokogiri/xml/document_fragment'
16
- require 'nokogiri/xml/processing_instruction'
17
- require 'nokogiri/xml/node_set'
18
- require 'nokogiri/xml/syntax_error'
19
- require 'nokogiri/xml/xpath'
20
- require 'nokogiri/xml/xpath_context'
21
- require 'nokogiri/xml/builder'
22
- require 'nokogiri/xml/reader'
23
- require 'nokogiri/xml/notation'
24
- require 'nokogiri/xml/entity_decl'
25
- require 'nokogiri/xml/schema'
26
- require 'nokogiri/xml/relax_ng'
1
+ # frozen_string_literal: true
27
2
 
28
3
  module Nokogiri
29
4
  class << self
30
5
  ###
31
6
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
32
- def XML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block
7
+ def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
33
8
  Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
34
9
  end
35
10
  end
@@ -38,20 +13,18 @@ module Nokogiri
38
13
  # Original C14N 1.0 spec canonicalization
39
14
  XML_C14N_1_0 = 0
40
15
  # Exclusive C14N 1.0 spec canonicalization
41
- XML_C14N_EXCLUSIVE_1_0 = 1
16
+ XML_C14N_EXCLUSIVE_1_0 = 1
42
17
  # C14N 1.1 spec canonicalization
43
18
  XML_C14N_1_1 = 2
44
19
  class << self
45
20
  ###
46
21
  # Parse an XML document using the Nokogiri::XML::Reader API. See
47
22
  # Nokogiri::XML::Reader for more information
48
- def Reader string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT
49
-
50
- options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
51
- # Give the options to the user
23
+ def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
24
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
52
25
  yield options if block_given?
53
26
 
54
- if string_or_io.respond_to? :read
27
+ if string_or_io.respond_to?(:read)
55
28
  return Reader.from_io(string_or_io, url, encoding, options.to_i)
56
29
  end
57
30
  Reader.from_memory(string_or_io, url, encoding, options.to_i)
@@ -59,15 +32,44 @@ module Nokogiri
59
32
 
60
33
  ###
61
34
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
62
- def parse thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
35
+ def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
63
36
  Document.parse(thing, url, encoding, options, &block)
64
37
  end
65
38
 
66
39
  ####
67
40
  # Parse a fragment from +string+ into a NodeSet.
68
- def fragment string
69
- XML::DocumentFragment.parse(string)
41
+ def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
42
+ XML::DocumentFragment.parse(string, options, &block)
70
43
  end
71
44
  end
72
45
  end
73
46
  end
47
+
48
+ require_relative "xml/pp"
49
+ require_relative "xml/parse_options"
50
+ require_relative "xml/sax"
51
+ require_relative "xml/searchable"
52
+ require_relative "xml/node"
53
+ require_relative "xml/attribute_decl"
54
+ require_relative "xml/element_decl"
55
+ require_relative "xml/element_content"
56
+ require_relative "xml/character_data"
57
+ require_relative "xml/namespace"
58
+ require_relative "xml/attr"
59
+ require_relative "xml/dtd"
60
+ require_relative "xml/cdata"
61
+ require_relative "xml/text"
62
+ require_relative "xml/document"
63
+ require_relative "xml/document_fragment"
64
+ require_relative "xml/processing_instruction"
65
+ require_relative "xml/node_set"
66
+ require_relative "xml/syntax_error"
67
+ require_relative "xml/xpath"
68
+ require_relative "xml/xpath_context"
69
+ require_relative "xml/builder"
70
+ require_relative "xml/reader"
71
+ require_relative "xml/notation"
72
+ require_relative "xml/entity_decl"
73
+ require_relative "xml/entity_reference"
74
+ require_relative "xml/schema"
75
+ require_relative "xml/relax_ng"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XSLT
3
5
  ###
@@ -17,7 +19,7 @@ module Nokogiri
17
19
  # Apply an XSLT stylesheet to an XML::Document.
18
20
  # +params+ is an array of strings used as XSLT parameters.
19
21
  # returns serialized document
20
- def apply_to document, params = []
22
+ def apply_to(document, params = [])
21
23
  serialize(transform(document, params))
22
24
  end
23
25
  end
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,4 +1,5 @@
1
- require 'nokogiri/xslt/stylesheet'
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
4
5
  class << self
@@ -9,7 +10,7 @@ module Nokogiri
9
10
  #
10
11
  # xslt = Nokogiri::XSLT(File.read(ARGV[0]))
11
12
  #
12
- def XSLT stylesheet, modules = {}
13
+ def XSLT(stylesheet, modules = {})
13
14
  XSLT.parse(stylesheet, modules)
14
15
  end
15
16
  end
@@ -21,36 +22,44 @@ module Nokogiri
21
22
  class << self
22
23
  ###
23
24
  # Parse the stylesheet in +string+, register any +modules+
24
- def parse string, modules = {}
25
+ def parse(string, modules = {})
25
26
  modules.each do |url, klass|
26
- XSLT.register url, klass
27
+ XSLT.register(url, klass)
27
28
  end
28
29
 
30
+ doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
29
31
  if Nokogiri.jruby?
30
- Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
32
+ Stylesheet.parse_stylesheet_doc(doc, string)
31
33
  else
32
- Stylesheet.parse_stylesheet_doc(XML.parse(string))
34
+ Stylesheet.parse_stylesheet_doc(doc)
33
35
  end
34
36
  end
35
37
 
36
- ###
37
- # Quote parameters in +params+ for stylesheet safety
38
- def quote_params params
39
- parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
40
- parray.each_with_index do |v,i|
41
- if i % 2 > 0
42
- parray[i]=
43
- if v =~ /'/
44
- "concat('#{ v.gsub(/'/, %q{', "'", '}) }')"
45
- else
46
- "'#{v}'";
47
- end
38
+ # :call-seq:
39
+ # quote_params(params) → Array
40
+ #
41
+ # Quote parameters in +params+ for stylesheet safety.
42
+ # See Nokogiri::XSLT::Stylesheet.transform for example usage.
43
+ #
44
+ # [Parameters]
45
+ # - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
46
+ #
47
+ # [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
48
+ #
49
+ def quote_params(params)
50
+ params.flatten.each_slice(2).each_with_object([]) do |kv, quoted_params|
51
+ key, value = kv.map(&:to_s)
52
+ value = if /'/.match?(value)
53
+ "concat('#{value.gsub(/'/, %q{', "'", '})}')"
48
54
  else
49
- parray[i] = v.to_s
55
+ "'#{value}'"
50
56
  end
57
+ quoted_params << key
58
+ quoted_params << value
51
59
  end
52
- parray.flatten
53
60
  end
54
61
  end
55
62
  end
56
63
  end
64
+
65
+ require_relative "xslt/stylesheet"