nokogiri 1.5.10 → 1.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (334) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +5 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +73 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +956 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +327 -288
  94. data/lib/nokogiri/css/parser.y +67 -45
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +7 -6
  99. data/lib/nokogiri/css/xpath_visitor.rb +263 -75
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -90
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +259 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +18 -16
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  171. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  172. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  173. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  174. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  175. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  176. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  177. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  178. metadata +382 -460
  179. data/.autotest +0 -26
  180. data/.gemtest +0 -0
  181. data/CHANGELOG.ja.rdoc +0 -785
  182. data/CHANGELOG.rdoc +0 -783
  183. data/C_CODING_STYLE.rdoc +0 -33
  184. data/Manifest.txt +0 -303
  185. data/README.ja.rdoc +0 -106
  186. data/README.rdoc +0 -175
  187. data/ROADMAP.md +0 -90
  188. data/Rakefile +0 -228
  189. data/STANDARD_RESPONSES.md +0 -47
  190. data/Y_U_NO_GEMSPEC.md +0 -155
  191. data/build_all +0 -105
  192. data/ext/nokogiri/html_document.c +0 -170
  193. data/ext/nokogiri/html_document.h +0 -10
  194. data/ext/nokogiri/html_element_description.c +0 -279
  195. data/ext/nokogiri/html_element_description.h +0 -10
  196. data/ext/nokogiri/html_entity_lookup.c +0 -32
  197. data/ext/nokogiri/html_entity_lookup.h +0 -8
  198. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  199. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  200. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  201. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  202. data/ext/nokogiri/xml_attr.h +0 -9
  203. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  204. data/ext/nokogiri/xml_cdata.h +0 -9
  205. data/ext/nokogiri/xml_comment.h +0 -9
  206. data/ext/nokogiri/xml_document.h +0 -23
  207. data/ext/nokogiri/xml_document_fragment.h +0 -10
  208. data/ext/nokogiri/xml_dtd.h +0 -10
  209. data/ext/nokogiri/xml_element_content.h +0 -10
  210. data/ext/nokogiri/xml_element_decl.h +0 -9
  211. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  212. data/ext/nokogiri/xml_entity_decl.h +0 -10
  213. data/ext/nokogiri/xml_entity_reference.h +0 -9
  214. data/ext/nokogiri/xml_io.c +0 -56
  215. data/ext/nokogiri/xml_io.h +0 -11
  216. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  217. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  218. data/ext/nokogiri/xml_namespace.h +0 -13
  219. data/ext/nokogiri/xml_node.h +0 -13
  220. data/ext/nokogiri/xml_node_set.h +0 -14
  221. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  222. data/ext/nokogiri/xml_reader.h +0 -10
  223. data/ext/nokogiri/xml_relax_ng.h +0 -9
  224. data/ext/nokogiri/xml_sax_parser.h +0 -39
  225. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  226. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  227. data/ext/nokogiri/xml_schema.h +0 -9
  228. data/ext/nokogiri/xml_syntax_error.h +0 -13
  229. data/ext/nokogiri/xml_text.h +0 -9
  230. data/ext/nokogiri/xml_xpath_context.h +0 -10
  231. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  232. data/lib/nokogiri/html/document.rb +0 -254
  233. data/lib/nokogiri/html/document_fragment.rb +0 -41
  234. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  235. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  236. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  237. data/tasks/cross_compile.rb +0 -150
  238. data/tasks/nokogiri.org.rb +0 -24
  239. data/tasks/test.rb +0 -95
  240. data/test/css/test_nthiness.rb +0 -159
  241. data/test/css/test_parser.rb +0 -341
  242. data/test/css/test_tokenizer.rb +0 -198
  243. data/test/css/test_xpath_visitor.rb +0 -91
  244. data/test/decorators/test_slop.rb +0 -16
  245. data/test/files/2ch.html +0 -108
  246. data/test/files/address_book.rlx +0 -12
  247. data/test/files/address_book.xml +0 -10
  248. data/test/files/bar/bar.xsd +0 -4
  249. data/test/files/dont_hurt_em_why.xml +0 -422
  250. data/test/files/encoding.html +0 -82
  251. data/test/files/encoding.xhtml +0 -84
  252. data/test/files/exslt.xml +0 -8
  253. data/test/files/exslt.xslt +0 -35
  254. data/test/files/foo/foo.xsd +0 -4
  255. data/test/files/metacharset.html +0 -10
  256. data/test/files/noencoding.html +0 -47
  257. data/test/files/po.xml +0 -32
  258. data/test/files/po.xsd +0 -66
  259. data/test/files/shift_jis.html +0 -10
  260. data/test/files/shift_jis.xml +0 -5
  261. data/test/files/snuggles.xml +0 -3
  262. data/test/files/staff.dtd +0 -10
  263. data/test/files/staff.xml +0 -59
  264. data/test/files/staff.xslt +0 -32
  265. data/test/files/test_document_url/bar.xml +0 -2
  266. data/test/files/test_document_url/document.dtd +0 -4
  267. data/test/files/test_document_url/document.xml +0 -6
  268. data/test/files/tlm.html +0 -850
  269. data/test/files/to_be_xincluded.xml +0 -2
  270. data/test/files/valid_bar.xml +0 -2
  271. data/test/files/xinclude.xml +0 -4
  272. data/test/helper.rb +0 -154
  273. data/test/html/sax/test_parser.rb +0 -141
  274. data/test/html/sax/test_parser_context.rb +0 -46
  275. data/test/html/test_builder.rb +0 -164
  276. data/test/html/test_document.rb +0 -552
  277. data/test/html/test_document_encoding.rb +0 -138
  278. data/test/html/test_document_fragment.rb +0 -261
  279. data/test/html/test_element_description.rb +0 -105
  280. data/test/html/test_named_characters.rb +0 -14
  281. data/test/html/test_node.rb +0 -196
  282. data/test/html/test_node_encoding.rb +0 -27
  283. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  284. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  285. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  286. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  287. data/test/test_convert_xpath.rb +0 -135
  288. data/test/test_css_cache.rb +0 -45
  289. data/test/test_encoding_handler.rb +0 -46
  290. data/test/test_memory_leak.rb +0 -156
  291. data/test/test_nokogiri.rb +0 -132
  292. data/test/test_reader.rb +0 -555
  293. data/test/test_soap4r_sax.rb +0 -52
  294. data/test/test_xslt_transforms.rb +0 -254
  295. data/test/xml/node/test_save_options.rb +0 -28
  296. data/test/xml/node/test_subclass.rb +0 -44
  297. data/test/xml/sax/test_parser.rb +0 -366
  298. data/test/xml/sax/test_parser_context.rb +0 -106
  299. data/test/xml/sax/test_push_parser.rb +0 -157
  300. data/test/xml/test_attr.rb +0 -64
  301. data/test/xml/test_attribute_decl.rb +0 -86
  302. data/test/xml/test_builder.rb +0 -306
  303. data/test/xml/test_c14n.rb +0 -151
  304. data/test/xml/test_cdata.rb +0 -48
  305. data/test/xml/test_comment.rb +0 -29
  306. data/test/xml/test_document.rb +0 -828
  307. data/test/xml/test_document_encoding.rb +0 -28
  308. data/test/xml/test_document_fragment.rb +0 -223
  309. data/test/xml/test_dtd.rb +0 -103
  310. data/test/xml/test_dtd_encoding.rb +0 -33
  311. data/test/xml/test_element_content.rb +0 -56
  312. data/test/xml/test_element_decl.rb +0 -73
  313. data/test/xml/test_entity_decl.rb +0 -122
  314. data/test/xml/test_entity_reference.rb +0 -245
  315. data/test/xml/test_namespace.rb +0 -95
  316. data/test/xml/test_node.rb +0 -1137
  317. data/test/xml/test_node_attributes.rb +0 -96
  318. data/test/xml/test_node_encoding.rb +0 -107
  319. data/test/xml/test_node_inheritance.rb +0 -32
  320. data/test/xml/test_node_reparenting.rb +0 -374
  321. data/test/xml/test_node_set.rb +0 -755
  322. data/test/xml/test_parse_options.rb +0 -64
  323. data/test/xml/test_processing_instruction.rb +0 -30
  324. data/test/xml/test_reader_encoding.rb +0 -142
  325. data/test/xml/test_relax_ng.rb +0 -60
  326. data/test/xml/test_schema.rb +0 -103
  327. data/test/xml/test_syntax_error.rb +0 -12
  328. data/test/xml/test_text.rb +0 -45
  329. data/test/xml/test_unparented_node.rb +0 -422
  330. data/test/xml/test_xinclude.rb +0 -83
  331. data/test/xml/test_xpath.rb +0 -295
  332. data/test/xslt/test_custom_functions.rb +0 -133
  333. data/test/xslt/test_exception_handling.rb +0 -37
  334. data/test_all +0 -81
@@ -1,19 +1,23 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "pathname"
5
+
1
6
  module Nokogiri
2
7
  module XML
3
- ##
4
- # Nokogiri::XML::Document is the main entry point for dealing with
5
- # XML documents. The Document is created by parsing an XML document.
6
- # See Nokogiri::XML::Document.parse() for more information on parsing.
7
- #
8
- # For searching a Document, see Nokogiri::XML::Node#css and
9
- # Nokogiri::XML::Node#xpath
8
+ # Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
9
+ # is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
10
+ # on parsing.
10
11
  #
12
+ # For searching a Document, see Nokogiri::XML::Searchable#css and
13
+ # Nokogiri::XML::Searchable#xpath
11
14
  class Document < Nokogiri::XML::Node
12
- # I'm ignoring unicode characters here.
13
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
15
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
16
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
17
+ # characters in NCNAMEs.
14
18
  NCNAME_START_CHAR = "A-Za-z_"
15
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
16
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
19
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
17
21
 
18
22
  ##
19
23
  # Parse an XML file.
@@ -33,22 +37,35 @@ module Nokogiri
33
37
  # +block+ (optional) is passed a configuration object on which
34
38
  # parse options may be set.
35
39
  #
36
- # When parsing untrusted documents, it's recommended that the
37
- # +nonet+ option be used, as shown in this example code:
38
- #
39
- # Nokogiri::XML::Document.parse(xml_string) { |config| config.nonet }
40
+ # By default, Nokogiri treats documents as untrusted, and so
41
+ # does not attempt to load DTDs or access the network. See
42
+ # Nokogiri::XML::ParseOptions for a complete list of options;
43
+ # and that module's DEFAULT_XML constant for what's set (and not
44
+ # set) by default.
40
45
  #
41
46
  # Nokogiri.XML() is a convenience method which will call this method.
42
47
  #
43
- def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
44
- options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
45
- # Give the options to the user
48
+ def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
46
50
  yield options if block_given?
47
51
 
48
- return new if empty_doc?(string_or_io)
52
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
53
+
54
+ if empty_doc?(string_or_io)
55
+ if options.strict?
56
+ raise Nokogiri::XML::SyntaxError, "Empty document"
57
+ else
58
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
59
+ end
60
+ end
49
61
 
50
62
  doc = if string_or_io.respond_to?(:read)
51
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
63
+ if string_or_io.is_a?(Pathname)
64
+ # resolve the Pathname to the file and open it as an IO object, see #2110
65
+ string_or_io = string_or_io.expand_path.open
66
+ url ||= string_or_io.path
67
+ end
68
+
52
69
  read_io(string_or_io, url, encoding, options.to_i)
53
70
  else
54
71
  # read_memory pukes on empty docs
@@ -58,68 +75,192 @@ module Nokogiri
58
75
  # do xinclude processing
59
76
  doc.do_xinclude(options) if options.xinclude?
60
77
 
61
- return doc
78
+ doc
62
79
  end
63
80
 
64
- # A list of Nokogiri::XML::SyntaxError found when parsing a document
81
+ ##
82
+ # :singleton-method: wrap
83
+ # :call-seq: wrap(java_document) → Nokogiri::XML::Document
84
+ #
85
+ # ⚠ This method is only available when running JRuby.
86
+ #
87
+ # Create a Document using an existing Java DOM document object.
88
+ #
89
+ # The returned Document shares the same underlying data structure as the Java object, so
90
+ # changes in one are reflected in the other.
91
+ #
92
+ # [Parameters]
93
+ # - `java_document` (Java::OrgW3cDom::Document)
94
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
95
+ #
96
+ # [Returns] Nokogiri::XML::Document
97
+ #
98
+ # See also \#to_java
99
+
100
+ # :method: to_java
101
+ # :call-seq: to_java() → Java::OrgW3cDom::Document
102
+ #
103
+ # ⚠ This method is only available when running JRuby.
104
+ #
105
+ # Returns the underlying Java DOM document object for this document.
106
+ #
107
+ # The returned Java object shares the same underlying data structure as this document, so
108
+ # changes in one are reflected in the other.
109
+ #
110
+ # [Returns]
111
+ # Java::OrgW3cDom::Document
112
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
113
+ #
114
+ # See also Document.wrap
115
+
116
+ # The errors found while parsing a document.
117
+ #
118
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
65
119
  attr_accessor :errors
66
120
 
67
- def initialize *args # :nodoc:
121
+ # When `true`, reparented elements without a namespace will inherit their new parent's
122
+ # namespace (if one exists). Defaults to `false`.
123
+ #
124
+ # [Returns] Boolean
125
+ #
126
+ # *Example:* Default behavior of namespace inheritance
127
+ #
128
+ # xml = <<~EOF
129
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
130
+ # <foo:parent>
131
+ # </foo:parent>
132
+ # </root>
133
+ # EOF
134
+ # doc = Nokogiri::XML(xml)
135
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
136
+ # parent.add_child("<child></child>")
137
+ # doc.to_xml
138
+ # # => <?xml version="1.0"?>
139
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
140
+ # # <foo:parent>
141
+ # # <child/>
142
+ # # </foo:parent>
143
+ # # </root>
144
+ #
145
+ # *Example:* Setting namespace inheritance to `true`
146
+ #
147
+ # xml = <<~EOF
148
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
149
+ # <foo:parent>
150
+ # </foo:parent>
151
+ # </root>
152
+ # EOF
153
+ # doc = Nokogiri::XML(xml)
154
+ # doc.namespace_inheritance = true
155
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
156
+ # parent.add_child("<child></child>")
157
+ # doc.to_xml
158
+ # # => <?xml version="1.0"?>
159
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
160
+ # # <foo:parent>
161
+ # # <foo:child/>
162
+ # # </foo:parent>
163
+ # # </root>
164
+ #
165
+ # Since v1.12.4
166
+ attr_accessor :namespace_inheritance
167
+
168
+ def initialize(*args) # :nodoc:
68
169
  @errors = []
69
170
  @decorators = nil
171
+ @namespace_inheritance = false
70
172
  end
71
173
 
72
- ##
73
- # Create an element with +name+, and optionally setting the content and attributes.
174
+ # :call-seq:
175
+ # create_element(name, *contents_or_attrs, &block) Nokogiri::XML::Element
74
176
  #
75
- # doc.create_element "div" # <div></div>
76
- # doc.create_element "div", :class => "container" # <div class='container'></div>
77
- # doc.create_element "div", "contents" # <div>contents</div>
78
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
79
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
177
+ # Create a new Element with `name` belonging to this document, optionally setting contents or
178
+ # attributes.
80
179
  #
81
- def create_element name, *args, &block
180
+ # This method is _not_ the most user-friendly option if your intention is to add a node to the
181
+ # document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
182
+ # Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
183
+ # place it in the document tree.
184
+ #
185
+ # Arguments may be passed to initialize the element:
186
+ #
187
+ # - a Hash argument will be used to set attributes
188
+ # - a non-Hash object that responds to \#to_s will be used to set the new node's contents
189
+ #
190
+ # A block may be passed to mutate the node.
191
+ #
192
+ # [Parameters]
193
+ # - `name` (String)
194
+ # - `contents_or_attrs` (\#to_s, Hash)
195
+ # [Yields] `node` (Nokogiri::XML::Element)
196
+ # [Returns] Nokogiri::XML::Element
197
+ #
198
+ # *Example:* An empty element without attributes
199
+ #
200
+ # doc.create_element("div")
201
+ # # => <div></div>
202
+ #
203
+ # *Example:* An element with contents
204
+ #
205
+ # doc.create_element("div", "contents")
206
+ # # => <div>contents</div>
207
+ #
208
+ # *Example:* An element with attributes
209
+ #
210
+ # doc.create_element("div", {"class" => "container"})
211
+ # # => <div class='container'></div>
212
+ #
213
+ # *Example:* An element with contents and attributes
214
+ #
215
+ # doc.create_element("div", "contents", {"class" => "container"})
216
+ # # => <div class='container'>contents</div>
217
+ #
218
+ # *Example:* Passing a block to mutate the element
219
+ #
220
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
221
+ #
222
+ def create_element(name, *contents_or_attrs, &block)
82
223
  elm = Nokogiri::XML::Element.new(name, self, &block)
83
- args.each do |arg|
224
+ contents_or_attrs.each do |arg|
84
225
  case arg
85
226
  when Hash
86
- arg.each { |k,v|
227
+ arg.each do |k, v|
87
228
  key = k.to_s
88
229
  if key =~ NCNAME_RE
89
- ns_name = key.split(":", 2)[1]
90
- elm.add_namespace_definition ns_name, v
230
+ ns_name = Regexp.last_match(1)
231
+ elm.add_namespace_definition(ns_name, v)
91
232
  else
92
233
  elm[k.to_s] = v.to_s
93
234
  end
94
- }
235
+ end
95
236
  else
96
237
  elm.content = arg
97
238
  end
98
239
  end
99
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
240
+ if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
100
241
  elm.namespace = ns
101
242
  end
102
243
  elm
103
244
  end
104
245
 
105
246
  # Create a Text Node with +string+
106
- def create_text_node string, &block
107
- Nokogiri::XML::Text.new string.to_s, self, &block
247
+ def create_text_node(string, &block)
248
+ Nokogiri::XML::Text.new(string.to_s, self, &block)
108
249
  end
109
250
 
110
251
  # Create a CDATA Node containing +string+
111
- def create_cdata string, &block
112
- Nokogiri::XML::CDATA.new self, string.to_s, &block
252
+ def create_cdata(string, &block)
253
+ Nokogiri::XML::CDATA.new(self, string.to_s, &block)
113
254
  end
114
255
 
115
256
  # Create a Comment Node containing +string+
116
- def create_comment string, &block
117
- Nokogiri::XML::Comment.new self, string.to_s, &block
257
+ def create_comment(string, &block)
258
+ Nokogiri::XML::Comment.new(self, string.to_s, &block)
118
259
  end
119
260
 
120
261
  # The name of this document. Always returns "document"
121
262
  def name
122
- 'document'
263
+ "document"
123
264
  end
124
265
 
125
266
  # A reference to +self+
@@ -127,46 +268,51 @@ module Nokogiri
127
268
  self
128
269
  end
129
270
 
130
- ##
131
- # Recursively get all namespaces from this node and its subtree and
132
- # return them as a hash.
271
+ # :call-seq:
272
+ # collect_namespaces() Hash<String(Namespace#prefix) String(Namespace#href)>
133
273
  #
134
- # For example, given this document:
274
+ # Recursively get all namespaces from this node and its subtree and return them as a
275
+ # hash.
135
276
  #
136
- # <root xmlns:foo="bar">
277
+ # ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
278
+ #
279
+ # Note that this method does an xpath lookup for nodes with namespaces, and as a result the
280
+ # order (and which duplicate prefix "wins") may be dependent on the implementation of the
281
+ # underlying XML library.
282
+ #
283
+ # *Example:* Basic usage
284
+ #
285
+ # Given this document:
286
+ #
287
+ # <root xmlns="default" xmlns:foo="bar">
137
288
  # <bar xmlns:hello="world" />
138
289
  # </root>
139
290
  #
140
291
  # This method will return:
141
292
  #
142
- # { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
293
+ # {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
294
+ #
295
+ # *Example:* Duplicate prefixes
143
296
  #
144
- # WARNING: this method will clobber duplicate names in the keys.
145
- # For example, given this document:
297
+ # Given this document:
146
298
  #
147
299
  # <root xmlns:foo="bar">
148
300
  # <bar xmlns:foo="baz" />
149
301
  # </root>
150
302
  #
151
- # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
303
+ # The hash returned will be something like:
152
304
  #
153
- # Non-prefixed default namespaces (as in "xmlns=") are not included
154
- # in the hash.
155
- #
156
- # Note that this method does an xpath lookup for nodes with
157
- # namespaces, and as a result the order may be dependent on the
158
- # implementation of the underlying XML library.
305
+ # {"xmlns:foo" => "baz"}
159
306
  #
160
307
  def collect_namespaces
161
- xpath("//namespace::*").inject({}) do |hash, ns|
162
- hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
163
- hash
308
+ xpath("//namespace::*").each_with_object({}) do |ns, hash|
309
+ hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
164
310
  end
165
311
  end
166
312
 
167
313
  # Get the list of decorators given +key+
168
- def decorators key
169
- @decorators ||= Hash.new
314
+ def decorators(key)
315
+ @decorators ||= {}
170
316
  @decorators[key] ||= []
171
317
  end
172
318
 
@@ -175,7 +321,7 @@ module Nokogiri
175
321
  # the document or +nil+ when there is no DTD.
176
322
  def validate
177
323
  return nil unless internal_subset
178
- internal_subset.validate self
324
+ internal_subset.validate(self)
179
325
  end
180
326
 
181
327
  ##
@@ -195,7 +341,7 @@ module Nokogiri
195
341
  # ... which does absolutely nothing.
196
342
  #
197
343
  def slop!
198
- unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
344
+ unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
199
345
  decorators(XML::Node) << Nokogiri::Decorators::Slop
200
346
  decorate!
201
347
  end
@@ -205,16 +351,16 @@ module Nokogiri
205
351
 
206
352
  ##
207
353
  # Apply any decorators to +node+
208
- def decorate node
354
+ def decorate(node)
209
355
  return unless @decorators
210
- @decorators.each { |klass,list|
356
+ @decorators.each do |klass, list|
211
357
  next unless node.is_a?(klass)
212
358
  list.each { |moodule| node.extend(moodule) }
213
- }
359
+ end
214
360
  end
215
361
 
216
- alias :to_xml :serialize
217
- alias :clone :dup
362
+ alias_method :to_xml, :serialize
363
+ alias_method :clone, :dup
218
364
 
219
365
  # Get the hash of namespaces on the root Nokogiri::XML::Node
220
366
  def namespaces
@@ -224,52 +370,45 @@ module Nokogiri
224
370
  ##
225
371
  # Create a Nokogiri::XML::DocumentFragment from +tags+
226
372
  # Returns an empty fragment if +tags+ is nil.
227
- def fragment tags = nil
228
- DocumentFragment.new(self, tags, self.root)
373
+ def fragment(tags = nil)
374
+ DocumentFragment.new(self, tags, root)
229
375
  end
230
376
 
231
377
  undef_method :swap, :parent, :namespace, :default_namespace=
232
378
  undef_method :add_namespace_definition, :attributes
233
379
  undef_method :namespace_definitions, :line, :add_namespace
234
380
 
235
- def add_child node_or_tags
236
- raise "Document already has a root node" if root
381
+ def add_child(node_or_tags)
382
+ raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
237
383
  node_or_tags = coerce(node_or_tags)
238
384
  if node_or_tags.is_a?(XML::NodeSet)
239
- raise "Document cannot have multiple root nodes" if node_or_tags.size > 1
385
+ raise "A document may not have multiple root nodes." if node_or_tags.size > 1
240
386
  super(node_or_tags.first)
241
387
  else
242
388
  super
243
389
  end
244
390
  end
245
- alias :<< :add_child
391
+ alias_method :<<, :add_child
246
392
 
247
- ##
248
- # +JRuby+
249
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
250
- def self.wrap document
251
- raise "JRuby only method" unless Nokogiri.jruby?
252
- return wrapJavaDocument(document)
253
- end
254
-
255
- ##
256
- # +JRuby+
257
- # Returns Java's org.w3c.dom.document of this Document.
258
- def to_java
259
- raise "JRuby only method" unless Nokogiri.jruby?
260
- return toJavaDocument()
393
+ # :call-seq:
394
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
395
+ #
396
+ # [Returns] The document type which determines CSS-to-XPath translation.
397
+ #
398
+ # See XPathVisitor for more information.
399
+ def xpath_doctype
400
+ Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
261
401
  end
262
402
 
263
403
  private
264
- def self.empty_doc? string_or_io
404
+
405
+ def self.empty_doc?(string_or_io)
265
406
  string_or_io.nil? ||
266
407
  (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
267
408
  (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
268
409
  end
269
410
 
270
- def implied_xpath_context
271
- "/"
272
- end
411
+ IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
273
412
 
274
413
  def inspect_attributes
275
414
  [:name, :children]
@@ -1,34 +1,57 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class DocumentFragment < Nokogiri::XML::Node
6
+ ####
7
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
8
+ def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block)
9
+ new(XML::Document.new, tags, nil, options, &block)
10
+ end
11
+
4
12
  ##
5
13
  # Create a new DocumentFragment from +tags+.
6
14
  #
7
15
  # If +ctx+ is present, it is used as a context node for the
8
16
  # subtree created, e.g., namespaces will be resolved relative
9
17
  # to +ctx+.
10
- def initialize document, tags = nil, ctx = nil
18
+ def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
11
19
  return self unless tags
12
20
 
21
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
22
+ yield options if block_given?
23
+
13
24
  children = if ctx
14
- # Fix for issue#490
15
- if Nokogiri.jruby?
16
- # fix for issue #770
17
- ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>").children
18
- else
19
- ctx.parse(tags)
20
- end
21
- else
22
- XML::Document.parse("<root>#{tags}</root>") \
23
- .xpath("/root/node()")
24
- end
25
+ # Fix for issue#490
26
+ if Nokogiri.jruby?
27
+ # fix for issue #770
28
+ ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>", options).children
29
+ else
30
+ ctx.parse(tags, options)
31
+ end
32
+ else
33
+ wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
34
+ self.errors = wrapper_doc.errors
35
+ wrapper_doc.xpath("/root/node()")
36
+ end
25
37
  children.each { |child| child.parent = self }
26
38
  end
27
39
 
40
+ if Nokogiri.uses_libxml?
41
+ def dup
42
+ new_document = document.dup
43
+ new_fragment = self.class.new(new_document)
44
+ children.each do |child|
45
+ child.dup(1, new_document).parent = new_fragment
46
+ end
47
+ new_fragment
48
+ end
49
+ end
50
+
28
51
  ###
29
52
  # return the name for DocumentFragment
30
53
  def name
31
- '#document-fragment'
54
+ "#document-fragment"
32
55
  end
33
56
 
34
57
  ###
@@ -40,10 +63,10 @@ module Nokogiri
40
63
  ###
41
64
  # Convert this DocumentFragment to html
42
65
  # See Nokogiri::XML::NodeSet#to_html
43
- def to_html *args
66
+ def to_html(*args)
44
67
  if Nokogiri.jruby?
45
68
  options = args.first.is_a?(Hash) ? args.shift : {}
46
- if !options[:save_with]
69
+ unless options[:save_with]
47
70
  options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
48
71
  end
49
72
  args.insert(0, options)
@@ -54,10 +77,10 @@ module Nokogiri
54
77
  ###
55
78
  # Convert this DocumentFragment to xhtml
56
79
  # See Nokogiri::XML::NodeSet#to_xhtml
57
- def to_xhtml *args
80
+ def to_xhtml(*args)
58
81
  if Nokogiri.jruby?
59
82
  options = args.first.is_a?(Hash) ? args.shift : {}
60
- if !options[:save_with]
83
+ unless options[:save_with]
61
84
  options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_XHTML
62
85
  end
63
86
  args.insert(0, options)
@@ -68,44 +91,71 @@ module Nokogiri
68
91
  ###
69
92
  # Convert this DocumentFragment to xml
70
93
  # See Nokogiri::XML::NodeSet#to_xml
71
- def to_xml *args
94
+ def to_xml(*args)
72
95
  children.to_xml(*args)
73
96
  end
74
97
 
75
98
  ###
76
- # Search this fragment. See Nokogiri::XML::Node#css
77
- def css *args
99
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
100
+ #
101
+ # Search this fragment for CSS +rules+. +rules+ must be one or more CSS
102
+ # selectors. For example:
103
+ #
104
+ # For more information see Nokogiri::XML::Searchable#css
105
+ def css(*args)
78
106
  if children.any?
79
- children.css(*args)
107
+ children.css(*args) # 'children' is a smell here
80
108
  else
81
109
  NodeSet.new(document)
82
110
  end
83
111
  end
84
112
 
85
- alias :serialize :to_s
113
+ #
114
+ # NOTE that we don't delegate #xpath to children ... another smell.
115
+ # def xpath ; end
116
+ #
117
+
118
+ ###
119
+ # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
120
+ #
121
+ # Search this fragment for +paths+. +paths+ must be one or more XPath or CSS queries.
122
+ #
123
+ # For more information see Nokogiri::XML::Searchable#search
124
+ def search(*rules)
125
+ rules, handler, ns, binds = extract_params(rules)
86
126
 
87
- class << self
88
- ####
89
- # Create a Nokogiri::XML::DocumentFragment from +tags+
90
- def parse tags
91
- self.new(XML::Document.new, tags)
127
+ rules.inject(NodeSet.new(document)) do |set, rule|
128
+ set + if Searchable::LOOKS_LIKE_XPATH.match?(rule)
129
+ xpath(*[rule, ns, handler, binds].compact)
130
+ else
131
+ children.css(*[rule, ns, handler].compact) # 'children' is a smell here
132
+ end
92
133
  end
93
134
  end
94
135
 
136
+ alias_method :serialize, :to_s
137
+
138
+ # A list of Nokogiri::XML::SyntaxError found when parsing a document
139
+ def errors
140
+ document.errors
141
+ end
142
+
143
+ def errors=(things) # :nodoc:
144
+ document.errors = things
145
+ end
146
+
147
+ def fragment(data)
148
+ document.fragment(data)
149
+ end
150
+
95
151
  private
96
152
 
97
153
  # fix for issue 770
98
- def namespace_declarations ctx
154
+ def namespace_declarations(ctx)
99
155
  ctx.namespace_scopes.map do |namespace|
100
156
  prefix = namespace.prefix.nil? ? "" : ":#{namespace.prefix}"
101
- %Q{xmlns#{prefix}="#{namespace.href}"}
102
- end.join ' '
103
- end
104
-
105
- def coerce data
106
- return super unless String === data
107
-
108
- document.fragment(data).children
157
+ %{xmlns#{prefix}="#{namespace.href}"}
158
+ end.join(" ")
109
159
  end
110
160
  end
111
161
  end