nokogiri 1.5.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (328) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +278 -0
  6. data/bin/nokogiri +50 -10
  7. data/dependencies.yml +74 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +944 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +305 -201
  25. data/ext/nokogiri/xml_document_fragment.c +13 -15
  26. data/ext/nokogiri/xml_dtd.c +54 -48
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +30 -19
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +808 -503
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +198 -186
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +162 -98
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4886 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/css/node.rb +1 -50
  92. data/lib/nokogiri/css/parser.rb +317 -286
  93. data/lib/nokogiri/css/parser.y +57 -43
  94. data/lib/nokogiri/css/parser_extras.rb +39 -36
  95. data/lib/nokogiri/css/syntax_error.rb +2 -1
  96. data/lib/nokogiri/css/tokenizer.rb +105 -103
  97. data/lib/nokogiri/css/tokenizer.rex +5 -5
  98. data/lib/nokogiri/css/xpath_visitor.rb +137 -48
  99. data/lib/nokogiri/css.rb +15 -14
  100. data/lib/nokogiri/decorators/slop.rb +13 -5
  101. data/lib/nokogiri/extension.rb +31 -0
  102. data/lib/nokogiri/gumbo.rb +14 -0
  103. data/lib/nokogiri/html.rb +32 -27
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +118 -50
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  113. data/lib/nokogiri/html4.rb +40 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/html5.rb +473 -0
  118. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  119. data/lib/nokogiri/syntax_error.rb +1 -0
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/version.rb +3 -91
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +75 -33
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +157 -54
  129. data/lib/nokogiri/xml/document_fragment.rb +55 -8
  130. data/lib/nokogiri/xml/dtd.rb +15 -4
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  137. data/lib/nokogiri/xml/node.rb +712 -431
  138. data/lib/nokogiri/xml/node_set.rb +140 -123
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +31 -0
  141. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  142. data/lib/nokogiri/xml/pp/node.rb +1 -0
  143. data/lib/nokogiri/xml/pp.rb +3 -2
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax/document.rb +25 -30
  148. data/lib/nokogiri/xml/sax/parser.rb +8 -8
  149. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  150. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  151. data/lib/nokogiri/xml/sax.rb +5 -4
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +239 -0
  154. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  157. data/lib/nokogiri/xml/xpath.rb +4 -5
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xml.rb +37 -35
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/nokogiri/xslt.rb +17 -16
  162. data/lib/nokogiri.rb +55 -58
  163. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  164. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  165. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  166. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  167. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  171. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  172. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  173. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  174. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  175. metadata +307 -459
  176. data/.autotest +0 -26
  177. data/.gemtest +0 -0
  178. data/CHANGELOG.ja.rdoc +0 -785
  179. data/CHANGELOG.rdoc +0 -783
  180. data/C_CODING_STYLE.rdoc +0 -33
  181. data/Manifest.txt +0 -303
  182. data/README.ja.rdoc +0 -106
  183. data/README.rdoc +0 -175
  184. data/ROADMAP.md +0 -90
  185. data/Rakefile +0 -228
  186. data/STANDARD_RESPONSES.md +0 -47
  187. data/Y_U_NO_GEMSPEC.md +0 -155
  188. data/build_all +0 -105
  189. data/ext/nokogiri/html_document.c +0 -170
  190. data/ext/nokogiri/html_document.h +0 -10
  191. data/ext/nokogiri/html_element_description.c +0 -279
  192. data/ext/nokogiri/html_element_description.h +0 -10
  193. data/ext/nokogiri/html_entity_lookup.c +0 -32
  194. data/ext/nokogiri/html_entity_lookup.h +0 -8
  195. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  196. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  197. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  198. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  199. data/ext/nokogiri/xml_attr.h +0 -9
  200. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  201. data/ext/nokogiri/xml_cdata.h +0 -9
  202. data/ext/nokogiri/xml_comment.h +0 -9
  203. data/ext/nokogiri/xml_document.h +0 -23
  204. data/ext/nokogiri/xml_document_fragment.h +0 -10
  205. data/ext/nokogiri/xml_dtd.h +0 -10
  206. data/ext/nokogiri/xml_element_content.h +0 -10
  207. data/ext/nokogiri/xml_element_decl.h +0 -9
  208. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  209. data/ext/nokogiri/xml_entity_decl.h +0 -10
  210. data/ext/nokogiri/xml_entity_reference.h +0 -9
  211. data/ext/nokogiri/xml_io.c +0 -56
  212. data/ext/nokogiri/xml_io.h +0 -11
  213. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  214. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  215. data/ext/nokogiri/xml_namespace.h +0 -13
  216. data/ext/nokogiri/xml_node.h +0 -13
  217. data/ext/nokogiri/xml_node_set.h +0 -14
  218. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  219. data/ext/nokogiri/xml_reader.h +0 -10
  220. data/ext/nokogiri/xml_relax_ng.h +0 -9
  221. data/ext/nokogiri/xml_sax_parser.h +0 -39
  222. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  223. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  224. data/ext/nokogiri/xml_schema.h +0 -9
  225. data/ext/nokogiri/xml_syntax_error.h +0 -13
  226. data/ext/nokogiri/xml_text.h +0 -9
  227. data/ext/nokogiri/xml_xpath_context.h +0 -10
  228. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  229. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  230. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  231. data/tasks/cross_compile.rb +0 -150
  232. data/tasks/nokogiri.org.rb +0 -24
  233. data/tasks/test.rb +0 -95
  234. data/test/css/test_nthiness.rb +0 -159
  235. data/test/css/test_parser.rb +0 -341
  236. data/test/css/test_tokenizer.rb +0 -198
  237. data/test/css/test_xpath_visitor.rb +0 -91
  238. data/test/decorators/test_slop.rb +0 -16
  239. data/test/files/2ch.html +0 -108
  240. data/test/files/address_book.rlx +0 -12
  241. data/test/files/address_book.xml +0 -10
  242. data/test/files/bar/bar.xsd +0 -4
  243. data/test/files/dont_hurt_em_why.xml +0 -422
  244. data/test/files/encoding.html +0 -82
  245. data/test/files/encoding.xhtml +0 -84
  246. data/test/files/exslt.xml +0 -8
  247. data/test/files/exslt.xslt +0 -35
  248. data/test/files/foo/foo.xsd +0 -4
  249. data/test/files/metacharset.html +0 -10
  250. data/test/files/noencoding.html +0 -47
  251. data/test/files/po.xml +0 -32
  252. data/test/files/po.xsd +0 -66
  253. data/test/files/shift_jis.html +0 -10
  254. data/test/files/shift_jis.xml +0 -5
  255. data/test/files/snuggles.xml +0 -3
  256. data/test/files/staff.dtd +0 -10
  257. data/test/files/staff.xml +0 -59
  258. data/test/files/staff.xslt +0 -32
  259. data/test/files/test_document_url/bar.xml +0 -2
  260. data/test/files/test_document_url/document.dtd +0 -4
  261. data/test/files/test_document_url/document.xml +0 -6
  262. data/test/files/tlm.html +0 -850
  263. data/test/files/to_be_xincluded.xml +0 -2
  264. data/test/files/valid_bar.xml +0 -2
  265. data/test/files/xinclude.xml +0 -4
  266. data/test/helper.rb +0 -154
  267. data/test/html/sax/test_parser.rb +0 -141
  268. data/test/html/sax/test_parser_context.rb +0 -46
  269. data/test/html/test_builder.rb +0 -164
  270. data/test/html/test_document.rb +0 -552
  271. data/test/html/test_document_encoding.rb +0 -138
  272. data/test/html/test_document_fragment.rb +0 -261
  273. data/test/html/test_element_description.rb +0 -105
  274. data/test/html/test_named_characters.rb +0 -14
  275. data/test/html/test_node.rb +0 -196
  276. data/test/html/test_node_encoding.rb +0 -27
  277. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  278. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  279. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  280. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  281. data/test/test_convert_xpath.rb +0 -135
  282. data/test/test_css_cache.rb +0 -45
  283. data/test/test_encoding_handler.rb +0 -46
  284. data/test/test_memory_leak.rb +0 -156
  285. data/test/test_nokogiri.rb +0 -132
  286. data/test/test_reader.rb +0 -555
  287. data/test/test_soap4r_sax.rb +0 -52
  288. data/test/test_xslt_transforms.rb +0 -254
  289. data/test/xml/node/test_save_options.rb +0 -28
  290. data/test/xml/node/test_subclass.rb +0 -44
  291. data/test/xml/sax/test_parser.rb +0 -366
  292. data/test/xml/sax/test_parser_context.rb +0 -106
  293. data/test/xml/sax/test_push_parser.rb +0 -157
  294. data/test/xml/test_attr.rb +0 -64
  295. data/test/xml/test_attribute_decl.rb +0 -86
  296. data/test/xml/test_builder.rb +0 -306
  297. data/test/xml/test_c14n.rb +0 -151
  298. data/test/xml/test_cdata.rb +0 -48
  299. data/test/xml/test_comment.rb +0 -29
  300. data/test/xml/test_document.rb +0 -828
  301. data/test/xml/test_document_encoding.rb +0 -28
  302. data/test/xml/test_document_fragment.rb +0 -223
  303. data/test/xml/test_dtd.rb +0 -103
  304. data/test/xml/test_dtd_encoding.rb +0 -33
  305. data/test/xml/test_element_content.rb +0 -56
  306. data/test/xml/test_element_decl.rb +0 -73
  307. data/test/xml/test_entity_decl.rb +0 -122
  308. data/test/xml/test_entity_reference.rb +0 -245
  309. data/test/xml/test_namespace.rb +0 -95
  310. data/test/xml/test_node.rb +0 -1137
  311. data/test/xml/test_node_attributes.rb +0 -96
  312. data/test/xml/test_node_encoding.rb +0 -107
  313. data/test/xml/test_node_inheritance.rb +0 -32
  314. data/test/xml/test_node_reparenting.rb +0 -374
  315. data/test/xml/test_node_set.rb +0 -755
  316. data/test/xml/test_parse_options.rb +0 -64
  317. data/test/xml/test_processing_instruction.rb +0 -30
  318. data/test/xml/test_reader_encoding.rb +0 -142
  319. data/test/xml/test_relax_ng.rb +0 -60
  320. data/test/xml/test_schema.rb +0 -103
  321. data/test/xml/test_syntax_error.rb +0 -12
  322. data/test/xml/test_text.rb +0 -45
  323. data/test/xml/test_unparented_node.rb +0 -422
  324. data/test/xml/test_xinclude.rb +0 -83
  325. data/test/xml/test_xpath.rb +0 -295
  326. data/test/xslt/test_custom_functions.rb +0 -133
  327. data/test/xslt/test_exception_handling.rb +0 -37
  328. data/test_all +0 -81
@@ -1,3 +1,8 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'pathname'
5
+
1
6
  module Nokogiri
2
7
  module XML
3
8
  ##
@@ -5,15 +10,16 @@ module Nokogiri
5
10
  # XML documents. The Document is created by parsing an XML document.
6
11
  # See Nokogiri::XML::Document.parse() for more information on parsing.
7
12
  #
8
- # For searching a Document, see Nokogiri::XML::Node#css and
9
- # Nokogiri::XML::Node#xpath
13
+ # For searching a Document, see Nokogiri::XML::Searchable#css and
14
+ # Nokogiri::XML::Searchable#xpath
10
15
  #
11
16
  class Document < Nokogiri::XML::Node
12
- # I'm ignoring unicode characters here.
13
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
17
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
18
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
19
+ # characters in NCNAMEs.
14
20
  NCNAME_START_CHAR = "A-Za-z_"
15
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
16
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
21
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
22
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
17
23
 
18
24
  ##
19
25
  # Parse an XML file.
@@ -33,70 +39,184 @@ module Nokogiri
33
39
  # +block+ (optional) is passed a configuration object on which
34
40
  # parse options may be set.
35
41
  #
36
- # When parsing untrusted documents, it's recommended that the
37
- # +nonet+ option be used, as shown in this example code:
38
- #
39
- # Nokogiri::XML::Document.parse(xml_string) { |config| config.nonet }
42
+ # By default, Nokogiri treats documents as untrusted, and so
43
+ # does not attempt to load DTDs or access the network. See
44
+ # Nokogiri::XML::ParseOptions for a complete list of options;
45
+ # and that module's DEFAULT_XML constant for what's set (and not
46
+ # set) by default.
40
47
  #
41
48
  # Nokogiri.XML() is a convenience method which will call this method.
42
49
  #
43
- def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
44
- options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
45
- # Give the options to the user
50
+ def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
51
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
52
+
46
53
  yield options if block_given?
47
54
 
48
- return new if empty_doc?(string_or_io)
55
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
49
56
 
50
- doc = if string_or_io.respond_to?(:read)
51
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
52
- read_io(string_or_io, url, encoding, options.to_i)
53
- else
54
- # read_memory pukes on empty docs
55
- read_memory(string_or_io, url, encoding, options.to_i)
57
+ if empty_doc?(string_or_io)
58
+ if options.strict?
59
+ raise Nokogiri::XML::SyntaxError.new("Empty document")
60
+ else
61
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
62
+ end
56
63
  end
57
64
 
65
+ doc = if string_or_io.respond_to?(:read)
66
+ if string_or_io.is_a?(Pathname)
67
+ # resolve the Pathname to the file and open it as an IO object, see #2110
68
+ string_or_io = string_or_io.expand_path.open
69
+ url ||= string_or_io.path
70
+ end
71
+
72
+ read_io(string_or_io, url, encoding, options.to_i)
73
+ else
74
+ # read_memory pukes on empty docs
75
+ read_memory(string_or_io, url, encoding, options.to_i)
76
+ end
77
+
58
78
  # do xinclude processing
59
79
  doc.do_xinclude(options) if options.xinclude?
60
80
 
61
81
  return doc
62
82
  end
63
83
 
84
+ ##
85
+ # @!method wrap(java_document)
86
+ # @!scope class
87
+ #
88
+ # Create a {Document} using an existing Java DOM document object.
89
+ #
90
+ # The returned {Document} shares the same underlying data structure as the Java object, so
91
+ # changes in one are reflected in the other.
92
+ #
93
+ # @param java_document [Java::OrgW3cDom::Document]
94
+ # @return [Nokogiri::XML::Document]
95
+ # @note This method is only available when running JRuby.
96
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
97
+ # @see #to_java
98
+
99
+ ##
100
+ # @!method to_java()
101
+ #
102
+ # Returns the underlying Java DOM document object for the {Document}.
103
+ #
104
+ # The returned Java object shares the same underlying data structure as the {Document}, so
105
+ # changes in one are reflected in the other.
106
+ #
107
+ # @return [Java::OrgW3cDom::Document]
108
+ # @note This method is only available when running JRuby.
109
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
110
+ # @see .wrap
111
+
112
+
64
113
  # A list of Nokogiri::XML::SyntaxError found when parsing a document
65
114
  attr_accessor :errors
66
115
 
116
+ # When true, reparented elements without a namespace will inherit their new parent's
117
+ # namespace (if one exists). Defaults to +false+.
118
+ #
119
+ # @example Default behavior of namespace inheritance
120
+ # xml = <<~EOF
121
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
122
+ # <foo:parent>
123
+ # </foo:parent>
124
+ # </root>
125
+ # EOF
126
+ # doc = Nokogiri::XML(xml)
127
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
128
+ # parent.add_child("<child></child>")
129
+ # doc.to_xml
130
+ # # => <?xml version="1.0"?>
131
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
132
+ # # <foo:parent>
133
+ # # <child/>
134
+ # # </foo:parent>
135
+ # # </root>
136
+ #
137
+ # @example Setting namespace inheritance to +true+
138
+ # xml = <<~EOF
139
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
140
+ # <foo:parent>
141
+ # </foo:parent>
142
+ # </root>
143
+ # EOF
144
+ # doc = Nokogiri::XML(xml)
145
+ # doc.namespace_inheritance = true
146
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
147
+ # parent.add_child("<child></child>")
148
+ # doc.to_xml
149
+ # # => <?xml version="1.0"?>
150
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
151
+ # # <foo:parent>
152
+ # # <foo:child/>
153
+ # # </foo:parent>
154
+ # # </root>
155
+ #
156
+ # @return [Boolean]
157
+ #
158
+ # @since v1.12.4
159
+ attr_accessor :namespace_inheritance
160
+
67
161
  def initialize *args # :nodoc:
68
162
  @errors = []
69
163
  @decorators = nil
164
+ @namespace_inheritance = false
70
165
  end
71
166
 
72
167
  ##
73
- # Create an element with +name+, and optionally setting the content and attributes.
168
+ # Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
169
+ # setting contents or attributes.
170
+ #
171
+ # Arguments may be passed to initialize the element:
172
+ # - a +Hash+ argument will be used to set attributes
173
+ # - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
174
+ #
175
+ # A block may be passed to mutate the node.
176
+ #
177
+ # @param name [String]
178
+ # @param contents_or_attrs [#to_s,Hash]
179
+ # @yieldparam node [Nokogiri::XML::Element]
180
+ # @return [Nokogiri::XML::Element]
181
+ #
182
+ # @example An empty element without attributes
183
+ # doc.create_element("div")
184
+ # # => <div></div>
74
185
  #
75
- # doc.create_element "div" # <div></div>
76
- # doc.create_element "div", :class => "container" # <div class='container'></div>
77
- # doc.create_element "div", "contents" # <div>contents</div>
78
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
79
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
186
+ # @example An element with contents
187
+ # doc.create_element("div", "contents")
188
+ # # => <div>contents</div>
80
189
  #
81
- def create_element name, *args, &block
190
+ # @example An element with attributes
191
+ # doc.create_element("div", {"class" => "container"})
192
+ # # => <div class='container'></div>
193
+ #
194
+ # @example An element with contents and attributes
195
+ # doc.create_element("div", "contents", {"class" => "container"})
196
+ # # => <div class='container'>contents</div>
197
+ #
198
+ # @example Passing a block to mutate the element
199
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
200
+ #
201
+ def create_element(name, *contents_or_attrs, &block)
82
202
  elm = Nokogiri::XML::Element.new(name, self, &block)
83
- args.each do |arg|
203
+ contents_or_attrs.each do |arg|
84
204
  case arg
85
205
  when Hash
86
- arg.each { |k,v|
206
+ arg.each do |k, v|
87
207
  key = k.to_s
88
208
  if key =~ NCNAME_RE
89
- ns_name = key.split(":", 2)[1]
90
- elm.add_namespace_definition ns_name, v
209
+ ns_name = Regexp.last_match(1)
210
+ elm.add_namespace_definition(ns_name, v)
91
211
  else
92
212
  elm[k.to_s] = v.to_s
93
213
  end
94
- }
214
+ end
95
215
  else
96
216
  elm.content = arg
97
217
  end
98
218
  end
99
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
219
+ if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
100
220
  elm.namespace = ns
101
221
  end
102
222
  elm
@@ -233,10 +353,10 @@ module Nokogiri
233
353
  undef_method :namespace_definitions, :line, :add_namespace
234
354
 
235
355
  def add_child node_or_tags
236
- raise "Document already has a root node" if root
356
+ raise "A document may not have multiple root nodes." if (root && root.name != 'nokogiri_text_wrapper') && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
237
357
  node_or_tags = coerce(node_or_tags)
238
358
  if node_or_tags.is_a?(XML::NodeSet)
239
- raise "Document cannot have multiple root nodes" if node_or_tags.size > 1
359
+ raise "A document may not have multiple root nodes." if node_or_tags.size > 1
240
360
  super(node_or_tags.first)
241
361
  else
242
362
  super
@@ -244,32 +364,15 @@ module Nokogiri
244
364
  end
245
365
  alias :<< :add_child
246
366
 
247
- ##
248
- # +JRuby+
249
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
250
- def self.wrap document
251
- raise "JRuby only method" unless Nokogiri.jruby?
252
- return wrapJavaDocument(document)
253
- end
254
-
255
- ##
256
- # +JRuby+
257
- # Returns Java's org.w3c.dom.document of this Document.
258
- def to_java
259
- raise "JRuby only method" unless Nokogiri.jruby?
260
- return toJavaDocument()
261
- end
262
-
263
367
  private
368
+
264
369
  def self.empty_doc? string_or_io
265
370
  string_or_io.nil? ||
266
371
  (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
267
372
  (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
268
373
  end
269
374
 
270
- def implied_xpath_context
271
- "/"
272
- end
375
+ IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
273
376
 
274
377
  def inspect_attributes
275
378
  [:name, :children]
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class DocumentFragment < Nokogiri::XML::Node
@@ -25,6 +26,17 @@ module Nokogiri
25
26
  children.each { |child| child.parent = self }
26
27
  end
27
28
 
29
+ if Nokogiri.uses_libxml?
30
+ def dup
31
+ new_document = document.dup
32
+ new_fragment = self.class.new(new_document)
33
+ children.each do |child|
34
+ child.dup(1, new_document).parent = new_fragment
35
+ end
36
+ new_fragment
37
+ end
38
+ end
39
+
28
40
  ###
29
41
  # return the name for DocumentFragment
30
42
  def name
@@ -73,15 +85,43 @@ module Nokogiri
73
85
  end
74
86
 
75
87
  ###
76
- # Search this fragment. See Nokogiri::XML::Node#css
88
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
89
+ #
90
+ # Search this fragment for CSS +rules+. +rules+ must be one or more CSS
91
+ # selectors. For example:
92
+ #
93
+ # For more information see Nokogiri::XML::Searchable#css
77
94
  def css *args
78
95
  if children.any?
79
- children.css(*args)
96
+ children.css(*args) # 'children' is a smell here
80
97
  else
81
98
  NodeSet.new(document)
82
99
  end
83
100
  end
84
101
 
102
+ #
103
+ # NOTE that we don't delegate #xpath to children ... another smell.
104
+ # def xpath ; end
105
+ #
106
+
107
+ ###
108
+ # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
109
+ #
110
+ # Search this fragment for +paths+. +paths+ must be one or more XPath or CSS queries.
111
+ #
112
+ # For more information see Nokogiri::XML::Searchable#search
113
+ def search *rules
114
+ rules, handler, ns, binds = extract_params(rules)
115
+
116
+ rules.inject(NodeSet.new(document)) do |set, rule|
117
+ set += if rule =~ Searchable::LOOKS_LIKE_XPATH
118
+ xpath(*([rule, ns, handler, binds].compact))
119
+ else
120
+ children.css(*([rule, ns, handler].compact)) # 'children' is a smell here
121
+ end
122
+ end
123
+ end
124
+
85
125
  alias :serialize :to_s
86
126
 
87
127
  class << self
@@ -92,6 +132,19 @@ module Nokogiri
92
132
  end
93
133
  end
94
134
 
135
+ # A list of Nokogiri::XML::SyntaxError found when parsing a document
136
+ def errors
137
+ document.errors
138
+ end
139
+
140
+ def errors= things # :nodoc:
141
+ document.errors = things
142
+ end
143
+
144
+ def fragment(data)
145
+ document.fragment(data)
146
+ end
147
+
95
148
  private
96
149
 
97
150
  # fix for issue 770
@@ -101,12 +154,6 @@ module Nokogiri
101
154
  %Q{xmlns#{prefix}="#{namespace.href}"}
102
155
  end.join ' '
103
156
  end
104
-
105
- def coerce data
106
- return super unless String === data
107
-
108
- document.fragment(data).children
109
- end
110
157
  end
111
158
  end
112
159
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class DTD < Nokogiri::XML::Node
@@ -12,10 +13,20 @@ module Nokogiri
12
13
  attributes.keys
13
14
  end
14
15
 
15
- def each &block
16
- attributes.each { |key, value|
17
- block.call([key, value])
18
- }
16
+ def each
17
+ attributes.each do |key, value|
18
+ yield([key, value])
19
+ end
20
+ end
21
+
22
+ def html_dtd?
23
+ name.casecmp('html').zero?
24
+ end
25
+
26
+ def html5_dtd?
27
+ html_dtd? &&
28
+ external_id.nil? &&
29
+ (system_id.nil? || system_id == 'about:legacy-compat')
19
30
  end
20
31
  end
21
32
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class ElementDecl < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class EntityDecl < Nokogiri::XML::Node
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ class EntityReference < Nokogiri::XML::Node
5
+ def children
6
+ # libxml2 will create a malformed child node for predefined
7
+ # entities. because any use of that child is likely to cause a
8
+ # segfault, we shall pretend that it doesn't exist.
9
+ #
10
+ # see https://github.com/sparklemotion/nokogiri/issues/1238 for details
11
+ NodeSet.new(document)
12
+ end
13
+
14
+ def inspect_attributes
15
+ [:name]
16
+ end
17
+ end
18
+ end
19
+ end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Namespace
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Node
@@ -33,7 +34,7 @@ module Nokogiri
33
34
  DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
34
35
  end
35
36
  # the default for XHTML document
36
- DEFAULT_XHTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML
37
+ DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
37
38
 
38
39
  # Integer representation of the SaveOptions
39
40
  attr_reader :options