nokogiri 1.5.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (328) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +278 -0
  6. data/bin/nokogiri +50 -10
  7. data/dependencies.yml +74 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +944 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +305 -201
  25. data/ext/nokogiri/xml_document_fragment.c +13 -15
  26. data/ext/nokogiri/xml_dtd.c +54 -48
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +30 -19
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +808 -503
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +198 -186
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +162 -98
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4886 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/css/node.rb +1 -50
  92. data/lib/nokogiri/css/parser.rb +317 -286
  93. data/lib/nokogiri/css/parser.y +57 -43
  94. data/lib/nokogiri/css/parser_extras.rb +39 -36
  95. data/lib/nokogiri/css/syntax_error.rb +2 -1
  96. data/lib/nokogiri/css/tokenizer.rb +105 -103
  97. data/lib/nokogiri/css/tokenizer.rex +5 -5
  98. data/lib/nokogiri/css/xpath_visitor.rb +137 -48
  99. data/lib/nokogiri/css.rb +15 -14
  100. data/lib/nokogiri/decorators/slop.rb +13 -5
  101. data/lib/nokogiri/extension.rb +31 -0
  102. data/lib/nokogiri/gumbo.rb +14 -0
  103. data/lib/nokogiri/html.rb +32 -27
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +118 -50
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  113. data/lib/nokogiri/html4.rb +40 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/html5.rb +473 -0
  118. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  119. data/lib/nokogiri/syntax_error.rb +1 -0
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/version.rb +3 -91
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +75 -33
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +157 -54
  129. data/lib/nokogiri/xml/document_fragment.rb +55 -8
  130. data/lib/nokogiri/xml/dtd.rb +15 -4
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  137. data/lib/nokogiri/xml/node.rb +712 -431
  138. data/lib/nokogiri/xml/node_set.rb +140 -123
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +31 -0
  141. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  142. data/lib/nokogiri/xml/pp/node.rb +1 -0
  143. data/lib/nokogiri/xml/pp.rb +3 -2
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax/document.rb +25 -30
  148. data/lib/nokogiri/xml/sax/parser.rb +8 -8
  149. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  150. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  151. data/lib/nokogiri/xml/sax.rb +5 -4
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +239 -0
  154. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  157. data/lib/nokogiri/xml/xpath.rb +4 -5
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xml.rb +37 -35
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/nokogiri/xslt.rb +17 -16
  162. data/lib/nokogiri.rb +55 -58
  163. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  164. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  165. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  166. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  167. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  171. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  172. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  173. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  174. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  175. metadata +307 -459
  176. data/.autotest +0 -26
  177. data/.gemtest +0 -0
  178. data/CHANGELOG.ja.rdoc +0 -785
  179. data/CHANGELOG.rdoc +0 -783
  180. data/C_CODING_STYLE.rdoc +0 -33
  181. data/Manifest.txt +0 -303
  182. data/README.ja.rdoc +0 -106
  183. data/README.rdoc +0 -175
  184. data/ROADMAP.md +0 -90
  185. data/Rakefile +0 -228
  186. data/STANDARD_RESPONSES.md +0 -47
  187. data/Y_U_NO_GEMSPEC.md +0 -155
  188. data/build_all +0 -105
  189. data/ext/nokogiri/html_document.c +0 -170
  190. data/ext/nokogiri/html_document.h +0 -10
  191. data/ext/nokogiri/html_element_description.c +0 -279
  192. data/ext/nokogiri/html_element_description.h +0 -10
  193. data/ext/nokogiri/html_entity_lookup.c +0 -32
  194. data/ext/nokogiri/html_entity_lookup.h +0 -8
  195. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  196. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  197. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  198. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  199. data/ext/nokogiri/xml_attr.h +0 -9
  200. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  201. data/ext/nokogiri/xml_cdata.h +0 -9
  202. data/ext/nokogiri/xml_comment.h +0 -9
  203. data/ext/nokogiri/xml_document.h +0 -23
  204. data/ext/nokogiri/xml_document_fragment.h +0 -10
  205. data/ext/nokogiri/xml_dtd.h +0 -10
  206. data/ext/nokogiri/xml_element_content.h +0 -10
  207. data/ext/nokogiri/xml_element_decl.h +0 -9
  208. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  209. data/ext/nokogiri/xml_entity_decl.h +0 -10
  210. data/ext/nokogiri/xml_entity_reference.h +0 -9
  211. data/ext/nokogiri/xml_io.c +0 -56
  212. data/ext/nokogiri/xml_io.h +0 -11
  213. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  214. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  215. data/ext/nokogiri/xml_namespace.h +0 -13
  216. data/ext/nokogiri/xml_node.h +0 -13
  217. data/ext/nokogiri/xml_node_set.h +0 -14
  218. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  219. data/ext/nokogiri/xml_reader.h +0 -10
  220. data/ext/nokogiri/xml_relax_ng.h +0 -9
  221. data/ext/nokogiri/xml_sax_parser.h +0 -39
  222. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  223. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  224. data/ext/nokogiri/xml_schema.h +0 -9
  225. data/ext/nokogiri/xml_syntax_error.h +0 -13
  226. data/ext/nokogiri/xml_text.h +0 -9
  227. data/ext/nokogiri/xml_xpath_context.h +0 -10
  228. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  229. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  230. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  231. data/tasks/cross_compile.rb +0 -150
  232. data/tasks/nokogiri.org.rb +0 -24
  233. data/tasks/test.rb +0 -95
  234. data/test/css/test_nthiness.rb +0 -159
  235. data/test/css/test_parser.rb +0 -341
  236. data/test/css/test_tokenizer.rb +0 -198
  237. data/test/css/test_xpath_visitor.rb +0 -91
  238. data/test/decorators/test_slop.rb +0 -16
  239. data/test/files/2ch.html +0 -108
  240. data/test/files/address_book.rlx +0 -12
  241. data/test/files/address_book.xml +0 -10
  242. data/test/files/bar/bar.xsd +0 -4
  243. data/test/files/dont_hurt_em_why.xml +0 -422
  244. data/test/files/encoding.html +0 -82
  245. data/test/files/encoding.xhtml +0 -84
  246. data/test/files/exslt.xml +0 -8
  247. data/test/files/exslt.xslt +0 -35
  248. data/test/files/foo/foo.xsd +0 -4
  249. data/test/files/metacharset.html +0 -10
  250. data/test/files/noencoding.html +0 -47
  251. data/test/files/po.xml +0 -32
  252. data/test/files/po.xsd +0 -66
  253. data/test/files/shift_jis.html +0 -10
  254. data/test/files/shift_jis.xml +0 -5
  255. data/test/files/snuggles.xml +0 -3
  256. data/test/files/staff.dtd +0 -10
  257. data/test/files/staff.xml +0 -59
  258. data/test/files/staff.xslt +0 -32
  259. data/test/files/test_document_url/bar.xml +0 -2
  260. data/test/files/test_document_url/document.dtd +0 -4
  261. data/test/files/test_document_url/document.xml +0 -6
  262. data/test/files/tlm.html +0 -850
  263. data/test/files/to_be_xincluded.xml +0 -2
  264. data/test/files/valid_bar.xml +0 -2
  265. data/test/files/xinclude.xml +0 -4
  266. data/test/helper.rb +0 -154
  267. data/test/html/sax/test_parser.rb +0 -141
  268. data/test/html/sax/test_parser_context.rb +0 -46
  269. data/test/html/test_builder.rb +0 -164
  270. data/test/html/test_document.rb +0 -552
  271. data/test/html/test_document_encoding.rb +0 -138
  272. data/test/html/test_document_fragment.rb +0 -261
  273. data/test/html/test_element_description.rb +0 -105
  274. data/test/html/test_named_characters.rb +0 -14
  275. data/test/html/test_node.rb +0 -196
  276. data/test/html/test_node_encoding.rb +0 -27
  277. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  278. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  279. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  280. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  281. data/test/test_convert_xpath.rb +0 -135
  282. data/test/test_css_cache.rb +0 -45
  283. data/test/test_encoding_handler.rb +0 -46
  284. data/test/test_memory_leak.rb +0 -156
  285. data/test/test_nokogiri.rb +0 -132
  286. data/test/test_reader.rb +0 -555
  287. data/test/test_soap4r_sax.rb +0 -52
  288. data/test/test_xslt_transforms.rb +0 -254
  289. data/test/xml/node/test_save_options.rb +0 -28
  290. data/test/xml/node/test_subclass.rb +0 -44
  291. data/test/xml/sax/test_parser.rb +0 -366
  292. data/test/xml/sax/test_parser_context.rb +0 -106
  293. data/test/xml/sax/test_push_parser.rb +0 -157
  294. data/test/xml/test_attr.rb +0 -64
  295. data/test/xml/test_attribute_decl.rb +0 -86
  296. data/test/xml/test_builder.rb +0 -306
  297. data/test/xml/test_c14n.rb +0 -151
  298. data/test/xml/test_cdata.rb +0 -48
  299. data/test/xml/test_comment.rb +0 -29
  300. data/test/xml/test_document.rb +0 -828
  301. data/test/xml/test_document_encoding.rb +0 -28
  302. data/test/xml/test_document_fragment.rb +0 -223
  303. data/test/xml/test_dtd.rb +0 -103
  304. data/test/xml/test_dtd_encoding.rb +0 -33
  305. data/test/xml/test_element_content.rb +0 -56
  306. data/test/xml/test_element_decl.rb +0 -73
  307. data/test/xml/test_entity_decl.rb +0 -122
  308. data/test/xml/test_entity_reference.rb +0 -245
  309. data/test/xml/test_namespace.rb +0 -95
  310. data/test/xml/test_node.rb +0 -1137
  311. data/test/xml/test_node_attributes.rb +0 -96
  312. data/test/xml/test_node_encoding.rb +0 -107
  313. data/test/xml/test_node_inheritance.rb +0 -32
  314. data/test/xml/test_node_reparenting.rb +0 -374
  315. data/test/xml/test_node_set.rb +0 -755
  316. data/test/xml/test_parse_options.rb +0 -64
  317. data/test/xml/test_processing_instruction.rb +0 -30
  318. data/test/xml/test_reader_encoding.rb +0 -142
  319. data/test/xml/test_relax_ng.rb +0 -60
  320. data/test/xml/test_schema.rb +0 -103
  321. data/test/xml/test_syntax_error.rb +0 -12
  322. data/test/xml/test_text.rb +0 -45
  323. data/test/xml/test_unparented_node.rb +0 -422
  324. data/test/xml/test_xinclude.rb +0 -83
  325. data/test/xml/test_xpath.rb +0 -295
  326. data/test/xslt/test_custom_functions.rb +0 -133
  327. data/test/xslt/test_exception_handling.rb +0 -37
  328. data/test_all +0 -81
@@ -1,15 +1,19 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ####
4
5
  # A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
5
6
  # a NodeSet is returned as a result of searching a Document via
6
- # Nokogiri::XML::Node#css or Nokogiri::XML::Node#xpath
7
+ # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
7
8
  class NodeSet
9
+ include Nokogiri::XML::Searchable
8
10
  include Enumerable
9
11
 
10
12
  # The Document this NodeSet is associated with
11
13
  attr_accessor :document
12
14
 
15
+ alias :clone :dup
16
+
13
17
  # Create a NodeSet with +document+ defaulting to +list+
14
18
  def initialize document, list = []
15
19
  @document = document
@@ -23,7 +27,7 @@ module Nokogiri
23
27
  def first n = nil
24
28
  return self[0] unless n
25
29
  list = []
26
- n.times { |i| list << self[i] }
30
+ [n, length].min.times { |i| list << self[i] }
27
31
  list
28
32
  end
29
33
 
@@ -40,9 +44,14 @@ module Nokogiri
40
44
  end
41
45
 
42
46
  ###
43
- # Returns the index of the first node in self that is == to +node+. Returns nil if no match is found.
44
- def index(node)
45
- each_with_index { |member, j| return j if member == node }
47
+ # Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
48
+ def index(node = nil)
49
+ if node
50
+ warn "given block not used" if block_given?
51
+ each_with_index { |member, j| return j if member == node }
52
+ elsif block_given?
53
+ each_with_index { |member, j| return j if yield(member) }
54
+ end
46
55
  nil
47
56
  end
48
57
 
@@ -62,78 +71,34 @@ module Nokogiri
62
71
  alias :remove :unlink
63
72
 
64
73
  ###
65
- # Search this document for +paths+
74
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
66
75
  #
67
- # For more information see Nokogiri::XML::Node#css and
68
- # Nokogiri::XML::Node#xpath
69
- def search *paths
70
- handler = ![
71
- Hash, String, Symbol
72
- ].include?(paths.last.class) ? paths.pop : nil
73
-
74
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
75
-
76
- sub_set = NodeSet.new(document)
77
-
78
- paths.each do |path|
79
- sub_set += send(
80
- path =~ /^(\.\/|\/|\.\.|\.$)/ ? :xpath : :css,
81
- *(paths + [ns, handler]).compact
82
- )
83
- end
84
-
85
- document.decorate(sub_set)
86
- sub_set
87
- end
88
- alias :/ :search
89
-
90
- ###
91
- # Search this NodeSet for css +paths+
76
+ # Search this node set for CSS +rules+. +rules+ must be one or more CSS
77
+ # selectors. For example:
92
78
  #
93
- # For more information see Nokogiri::XML::Node#css
94
- def css *paths
95
- handler = ![
96
- Hash, String, Symbol
97
- ].include?(paths.last.class) ? paths.pop : nil
98
-
99
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
100
-
101
- sub_set = NodeSet.new(document)
79
+ # For more information see Nokogiri::XML::Searchable#css
80
+ def css *args
81
+ rules, handler, ns, _ = extract_params(args)
82
+ paths = css_rules_to_xpath(rules, ns)
102
83
 
103
- each do |node|
104
- doc = node.document
105
- search_ns = ns || (doc.root ? doc.root.namespaces : {})
106
-
107
- xpaths = paths.map { |rule|
108
- [
109
- CSS.xpath_for(rule.to_s, :prefix => ".//", :ns => search_ns),
110
- CSS.xpath_for(rule.to_s, :prefix => "self::", :ns => search_ns)
111
- ].join(' | ')
112
- }
113
-
114
- sub_set += node.xpath(*(xpaths + [search_ns, handler].compact))
84
+ inject(NodeSet.new(document)) do |set, node|
85
+ set + xpath_internal(node, paths, handler, ns, nil)
115
86
  end
116
- document.decorate(sub_set)
117
- sub_set
118
87
  end
119
88
 
120
89
  ###
121
- # Search this NodeSet for XPath +paths+
90
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
122
91
  #
123
- # For more information see Nokogiri::XML::Node#xpath
124
- def xpath *paths
125
- handler = ![
126
- Hash, String, Symbol
127
- ].include?(paths.last.class) ? paths.pop : nil
128
-
129
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
92
+ # Search this node set for XPath +paths+. +paths+ must be one or more XPath
93
+ # queries.
94
+ #
95
+ # For more information see Nokogiri::XML::Searchable#xpath
96
+ def xpath *args
97
+ paths, handler, ns, binds = extract_params(args)
130
98
 
131
- sub_set = NodeSet.new(document)
132
- each do |node|
133
- sub_set += node.xpath(*(paths + [ns, handler].compact))
99
+ inject(NodeSet.new(document)) do |set, node|
100
+ set + xpath_internal(node, paths, handler, ns, binds)
134
101
  end
135
- document.decorate(sub_set)
136
- sub_set
137
102
  end
138
103
 
139
104
  ###
@@ -144,31 +109,25 @@ module Nokogiri
144
109
  end
145
110
 
146
111
  ###
147
- # If path is a string, search this document for +path+ returning the
148
- # first Node. Otherwise, index in to the array with +path+.
149
- def at path, ns = document.root ? document.root.namespaces : {}
150
- return self[path] if path.is_a?(Numeric)
151
- search(path, ns).first
152
- end
153
- alias :% :at
154
-
155
- ##
156
- # Search this NodeSet for the first occurrence of XPath +paths+.
157
- # Equivalent to <tt>xpath(paths).first</tt>
158
- # See NodeSet#xpath for more information.
112
+ # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
159
113
  #
160
- def at_xpath *paths
161
- xpath(*paths).first
162
- end
163
-
164
- ##
165
- # Search this NodeSet for the first occurrence of CSS +rules+.
166
- # Equivalent to <tt>css(rules).first</tt>
167
- # See NodeSet#css for more information.
114
+ # Search this object for +paths+, and return only the first
115
+ # result. +paths+ must be one or more XPath or CSS queries.
116
+ #
117
+ # See Searchable#search for more information.
118
+ #
119
+ # Or, if passed an integer, index into the NodeSet:
168
120
  #
169
- def at_css *rules
170
- css(*rules).first
121
+ # node_set.at(3) # same as node_set[3]
122
+ #
123
+ def at *args
124
+ if args.length == 1 && args.first.is_a?(Numeric)
125
+ return self[args.first]
126
+ end
127
+
128
+ super(*args)
171
129
  end
130
+ alias :% :at
172
131
 
173
132
  ###
174
133
  # Filter this list for nodes that match +expr+
@@ -177,46 +136,86 @@ module Nokogiri
177
136
  end
178
137
 
179
138
  ###
180
- # Append the class attribute +name+ to all Node objects in the NodeSet.
139
+ # Add the class attribute +name+ to all Node objects in the
140
+ # NodeSet.
141
+ #
142
+ # See Nokogiri::XML::Node#add_class for more information.
181
143
  def add_class name
182
144
  each do |el|
183
- classes = el['class'].to_s.split(/\s+/)
184
- el['class'] = classes.push(name).uniq.join " "
145
+ el.add_class(name)
185
146
  end
186
147
  self
187
148
  end
188
149
 
189
150
  ###
190
- # Remove the class attribute +name+ from all Node objects in the NodeSet.
191
- # If +name+ is nil, remove the class attribute from all Nodes in the
151
+ # Append the class attribute +name+ to all Node objects in the
192
152
  # NodeSet.
153
+ #
154
+ # See Nokogiri::XML::Node#append_class for more information.
155
+ def append_class name
156
+ each do |el|
157
+ el.append_class(name)
158
+ end
159
+ self
160
+ end
161
+
162
+ ###
163
+ # Remove the class attribute +name+ from all Node objects in the
164
+ # NodeSet.
165
+ #
166
+ # See Nokogiri::XML::Node#remove_class for more information.
193
167
  def remove_class name = nil
194
168
  each do |el|
195
- if name
196
- classes = el['class'].to_s.split(/\s+/)
197
- if classes.empty?
198
- el.delete 'class'
199
- else
200
- el['class'] = (classes - [name]).uniq.join " "
201
- end
202
- else
203
- el.delete "class"
204
- end
169
+ el.remove_class(name)
205
170
  end
206
171
  self
207
172
  end
208
173
 
209
174
  ###
210
- # Set the attribute +key+ to +value+ or the return value of +blk+
211
- # on all Node objects in the NodeSet.
212
- def attr key, value = nil, &blk
213
- unless Hash === key || key && (value || blk)
214
- return first.attribute(key)
175
+ # Set attributes on each Node in the NodeSet, or get an
176
+ # attribute from the first Node in the NodeSet.
177
+ #
178
+ # To get an attribute from the first Node in a NodeSet:
179
+ #
180
+ # node_set.attr("href") # => "https://www.nokogiri.org"
181
+ #
182
+ # Note that an empty NodeSet will return nil when +#attr+ is called as a getter.
183
+ #
184
+ # To set an attribute on each node, +key+ can either be an
185
+ # attribute name, or a Hash of attribute names and values. When
186
+ # called as a setter, +#attr+ returns the NodeSet.
187
+ #
188
+ # If +key+ is an attribute name, then either +value+ or +block+
189
+ # must be passed.
190
+ #
191
+ # If +key+ is a Hash then attributes will be set for each
192
+ # key/value pair:
193
+ #
194
+ # node_set.attr("href" => "https://www.nokogiri.org", "class" => "member")
195
+ #
196
+ # If +value+ is passed, it will be used as the attribute value
197
+ # for all nodes:
198
+ #
199
+ # node_set.attr("href", "https://www.nokogiri.org")
200
+ #
201
+ # If +block+ is passed, it will be called on each Node object in
202
+ # the NodeSet and the return value used as the attribute value
203
+ # for that node:
204
+ #
205
+ # node_set.attr("class") { |node| node.name }
206
+ #
207
+ def attr key, value = nil, &block
208
+ unless key.is_a?(Hash) || (key && (value || block))
209
+ return first ? first.attribute(key) : nil
215
210
  end
216
211
 
217
212
  hash = key.is_a?(Hash) ? key : { key => value }
218
213
 
219
- hash.each { |k,v| each { |el| el[k] = v || blk[el] } }
214
+ hash.each do |k,v|
215
+ each do |node|
216
+ node[k] = v || block.call(node)
217
+ end
218
+ end
220
219
 
221
220
  self
222
221
  end
@@ -229,19 +228,34 @@ module Nokogiri
229
228
  each { |el| el.delete name }
230
229
  self
231
230
  end
231
+ alias remove_attribute remove_attr
232
232
 
233
233
  ###
234
234
  # Iterate over each node, yielding to +block+
235
- def each(&block)
235
+ def each
236
+ return to_enum unless block_given?
237
+
236
238
  0.upto(length - 1) do |x|
237
239
  yield self[x]
238
240
  end
241
+ self
239
242
  end
240
243
 
241
244
  ###
242
245
  # Get the inner text of all contained Node objects
246
+ #
247
+ # Note: This joins the text of all Node objects in the NodeSet:
248
+ #
249
+ # doc = Nokogiri::XML('<xml><a><d>foo</d><d>bar</d></a></xml>')
250
+ # doc.css('d').text # => "foobar"
251
+ #
252
+ # Instead, if you want to return the text of all nodes in the NodeSet:
253
+ #
254
+ # doc.css('d').map(&:text) # => ["foo", "bar"]
255
+ #
256
+ # See Nokogiri::XML::Node#content for more information.
243
257
  def inner_text
244
- collect{|j| j.inner_text}.join('')
258
+ collect(&:inner_text).join('')
245
259
  end
246
260
  alias :text :inner_text
247
261
 
@@ -252,20 +266,15 @@ module Nokogiri
252
266
  end
253
267
 
254
268
  ###
255
- # Wrap this NodeSet with +html+ or the results of the builder in +blk+
256
- def wrap(html, &blk)
257
- each do |j|
258
- new_parent = document.parse(html).first
259
- j.add_next_sibling(new_parent)
260
- new_parent.add_child(j)
261
- end
262
- self
269
+ # Wrap this NodeSet with +html+
270
+ def wrap html
271
+ map { |node| node.wrap html }
263
272
  end
264
273
 
265
274
  ###
266
275
  # Convert this NodeSet to a string.
267
276
  def to_s
268
- map { |x| x.to_s }.join
277
+ map(&:to_s).join
269
278
  end
270
279
 
271
280
  ###
@@ -329,7 +338,11 @@ module Nokogiri
329
338
  # Returns a new NodeSet containing all the children of all the nodes in
330
339
  # the NodeSet
331
340
  def children
332
- inject(NodeSet.new(document)) { |set, node| set += node.children }
341
+ node_set = NodeSet.new(document)
342
+ each do |node|
343
+ node.children.each { |n| node_set.push(n) }
344
+ end
345
+ node_set
333
346
  end
334
347
 
335
348
  ###
@@ -346,10 +359,14 @@ module Nokogiri
346
359
  ###
347
360
  # Return a nicely formated string representation
348
361
  def inspect
349
- "[#{map { |c| c.inspect }.join ', '}]"
362
+ "[#{map(&:inspect).join ', '}]"
350
363
  end
351
364
 
352
365
  alias :+ :|
366
+
367
+ # @private
368
+ IMPLIED_XPATH_CONTEXTS = [ './/'.freeze, 'self::'.freeze ].freeze # :nodoc:
369
+
353
370
  end
354
371
  end
355
372
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Notation < Struct.new(:name, :public_id, :system_id)
@@ -1,7 +1,30 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
4
5
  # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
6
+ #
7
+ # == Building combinations of parse options
8
+ # You can build your own combinations of these parse options by using any of the following methods:
9
+ # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
10
+ # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
11
+ # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
12
+ # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
13
+ # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
14
+ # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
15
+ # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
16
+ #
17
+ # == Removing particular parse options
18
+ # You can also remove options from an instance of +ParseOptions+ dynamically.
19
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
20
+ # Note that this is not available for +STRICT+.
21
+ #
22
+ # # Setting the RECOVER & NOENT options...
23
+ # options = Nokogiri::XML::ParseOptions.new.recover.noent
24
+ # # later...
25
+ # options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
26
+ # options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
27
+ #
5
28
  class ParseOptions
6
29
  # Strict parsing
7
30
  STRICT = 0
@@ -48,8 +71,12 @@ module Nokogiri
48
71
 
49
72
  # the default options used for parsing XML documents
50
73
  DEFAULT_XML = RECOVER | NONET
74
+ # the default options used for parsing XSLT stylesheets
75
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
51
76
  # the default options used for parsing HTML documents
52
77
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
78
+ # the default options used for parsing XML schemas
79
+ DEFAULT_SCHEMA = NONET
53
80
 
54
81
  attr_accessor :options
55
82
  def initialize options = STRICT
@@ -84,6 +111,10 @@ module Nokogiri
84
111
  @options & RECOVER == STRICT
85
112
  end
86
113
 
114
+ def ==(other)
115
+ other.to_i == to_i
116
+ end
117
+
87
118
  alias :to_i :options
88
119
 
89
120
  def inspect
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module PP
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module PP
@@ -1,2 +1,3 @@
1
- require 'nokogiri/xml/pp/node'
2
- require 'nokogiri/xml/pp/character_data'
1
+ # frozen_string_literal: true
2
+ require_relative "pp/node"
3
+ require_relative "pp/character_data"
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class ProcessingInstruction < Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -85,19 +86,15 @@ module Nokogiri
85
86
  private :initialize
86
87
 
87
88
  ###
88
- # Get a list of attributes for the current node.
89
+ # Get the attributes of the current node as a Hash
90
+ # @return [Hash<String, String>] Attribute names and values
89
91
  def attributes
90
- Hash[attribute_nodes.map { |node|
91
- [node.name, node.to_s]
92
- }].merge(namespaces || {})
93
- end
94
-
95
- ###
96
- # Get a list of attributes for the current node
97
- def attribute_nodes
98
- nodes = attr_nodes
99
- nodes.each { |v| v.instance_variable_set(:@_r, self) }
100
- nodes
92
+ attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
93
+ hash[node.name] = node.to_s
94
+ end
95
+ ns = namespaces
96
+ attrs_hash.merge!(ns) if ns
97
+ attrs_hash
101
98
  end
102
99
 
103
100
  ###
@@ -1,11 +1,12 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class << self
4
5
  ###
5
6
  # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
6
7
  # See Nokogiri::XML::RelaxNG for an example.
7
- def RelaxNG string_or_io
8
- RelaxNG.new(string_or_io)
8
+ def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9
+ RelaxNG.new(string_or_io, options)
9
10
  end
10
11
  end
11
12
 
@@ -26,6 +27,10 @@ module Nokogiri
26
27
  # end
27
28
  #
28
29
  # Each error in the list is a Nokogiri::XML::SyntaxError object.
30
+ #
31
+ # NOTE: RelaxNG input is always treated as TRUSTED, meaning that it will cause the
32
+ # underlying parsing libraries to access network resources. This is counter to Nokogiri's
33
+ # "untrusted by default" security policy, but is a limitation of the underlying libraries.
29
34
  class RelaxNG < Nokogiri::XML::Schema
30
35
  end
31
36
  end
@@ -1,20 +1,20 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
4
- # SAX Parsers are event driven parsers. Nokogiri provides two different
5
- # event based parsers when dealing with XML. If you want to do SAX style
6
- # parsing using HTML, check out Nokogiri::HTML::SAX.
5
+ # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
6
+ # dealing with XML. If you want to do SAX style parsing using HTML, check out
7
+ # Nokogiri::HTML4::SAX.
7
8
  #
8
- # The basic way a SAX style parser works is by creating a parser,
9
- # telling the parser about the events we're interested in, then giving
10
- # the parser some XML to process. The parser will notify you when
11
- # it encounters events your said you would like to know about.
9
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
10
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
11
+ # you when it encounters events you said you would like to know about.
12
12
  #
13
- # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
14
- # and implement the methods for which you would like notification.
13
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
14
+ # methods for which you would like notification.
15
15
  #
16
- # For example, if I want to be notified when a document ends, and when an
17
- # element starts, I would write a class like this:
16
+ # For example, if I want to be notified when a document ends, and when an element starts, I
17
+ # would write a class like this:
18
18
  #
19
19
  # class MyDocument < Nokogiri::XML::SAX::Document
20
20
  # def end_document
@@ -26,8 +26,7 @@ module Nokogiri
26
26
  # end
27
27
  # end
28
28
  #
29
- # Then I would instantiate a SAX parser with this document, and feed the
30
- # parser some XML
29
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
31
30
  #
32
31
  # # Create a new parser
33
32
  # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
@@ -35,25 +34,21 @@ module Nokogiri
35
34
  # # Feed the parser some XML
36
35
  # parser.parse(File.open(ARGV[0]))
37
36
  #
38
- # Now my document handler will be called when each node starts, and when
39
- # then document ends. To see what kinds of events are available, take
40
- # a look at Nokogiri::XML::SAX::Document.
37
+ # Now my document handler will be called when each node starts, and when the document ends. To
38
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
41
39
  #
42
- # Two SAX parsers for XML are available, a parser that reads from a string
43
- # or IO object as it feels necessary, and a parser that lets you spoon
44
- # feed it XML. If you want to let Nokogiri deal with reading your XML,
45
- # use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
40
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
41
+ # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
42
+ # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine-grained
46
43
  # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
47
44
  module SAX
48
45
  ###
49
- # This class is used for registering types of events you are interested
50
- # in handling. All of the methods on this class are available as
51
- # possible events while parsing an XML document. To register for any
52
- # particular event, just subclass this class and implement the methods
53
- # you are interested in knowing about.
46
+ # This class is used for registering types of events you are interested in handling. All of
47
+ # the methods on this class are available as possible events while parsing an XML document. To
48
+ # register for any particular event, just subclass this class and implement the methods you
49
+ # are interested in knowing about.
54
50
  #
55
- # To only be notified about start and end element events, write a class
56
- # like this:
51
+ # To only be notified about start and end element events, write a class like this:
57
52
  #
58
53
  # class MyDocument < Nokogiri::XML::SAX::Document
59
54
  # def start_element name, attrs = []
@@ -65,8 +60,8 @@ module Nokogiri
65
60
  # end
66
61
  # end
67
62
  #
68
- # You can use this event handler for any SAX style parser included with
69
- # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
63
+ # You can use this event handler for any SAX style parser included with Nokogiri. See
64
+ # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
70
65
  class Document
71
66
  ###
72
67
  # Called when an XML declaration is parsed
@@ -128,7 +123,7 @@ module Nokogiri
128
123
  end
129
124
 
130
125
  ###
131
- # Characters read between a tag. This method might be called multiple
126
+ # Characters read between tags. This method might be called multiple
132
127
  # times given one contiguous string of characters.
133
128
  #
134
129
  # +string+ contains the character data
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module SAX
@@ -68,8 +69,7 @@ module Nokogiri
68
69
 
69
70
  # Create a new Parser with +doc+ and +encoding+
70
71
  def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
71
- check_encoding(encoding)
72
- @encoding = encoding
72
+ @encoding = check_encoding(encoding)
73
73
  @document = doc
74
74
  @warned = false
75
75
  end
@@ -88,9 +88,8 @@ module Nokogiri
88
88
  ###
89
89
  # Parse given +io+
90
90
  def parse_io io, encoding = 'ASCII'
91
- check_encoding(encoding)
92
- @encoding = encoding
93
- ctx = ParserContext.io(io, ENCODINGS[encoding])
91
+ @encoding = check_encoding(encoding)
92
+ ctx = ParserContext.io(io, ENCODINGS[@encoding])
94
93
  yield ctx if block_given?
95
94
  ctx.parse_with self
96
95
  end
@@ -99,7 +98,7 @@ module Nokogiri
99
98
  # Parse a file with +filename+
100
99
  def parse_file filename
101
100
  raise ArgumentError unless filename
102
- raise Errno::ENOENT unless File.exists?(filename)
101
+ raise Errno::ENOENT unless File.exist?(filename)
103
102
  raise Errno::EISDIR if File.directory?(filename)
104
103
  ctx = ParserContext.file filename
105
104
  yield ctx if block_given?
@@ -114,8 +113,9 @@ module Nokogiri
114
113
 
115
114
  private
116
115
  def check_encoding(encoding)
117
- encoding.upcase!
118
- raise ArgumentError.new("'#{encoding}' is not a valid encoding") unless ENCODINGS[encoding]
116
+ encoding.upcase.tap do |enc|
117
+ raise ArgumentError.new("'#{enc}' is not a valid encoding") unless ENCODINGS[enc]
118
+ end
119
119
  end
120
120
  end
121
121
  end