nokogiri 1.5.10 → 1.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (334) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +5 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +73 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +956 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +327 -288
  94. data/lib/nokogiri/css/parser.y +67 -45
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +7 -6
  99. data/lib/nokogiri/css/xpath_visitor.rb +263 -75
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -90
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +259 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +18 -16
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  171. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  172. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  173. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  174. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  175. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  176. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  177. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  178. metadata +382 -460
  179. data/.autotest +0 -26
  180. data/.gemtest +0 -0
  181. data/CHANGELOG.ja.rdoc +0 -785
  182. data/CHANGELOG.rdoc +0 -783
  183. data/C_CODING_STYLE.rdoc +0 -33
  184. data/Manifest.txt +0 -303
  185. data/README.ja.rdoc +0 -106
  186. data/README.rdoc +0 -175
  187. data/ROADMAP.md +0 -90
  188. data/Rakefile +0 -228
  189. data/STANDARD_RESPONSES.md +0 -47
  190. data/Y_U_NO_GEMSPEC.md +0 -155
  191. data/build_all +0 -105
  192. data/ext/nokogiri/html_document.c +0 -170
  193. data/ext/nokogiri/html_document.h +0 -10
  194. data/ext/nokogiri/html_element_description.c +0 -279
  195. data/ext/nokogiri/html_element_description.h +0 -10
  196. data/ext/nokogiri/html_entity_lookup.c +0 -32
  197. data/ext/nokogiri/html_entity_lookup.h +0 -8
  198. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  199. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  200. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  201. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  202. data/ext/nokogiri/xml_attr.h +0 -9
  203. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  204. data/ext/nokogiri/xml_cdata.h +0 -9
  205. data/ext/nokogiri/xml_comment.h +0 -9
  206. data/ext/nokogiri/xml_document.h +0 -23
  207. data/ext/nokogiri/xml_document_fragment.h +0 -10
  208. data/ext/nokogiri/xml_dtd.h +0 -10
  209. data/ext/nokogiri/xml_element_content.h +0 -10
  210. data/ext/nokogiri/xml_element_decl.h +0 -9
  211. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  212. data/ext/nokogiri/xml_entity_decl.h +0 -10
  213. data/ext/nokogiri/xml_entity_reference.h +0 -9
  214. data/ext/nokogiri/xml_io.c +0 -56
  215. data/ext/nokogiri/xml_io.h +0 -11
  216. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  217. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  218. data/ext/nokogiri/xml_namespace.h +0 -13
  219. data/ext/nokogiri/xml_node.h +0 -13
  220. data/ext/nokogiri/xml_node_set.h +0 -14
  221. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  222. data/ext/nokogiri/xml_reader.h +0 -10
  223. data/ext/nokogiri/xml_relax_ng.h +0 -9
  224. data/ext/nokogiri/xml_sax_parser.h +0 -39
  225. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  226. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  227. data/ext/nokogiri/xml_schema.h +0 -9
  228. data/ext/nokogiri/xml_syntax_error.h +0 -13
  229. data/ext/nokogiri/xml_text.h +0 -9
  230. data/ext/nokogiri/xml_xpath_context.h +0 -10
  231. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  232. data/lib/nokogiri/html/document.rb +0 -254
  233. data/lib/nokogiri/html/document_fragment.rb +0 -41
  234. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  235. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  236. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  237. data/tasks/cross_compile.rb +0 -150
  238. data/tasks/nokogiri.org.rb +0 -24
  239. data/tasks/test.rb +0 -95
  240. data/test/css/test_nthiness.rb +0 -159
  241. data/test/css/test_parser.rb +0 -341
  242. data/test/css/test_tokenizer.rb +0 -198
  243. data/test/css/test_xpath_visitor.rb +0 -91
  244. data/test/decorators/test_slop.rb +0 -16
  245. data/test/files/2ch.html +0 -108
  246. data/test/files/address_book.rlx +0 -12
  247. data/test/files/address_book.xml +0 -10
  248. data/test/files/bar/bar.xsd +0 -4
  249. data/test/files/dont_hurt_em_why.xml +0 -422
  250. data/test/files/encoding.html +0 -82
  251. data/test/files/encoding.xhtml +0 -84
  252. data/test/files/exslt.xml +0 -8
  253. data/test/files/exslt.xslt +0 -35
  254. data/test/files/foo/foo.xsd +0 -4
  255. data/test/files/metacharset.html +0 -10
  256. data/test/files/noencoding.html +0 -47
  257. data/test/files/po.xml +0 -32
  258. data/test/files/po.xsd +0 -66
  259. data/test/files/shift_jis.html +0 -10
  260. data/test/files/shift_jis.xml +0 -5
  261. data/test/files/snuggles.xml +0 -3
  262. data/test/files/staff.dtd +0 -10
  263. data/test/files/staff.xml +0 -59
  264. data/test/files/staff.xslt +0 -32
  265. data/test/files/test_document_url/bar.xml +0 -2
  266. data/test/files/test_document_url/document.dtd +0 -4
  267. data/test/files/test_document_url/document.xml +0 -6
  268. data/test/files/tlm.html +0 -850
  269. data/test/files/to_be_xincluded.xml +0 -2
  270. data/test/files/valid_bar.xml +0 -2
  271. data/test/files/xinclude.xml +0 -4
  272. data/test/helper.rb +0 -154
  273. data/test/html/sax/test_parser.rb +0 -141
  274. data/test/html/sax/test_parser_context.rb +0 -46
  275. data/test/html/test_builder.rb +0 -164
  276. data/test/html/test_document.rb +0 -552
  277. data/test/html/test_document_encoding.rb +0 -138
  278. data/test/html/test_document_fragment.rb +0 -261
  279. data/test/html/test_element_description.rb +0 -105
  280. data/test/html/test_named_characters.rb +0 -14
  281. data/test/html/test_node.rb +0 -196
  282. data/test/html/test_node_encoding.rb +0 -27
  283. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  284. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  285. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  286. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  287. data/test/test_convert_xpath.rb +0 -135
  288. data/test/test_css_cache.rb +0 -45
  289. data/test/test_encoding_handler.rb +0 -46
  290. data/test/test_memory_leak.rb +0 -156
  291. data/test/test_nokogiri.rb +0 -132
  292. data/test/test_reader.rb +0 -555
  293. data/test/test_soap4r_sax.rb +0 -52
  294. data/test/test_xslt_transforms.rb +0 -254
  295. data/test/xml/node/test_save_options.rb +0 -28
  296. data/test/xml/node/test_subclass.rb +0 -44
  297. data/test/xml/sax/test_parser.rb +0 -366
  298. data/test/xml/sax/test_parser_context.rb +0 -106
  299. data/test/xml/sax/test_push_parser.rb +0 -157
  300. data/test/xml/test_attr.rb +0 -64
  301. data/test/xml/test_attribute_decl.rb +0 -86
  302. data/test/xml/test_builder.rb +0 -306
  303. data/test/xml/test_c14n.rb +0 -151
  304. data/test/xml/test_cdata.rb +0 -48
  305. data/test/xml/test_comment.rb +0 -29
  306. data/test/xml/test_document.rb +0 -828
  307. data/test/xml/test_document_encoding.rb +0 -28
  308. data/test/xml/test_document_fragment.rb +0 -223
  309. data/test/xml/test_dtd.rb +0 -103
  310. data/test/xml/test_dtd_encoding.rb +0 -33
  311. data/test/xml/test_element_content.rb +0 -56
  312. data/test/xml/test_element_decl.rb +0 -73
  313. data/test/xml/test_entity_decl.rb +0 -122
  314. data/test/xml/test_entity_reference.rb +0 -245
  315. data/test/xml/test_namespace.rb +0 -95
  316. data/test/xml/test_node.rb +0 -1137
  317. data/test/xml/test_node_attributes.rb +0 -96
  318. data/test/xml/test_node_encoding.rb +0 -107
  319. data/test/xml/test_node_inheritance.rb +0 -32
  320. data/test/xml/test_node_reparenting.rb +0 -374
  321. data/test/xml/test_node_set.rb +0 -755
  322. data/test/xml/test_parse_options.rb +0 -64
  323. data/test/xml/test_processing_instruction.rb +0 -30
  324. data/test/xml/test_reader_encoding.rb +0 -142
  325. data/test/xml/test_relax_ng.rb +0 -60
  326. data/test/xml/test_schema.rb +0 -103
  327. data/test/xml/test_syntax_error.rb +0 -12
  328. data/test/xml/test_text.rb +0 -45
  329. data/test/xml/test_unparented_node.rb +0 -422
  330. data/test/xml/test_xinclude.rb +0 -83
  331. data/test/xml/test_xpath.rb +0 -295
  332. data/test/xslt/test_custom_functions.rb +0 -133
  333. data/test/xslt/test_exception_handling.rb +0 -37
  334. data/test_all +0 -81
@@ -1,17 +1,22 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  ####
4
6
  # A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
5
7
  # a NodeSet is returned as a result of searching a Document via
6
- # Nokogiri::XML::Node#css or Nokogiri::XML::Node#xpath
8
+ # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
7
9
  class NodeSet
10
+ include Nokogiri::XML::Searchable
8
11
  include Enumerable
9
12
 
10
13
  # The Document this NodeSet is associated with
11
14
  attr_accessor :document
12
15
 
16
+ alias_method :clone, :dup
17
+
13
18
  # Create a NodeSet with +document+ defaulting to +list+
14
- def initialize document, list = []
19
+ def initialize(document, list = [])
15
20
  @document = document
16
21
  document.decorate(self)
17
22
  list.each { |x| self << x }
@@ -20,10 +25,10 @@ module Nokogiri
20
25
 
21
26
  ###
22
27
  # Get the first element of the NodeSet.
23
- def first n = nil
28
+ def first(n = nil)
24
29
  return self[0] unless n
25
30
  list = []
26
- n.times { |i| list << self[i] }
31
+ [n, length].min.times { |i| list << self[i] }
27
32
  list
28
33
  end
29
34
 
@@ -40,240 +45,238 @@ module Nokogiri
40
45
  end
41
46
 
42
47
  ###
43
- # Returns the index of the first node in self that is == to +node+. Returns nil if no match is found.
44
- def index(node)
45
- each_with_index { |member, j| return j if member == node }
48
+ # Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
49
+ def index(node = nil)
50
+ if node
51
+ warn("given block not used") if block_given?
52
+ each_with_index { |member, j| return j if member == node }
53
+ elsif block_given?
54
+ each_with_index { |member, j| return j if yield(member) }
55
+ end
46
56
  nil
47
57
  end
48
58
 
49
59
  ###
50
60
  # Insert +datum+ before the first Node in this NodeSet
51
- def before datum
52
- first.before datum
61
+ def before(datum)
62
+ first.before(datum)
53
63
  end
54
64
 
55
65
  ###
56
66
  # Insert +datum+ after the last Node in this NodeSet
57
- def after datum
58
- last.after datum
67
+ def after(datum)
68
+ last.after(datum)
59
69
  end
60
70
 
61
- alias :<< :push
62
- alias :remove :unlink
71
+ alias_method :<<, :push
72
+ alias_method :remove, :unlink
63
73
 
64
74
  ###
65
- # Search this document for +paths+
75
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
66
76
  #
67
- # For more information see Nokogiri::XML::Node#css and
68
- # Nokogiri::XML::Node#xpath
69
- def search *paths
70
- handler = ![
71
- Hash, String, Symbol
72
- ].include?(paths.last.class) ? paths.pop : nil
73
-
74
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
75
-
76
- sub_set = NodeSet.new(document)
77
-
78
- paths.each do |path|
79
- sub_set += send(
80
- path =~ /^(\.\/|\/|\.\.|\.$)/ ? :xpath : :css,
81
- *(paths + [ns, handler]).compact
82
- )
83
- end
84
-
85
- document.decorate(sub_set)
86
- sub_set
87
- end
88
- alias :/ :search
89
-
90
- ###
91
- # Search this NodeSet for css +paths+
77
+ # Search this node set for CSS +rules+. +rules+ must be one or more CSS
78
+ # selectors. For example:
92
79
  #
93
- # For more information see Nokogiri::XML::Node#css
94
- def css *paths
95
- handler = ![
96
- Hash, String, Symbol
97
- ].include?(paths.last.class) ? paths.pop : nil
98
-
99
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
80
+ # For more information see Nokogiri::XML::Searchable#css
81
+ def css(*args)
82
+ rules, handler, ns, _ = extract_params(args)
83
+ paths = css_rules_to_xpath(rules, ns)
100
84
 
101
- sub_set = NodeSet.new(document)
102
-
103
- each do |node|
104
- doc = node.document
105
- search_ns = ns || (doc.root ? doc.root.namespaces : {})
106
-
107
- xpaths = paths.map { |rule|
108
- [
109
- CSS.xpath_for(rule.to_s, :prefix => ".//", :ns => search_ns),
110
- CSS.xpath_for(rule.to_s, :prefix => "self::", :ns => search_ns)
111
- ].join(' | ')
112
- }
113
-
114
- sub_set += node.xpath(*(xpaths + [search_ns, handler].compact))
85
+ inject(NodeSet.new(document)) do |set, node|
86
+ set + xpath_internal(node, paths, handler, ns, nil)
115
87
  end
116
- document.decorate(sub_set)
117
- sub_set
118
88
  end
119
89
 
120
90
  ###
121
- # Search this NodeSet for XPath +paths+
91
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
122
92
  #
123
- # For more information see Nokogiri::XML::Node#xpath
124
- def xpath *paths
125
- handler = ![
126
- Hash, String, Symbol
127
- ].include?(paths.last.class) ? paths.pop : nil
128
-
129
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
93
+ # Search this node set for XPath +paths+. +paths+ must be one or more XPath
94
+ # queries.
95
+ #
96
+ # For more information see Nokogiri::XML::Searchable#xpath
97
+ def xpath(*args)
98
+ paths, handler, ns, binds = extract_params(args)
130
99
 
131
- sub_set = NodeSet.new(document)
132
- each do |node|
133
- sub_set += node.xpath(*(paths + [ns, handler].compact))
100
+ inject(NodeSet.new(document)) do |set, node|
101
+ set + xpath_internal(node, paths, handler, ns, binds)
134
102
  end
135
- document.decorate(sub_set)
136
- sub_set
137
- end
138
-
139
- ###
140
- # Search this NodeSet's nodes' immediate children using CSS selector +selector+
141
- def > selector
142
- ns = document.root.namespaces
143
- xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
144
103
  end
145
104
 
146
105
  ###
147
- # If path is a string, search this document for +path+ returning the
148
- # first Node. Otherwise, index in to the array with +path+.
149
- def at path, ns = document.root ? document.root.namespaces : {}
150
- return self[path] if path.is_a?(Numeric)
151
- search(path, ns).first
152
- end
153
- alias :% :at
154
-
155
- ##
156
- # Search this NodeSet for the first occurrence of XPath +paths+.
157
- # Equivalent to <tt>xpath(paths).first</tt>
158
- # See NodeSet#xpath for more information.
106
+ # call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
159
107
  #
160
- def at_xpath *paths
161
- xpath(*paths).first
162
- end
163
-
164
- ##
165
- # Search this NodeSet for the first occurrence of CSS +rules+.
166
- # Equivalent to <tt>css(rules).first</tt>
167
- # See NodeSet#css for more information.
108
+ # Search this object for +paths+, and return only the first
109
+ # result. +paths+ must be one or more XPath or CSS queries.
110
+ #
111
+ # See Searchable#search for more information.
112
+ #
113
+ # Or, if passed an integer, index into the NodeSet:
114
+ #
115
+ # node_set.at(3) # same as node_set[3]
168
116
  #
169
- def at_css *rules
170
- css(*rules).first
117
+ def at(*args)
118
+ if args.length == 1 && args.first.is_a?(Numeric)
119
+ return self[args.first]
120
+ end
121
+
122
+ super(*args)
171
123
  end
124
+ alias_method :%, :at
172
125
 
173
126
  ###
174
127
  # Filter this list for nodes that match +expr+
175
- def filter expr
128
+ def filter(expr)
176
129
  find_all { |node| node.matches?(expr) }
177
130
  end
178
131
 
179
132
  ###
180
- # Append the class attribute +name+ to all Node objects in the NodeSet.
181
- def add_class name
133
+ # Add the class attribute +name+ to all Node objects in the
134
+ # NodeSet.
135
+ #
136
+ # See Nokogiri::XML::Node#add_class for more information.
137
+ def add_class(name)
182
138
  each do |el|
183
- classes = el['class'].to_s.split(/\s+/)
184
- el['class'] = classes.push(name).uniq.join " "
139
+ el.add_class(name)
185
140
  end
186
141
  self
187
142
  end
188
143
 
189
144
  ###
190
- # Remove the class attribute +name+ from all Node objects in the NodeSet.
191
- # If +name+ is nil, remove the class attribute from all Nodes in the
145
+ # Append the class attribute +name+ to all Node objects in the
192
146
  # NodeSet.
193
- def remove_class name = nil
147
+ #
148
+ # See Nokogiri::XML::Node#append_class for more information.
149
+ def append_class(name)
194
150
  each do |el|
195
- if name
196
- classes = el['class'].to_s.split(/\s+/)
197
- if classes.empty?
198
- el.delete 'class'
199
- else
200
- el['class'] = (classes - [name]).uniq.join " "
201
- end
202
- else
203
- el.delete "class"
204
- end
151
+ el.append_class(name)
152
+ end
153
+ self
154
+ end
155
+
156
+ ###
157
+ # Remove the class attribute +name+ from all Node objects in the
158
+ # NodeSet.
159
+ #
160
+ # See Nokogiri::XML::Node#remove_class for more information.
161
+ def remove_class(name = nil)
162
+ each do |el|
163
+ el.remove_class(name)
205
164
  end
206
165
  self
207
166
  end
208
167
 
209
168
  ###
210
- # Set the attribute +key+ to +value+ or the return value of +blk+
211
- # on all Node objects in the NodeSet.
212
- def attr key, value = nil, &blk
213
- unless Hash === key || key && (value || blk)
214
- return first.attribute(key)
169
+ # Set attributes on each Node in the NodeSet, or get an
170
+ # attribute from the first Node in the NodeSet.
171
+ #
172
+ # To get an attribute from the first Node in a NodeSet:
173
+ #
174
+ # node_set.attr("href") # => "https://www.nokogiri.org"
175
+ #
176
+ # Note that an empty NodeSet will return nil when +#attr+ is called as a getter.
177
+ #
178
+ # To set an attribute on each node, +key+ can either be an
179
+ # attribute name, or a Hash of attribute names and values. When
180
+ # called as a setter, +#attr+ returns the NodeSet.
181
+ #
182
+ # If +key+ is an attribute name, then either +value+ or +block+
183
+ # must be passed.
184
+ #
185
+ # If +key+ is a Hash then attributes will be set for each
186
+ # key/value pair:
187
+ #
188
+ # node_set.attr("href" => "https://www.nokogiri.org", "class" => "member")
189
+ #
190
+ # If +value+ is passed, it will be used as the attribute value
191
+ # for all nodes:
192
+ #
193
+ # node_set.attr("href", "https://www.nokogiri.org")
194
+ #
195
+ # If +block+ is passed, it will be called on each Node object in
196
+ # the NodeSet and the return value used as the attribute value
197
+ # for that node:
198
+ #
199
+ # node_set.attr("class") { |node| node.name }
200
+ #
201
+ def attr(key, value = nil, &block)
202
+ unless key.is_a?(Hash) || (key && (value || block))
203
+ return first ? first.attribute(key) : nil
215
204
  end
216
205
 
217
206
  hash = key.is_a?(Hash) ? key : { key => value }
218
207
 
219
- hash.each { |k,v| each { |el| el[k] = v || blk[el] } }
208
+ hash.each do |k, v|
209
+ each do |node|
210
+ node[k] = v || yield(node)
211
+ end
212
+ end
220
213
 
221
214
  self
222
215
  end
223
- alias :set :attr
224
- alias :attribute :attr
216
+ alias_method :set, :attr
217
+ alias_method :attribute, :attr
225
218
 
226
219
  ###
227
220
  # Remove the attribute named +name+ from all Node objects in the NodeSet
228
- def remove_attr name
229
- each { |el| el.delete name }
221
+ def remove_attr(name)
222
+ each { |el| el.delete(name) }
230
223
  self
231
224
  end
225
+ alias_method :remove_attribute, :remove_attr
232
226
 
233
227
  ###
234
228
  # Iterate over each node, yielding to +block+
235
- def each(&block)
229
+ def each
230
+ return to_enum unless block_given?
231
+
236
232
  0.upto(length - 1) do |x|
237
233
  yield self[x]
238
234
  end
235
+ self
239
236
  end
240
237
 
241
238
  ###
242
239
  # Get the inner text of all contained Node objects
240
+ #
241
+ # Note: This joins the text of all Node objects in the NodeSet:
242
+ #
243
+ # doc = Nokogiri::XML('<xml><a><d>foo</d><d>bar</d></a></xml>')
244
+ # doc.css('d').text # => "foobar"
245
+ #
246
+ # Instead, if you want to return the text of all nodes in the NodeSet:
247
+ #
248
+ # doc.css('d').map(&:text) # => ["foo", "bar"]
249
+ #
250
+ # See Nokogiri::XML::Node#content for more information.
243
251
  def inner_text
244
- collect{|j| j.inner_text}.join('')
252
+ collect(&:inner_text).join("")
245
253
  end
246
- alias :text :inner_text
254
+ alias_method :text, :inner_text
247
255
 
248
256
  ###
249
257
  # Get the inner html of all contained Node objects
250
- def inner_html *args
251
- collect{|j| j.inner_html(*args) }.join('')
258
+ def inner_html(*args)
259
+ collect { |j| j.inner_html(*args) }.join("")
252
260
  end
253
261
 
254
262
  ###
255
- # Wrap this NodeSet with +html+ or the results of the builder in +blk+
256
- def wrap(html, &blk)
257
- each do |j|
258
- new_parent = document.parse(html).first
259
- j.add_next_sibling(new_parent)
260
- new_parent.add_child(j)
261
- end
262
- self
263
+ # Wrap this NodeSet with +html+
264
+ def wrap(html)
265
+ map { |node| node.wrap(html) }
263
266
  end
264
267
 
265
268
  ###
266
269
  # Convert this NodeSet to a string.
267
270
  def to_s
268
- map { |x| x.to_s }.join
271
+ map(&:to_s).join
269
272
  end
270
273
 
271
274
  ###
272
275
  # Convert this NodeSet to HTML
273
- def to_html *args
276
+ def to_html(*args)
274
277
  if Nokogiri.jruby?
275
278
  options = args.first.is_a?(Hash) ? args.shift : {}
276
- if !options[:save_with]
279
+ unless options[:save_with]
277
280
  options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
278
281
  end
279
282
  args.insert(0, options)
@@ -283,25 +286,25 @@ module Nokogiri
283
286
 
284
287
  ###
285
288
  # Convert this NodeSet to XHTML
286
- def to_xhtml *args
289
+ def to_xhtml(*args)
287
290
  map { |x| x.to_xhtml(*args) }.join
288
291
  end
289
292
 
290
293
  ###
291
294
  # Convert this NodeSet to XML
292
- def to_xml *args
295
+ def to_xml(*args)
293
296
  map { |x| x.to_xml(*args) }.join
294
297
  end
295
298
 
296
- alias :size :length
297
- alias :to_ary :to_a
299
+ alias_method :size, :length
300
+ alias_method :to_ary, :to_a
298
301
 
299
302
  ###
300
303
  # Removes the last element from set and returns it, or +nil+ if
301
304
  # the set is empty
302
305
  def pop
303
306
  return nil if length == 0
304
- delete last
307
+ delete(last)
305
308
  end
306
309
 
307
310
  ###
@@ -309,14 +312,14 @@ module Nokogiri
309
312
  # +nil+ if the set is empty.
310
313
  def shift
311
314
  return nil if length == 0
312
- delete first
315
+ delete(first)
313
316
  end
314
317
 
315
318
  ###
316
319
  # Equality -- Two NodeSets are equal if they contain the same number
317
320
  # of elements and if each element is equal to the corresponding
318
321
  # element in the other NodeSet
319
- def == other
322
+ def ==(other)
320
323
  return false unless other.is_a?(Nokogiri::XML::NodeSet)
321
324
  return false unless length == other.length
322
325
  each_with_index do |node, i|
@@ -329,7 +332,11 @@ module Nokogiri
329
332
  # Returns a new NodeSet containing all the children of all the nodes in
330
333
  # the NodeSet
331
334
  def children
332
- inject(NodeSet.new(document)) { |set, node| set += node.children }
335
+ node_set = NodeSet.new(document)
336
+ each do |node|
337
+ node.children.each { |n| node_set.push(n) }
338
+ end
339
+ node_set
333
340
  end
334
341
 
335
342
  ###
@@ -338,7 +345,7 @@ module Nokogiri
338
345
  def reverse
339
346
  node_set = NodeSet.new(document)
340
347
  (length - 1).downto(0) do |x|
341
- node_set.push self[x]
348
+ node_set.push(self[x])
342
349
  end
343
350
  node_set
344
351
  end
@@ -346,10 +353,12 @@ module Nokogiri
346
353
  ###
347
354
  # Return a nicely formatted string representation
348
355
  def inspect
349
- "[#{map { |c| c.inspect }.join ', '}]"
356
+ "[#{map(&:inspect).join(", ")}]"
350
357
  end
351
358
 
352
- alias :+ :|
359
+ alias_method :+, :|
360
+
361
+ IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
353
362
  end
354
363
  end
355
364
  end
@@ -1,6 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
5
+ # Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
3
6
  class Notation < Struct.new(:name, :public_id, :system_id)
7
+ # dead comment to ensure rdoc processing
8
+
9
+ # :attr: name (String)
10
+ # The name for the element.
11
+
12
+ # :attr: public_id (String)
13
+ # The URI corresponding to the public identifier
14
+
15
+ # :attr: system_id (String,nil)
16
+ # The URI corresponding to the system identifier
4
17
  end
5
18
  end
6
19
  end
@@ -1,7 +1,31 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  ###
4
6
  # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
7
+ #
8
+ # == Building combinations of parse options
9
+ # You can build your own combinations of these parse options by using any of the following methods:
10
+ # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
11
+ # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
12
+ # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
13
+ # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
14
+ # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
15
+ # [Using Ruby Blocks] You can also set up parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
16
+ # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
17
+ #
18
+ # == Removing particular parse options
19
+ # You can also remove options from an instance of +ParseOptions+ dynamically.
20
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
21
+ # Note that this is not available for +STRICT+.
22
+ #
23
+ # # Setting the RECOVER & NOENT options...
24
+ # options = Nokogiri::XML::ParseOptions.new.recover.noent
25
+ # # later...
26
+ # options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
27
+ # options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
28
+ #
5
29
  class ParseOptions
6
30
  # Strict parsing
7
31
  STRICT = 0
@@ -45,14 +69,21 @@ module Nokogiri
45
69
  NOBASEFIX = 1 << 18
46
70
  # relax any hardcoded limit from the parser
47
71
  HUGE = 1 << 19
72
+ # line numbers stored as long int (instead of a short int)
73
+ BIG_LINES = 1 << 22
48
74
 
49
75
  # the default options used for parsing XML documents
50
- DEFAULT_XML = RECOVER | NONET
76
+ DEFAULT_XML = RECOVER | NONET | BIG_LINES
77
+ # the default options used for parsing XSLT stylesheets
78
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
51
79
  # the default options used for parsing HTML documents
52
- DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
80
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
81
+ # the default options used for parsing XML schemas
82
+ DEFAULT_SCHEMA = NONET | BIG_LINES
53
83
 
54
84
  attr_accessor :options
55
- def initialize options = STRICT
85
+
86
+ def initialize(options = STRICT)
56
87
  @options = options
57
88
  end
58
89
 
@@ -84,14 +115,18 @@ module Nokogiri
84
115
  @options & RECOVER == STRICT
85
116
  end
86
117
 
87
- alias :to_i :options
118
+ def ==(other)
119
+ other.to_i == to_i
120
+ end
121
+
122
+ alias_method :to_i, :options
88
123
 
89
124
  def inspect
90
125
  options = []
91
126
  self.class.constants.each do |k|
92
127
  options << k.downcase if send(:"#{k.downcase}?")
93
128
  end
94
- super.sub(/>$/, " " + options.join(', ') + ">")
129
+ super.sub(/>$/, " " + options.join(", ") + ">")
95
130
  end
96
131
  end
97
132
  end
@@ -1,16 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
5
+ # :nodoc: all
3
6
  module PP
4
7
  module CharacterData
5
- def pretty_print pp # :nodoc:
6
- nice_name = self.class.name.split('::').last
7
- pp.group(2, "#(#{nice_name} ", ')') do
8
- pp.pp text
8
+ def pretty_print(pp)
9
+ nice_name = self.class.name.split("::").last
10
+ pp.group(2, "#(#{nice_name} ", ")") do
11
+ pp.pp(text)
9
12
  end
10
13
  end
11
14
 
12
- def inspect # :nodoc:
13
- "#<#{self.class.name}:#{sprintf("0x%x",object_id)} #{text.inspect}>"
15
+ def inspect
16
+ "#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
14
17
  end
15
18
  end
16
19
  end