nokogiri 1.5.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (328) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +278 -0
  6. data/bin/nokogiri +50 -10
  7. data/dependencies.yml +74 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +944 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +305 -201
  25. data/ext/nokogiri/xml_document_fragment.c +13 -15
  26. data/ext/nokogiri/xml_dtd.c +54 -48
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +30 -19
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +808 -503
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +198 -186
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +162 -98
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4886 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/css/node.rb +1 -50
  92. data/lib/nokogiri/css/parser.rb +317 -286
  93. data/lib/nokogiri/css/parser.y +57 -43
  94. data/lib/nokogiri/css/parser_extras.rb +39 -36
  95. data/lib/nokogiri/css/syntax_error.rb +2 -1
  96. data/lib/nokogiri/css/tokenizer.rb +105 -103
  97. data/lib/nokogiri/css/tokenizer.rex +5 -5
  98. data/lib/nokogiri/css/xpath_visitor.rb +137 -48
  99. data/lib/nokogiri/css.rb +15 -14
  100. data/lib/nokogiri/decorators/slop.rb +13 -5
  101. data/lib/nokogiri/extension.rb +31 -0
  102. data/lib/nokogiri/gumbo.rb +14 -0
  103. data/lib/nokogiri/html.rb +32 -27
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +118 -50
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  113. data/lib/nokogiri/html4.rb +40 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/html5.rb +473 -0
  118. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  119. data/lib/nokogiri/syntax_error.rb +1 -0
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/version.rb +3 -91
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +75 -33
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +157 -54
  129. data/lib/nokogiri/xml/document_fragment.rb +55 -8
  130. data/lib/nokogiri/xml/dtd.rb +15 -4
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  137. data/lib/nokogiri/xml/node.rb +712 -431
  138. data/lib/nokogiri/xml/node_set.rb +140 -123
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +31 -0
  141. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  142. data/lib/nokogiri/xml/pp/node.rb +1 -0
  143. data/lib/nokogiri/xml/pp.rb +3 -2
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax/document.rb +25 -30
  148. data/lib/nokogiri/xml/sax/parser.rb +8 -8
  149. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  150. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  151. data/lib/nokogiri/xml/sax.rb +5 -4
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +239 -0
  154. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  157. data/lib/nokogiri/xml/xpath.rb +4 -5
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xml.rb +37 -35
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/nokogiri/xslt.rb +17 -16
  162. data/lib/nokogiri.rb +55 -58
  163. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  164. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  165. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  166. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  167. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  171. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  172. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  173. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  174. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  175. metadata +307 -459
  176. data/.autotest +0 -26
  177. data/.gemtest +0 -0
  178. data/CHANGELOG.ja.rdoc +0 -785
  179. data/CHANGELOG.rdoc +0 -783
  180. data/C_CODING_STYLE.rdoc +0 -33
  181. data/Manifest.txt +0 -303
  182. data/README.ja.rdoc +0 -106
  183. data/README.rdoc +0 -175
  184. data/ROADMAP.md +0 -90
  185. data/Rakefile +0 -228
  186. data/STANDARD_RESPONSES.md +0 -47
  187. data/Y_U_NO_GEMSPEC.md +0 -155
  188. data/build_all +0 -105
  189. data/ext/nokogiri/html_document.c +0 -170
  190. data/ext/nokogiri/html_document.h +0 -10
  191. data/ext/nokogiri/html_element_description.c +0 -279
  192. data/ext/nokogiri/html_element_description.h +0 -10
  193. data/ext/nokogiri/html_entity_lookup.c +0 -32
  194. data/ext/nokogiri/html_entity_lookup.h +0 -8
  195. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  196. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  197. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  198. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  199. data/ext/nokogiri/xml_attr.h +0 -9
  200. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  201. data/ext/nokogiri/xml_cdata.h +0 -9
  202. data/ext/nokogiri/xml_comment.h +0 -9
  203. data/ext/nokogiri/xml_document.h +0 -23
  204. data/ext/nokogiri/xml_document_fragment.h +0 -10
  205. data/ext/nokogiri/xml_dtd.h +0 -10
  206. data/ext/nokogiri/xml_element_content.h +0 -10
  207. data/ext/nokogiri/xml_element_decl.h +0 -9
  208. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  209. data/ext/nokogiri/xml_entity_decl.h +0 -10
  210. data/ext/nokogiri/xml_entity_reference.h +0 -9
  211. data/ext/nokogiri/xml_io.c +0 -56
  212. data/ext/nokogiri/xml_io.h +0 -11
  213. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  214. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  215. data/ext/nokogiri/xml_namespace.h +0 -13
  216. data/ext/nokogiri/xml_node.h +0 -13
  217. data/ext/nokogiri/xml_node_set.h +0 -14
  218. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  219. data/ext/nokogiri/xml_reader.h +0 -10
  220. data/ext/nokogiri/xml_relax_ng.h +0 -9
  221. data/ext/nokogiri/xml_sax_parser.h +0 -39
  222. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  223. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  224. data/ext/nokogiri/xml_schema.h +0 -9
  225. data/ext/nokogiri/xml_syntax_error.h +0 -13
  226. data/ext/nokogiri/xml_text.h +0 -9
  227. data/ext/nokogiri/xml_xpath_context.h +0 -10
  228. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  229. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  230. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  231. data/tasks/cross_compile.rb +0 -150
  232. data/tasks/nokogiri.org.rb +0 -24
  233. data/tasks/test.rb +0 -95
  234. data/test/css/test_nthiness.rb +0 -159
  235. data/test/css/test_parser.rb +0 -341
  236. data/test/css/test_tokenizer.rb +0 -198
  237. data/test/css/test_xpath_visitor.rb +0 -91
  238. data/test/decorators/test_slop.rb +0 -16
  239. data/test/files/2ch.html +0 -108
  240. data/test/files/address_book.rlx +0 -12
  241. data/test/files/address_book.xml +0 -10
  242. data/test/files/bar/bar.xsd +0 -4
  243. data/test/files/dont_hurt_em_why.xml +0 -422
  244. data/test/files/encoding.html +0 -82
  245. data/test/files/encoding.xhtml +0 -84
  246. data/test/files/exslt.xml +0 -8
  247. data/test/files/exslt.xslt +0 -35
  248. data/test/files/foo/foo.xsd +0 -4
  249. data/test/files/metacharset.html +0 -10
  250. data/test/files/noencoding.html +0 -47
  251. data/test/files/po.xml +0 -32
  252. data/test/files/po.xsd +0 -66
  253. data/test/files/shift_jis.html +0 -10
  254. data/test/files/shift_jis.xml +0 -5
  255. data/test/files/snuggles.xml +0 -3
  256. data/test/files/staff.dtd +0 -10
  257. data/test/files/staff.xml +0 -59
  258. data/test/files/staff.xslt +0 -32
  259. data/test/files/test_document_url/bar.xml +0 -2
  260. data/test/files/test_document_url/document.dtd +0 -4
  261. data/test/files/test_document_url/document.xml +0 -6
  262. data/test/files/tlm.html +0 -850
  263. data/test/files/to_be_xincluded.xml +0 -2
  264. data/test/files/valid_bar.xml +0 -2
  265. data/test/files/xinclude.xml +0 -4
  266. data/test/helper.rb +0 -154
  267. data/test/html/sax/test_parser.rb +0 -141
  268. data/test/html/sax/test_parser_context.rb +0 -46
  269. data/test/html/test_builder.rb +0 -164
  270. data/test/html/test_document.rb +0 -552
  271. data/test/html/test_document_encoding.rb +0 -138
  272. data/test/html/test_document_fragment.rb +0 -261
  273. data/test/html/test_element_description.rb +0 -105
  274. data/test/html/test_named_characters.rb +0 -14
  275. data/test/html/test_node.rb +0 -196
  276. data/test/html/test_node_encoding.rb +0 -27
  277. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  278. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  279. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  280. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  281. data/test/test_convert_xpath.rb +0 -135
  282. data/test/test_css_cache.rb +0 -45
  283. data/test/test_encoding_handler.rb +0 -46
  284. data/test/test_memory_leak.rb +0 -156
  285. data/test/test_nokogiri.rb +0 -132
  286. data/test/test_reader.rb +0 -555
  287. data/test/test_soap4r_sax.rb +0 -52
  288. data/test/test_xslt_transforms.rb +0 -254
  289. data/test/xml/node/test_save_options.rb +0 -28
  290. data/test/xml/node/test_subclass.rb +0 -44
  291. data/test/xml/sax/test_parser.rb +0 -366
  292. data/test/xml/sax/test_parser_context.rb +0 -106
  293. data/test/xml/sax/test_push_parser.rb +0 -157
  294. data/test/xml/test_attr.rb +0 -64
  295. data/test/xml/test_attribute_decl.rb +0 -86
  296. data/test/xml/test_builder.rb +0 -306
  297. data/test/xml/test_c14n.rb +0 -151
  298. data/test/xml/test_cdata.rb +0 -48
  299. data/test/xml/test_comment.rb +0 -29
  300. data/test/xml/test_document.rb +0 -828
  301. data/test/xml/test_document_encoding.rb +0 -28
  302. data/test/xml/test_document_fragment.rb +0 -223
  303. data/test/xml/test_dtd.rb +0 -103
  304. data/test/xml/test_dtd_encoding.rb +0 -33
  305. data/test/xml/test_element_content.rb +0 -56
  306. data/test/xml/test_element_decl.rb +0 -73
  307. data/test/xml/test_entity_decl.rb +0 -122
  308. data/test/xml/test_entity_reference.rb +0 -245
  309. data/test/xml/test_namespace.rb +0 -95
  310. data/test/xml/test_node.rb +0 -1137
  311. data/test/xml/test_node_attributes.rb +0 -96
  312. data/test/xml/test_node_encoding.rb +0 -107
  313. data/test/xml/test_node_inheritance.rb +0 -32
  314. data/test/xml/test_node_reparenting.rb +0 -374
  315. data/test/xml/test_node_set.rb +0 -755
  316. data/test/xml/test_parse_options.rb +0 -64
  317. data/test/xml/test_processing_instruction.rb +0 -30
  318. data/test/xml/test_reader_encoding.rb +0 -142
  319. data/test/xml/test_relax_ng.rb +0 -60
  320. data/test/xml/test_schema.rb +0 -103
  321. data/test/xml/test_syntax_error.rb +0 -12
  322. data/test/xml/test_text.rb +0 -45
  323. data/test/xml/test_unparented_node.rb +0 -422
  324. data/test/xml/test_xinclude.rb +0 -83
  325. data/test/xml/test_xpath.rb +0 -295
  326. data/test/xslt/test_custom_functions.rb +0 -133
  327. data/test/xslt/test_exception_handling.rb +0 -37
  328. data/test_all +0 -81
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module SAX
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  module SAX
@@ -1,4 +1,5 @@
1
- require 'nokogiri/xml/sax/document'
2
- require 'nokogiri/xml/sax/parser_context'
3
- require 'nokogiri/xml/sax/parser'
4
- require 'nokogiri/xml/sax/push_parser'
1
+ # frozen_string_literal: true
2
+ require_relative "sax/document"
3
+ require_relative "sax/parser_context"
4
+ require_relative "sax/parser"
5
+ require_relative "sax/push_parser"
@@ -1,11 +1,12 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class << self
4
5
  ###
5
6
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
6
7
  # object.
7
- def Schema string_or_io
8
- Schema.new(string_or_io)
8
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
9
+ Schema.new(string_or_io, options)
9
10
  end
10
11
  end
11
12
 
@@ -26,15 +27,23 @@ module Nokogiri
26
27
  # end
27
28
  #
28
29
  # The errors in the list are Nokogiri::XML::SyntaxError objects.
30
+ #
31
+ # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
32
+ # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
33
+ # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
34
+ # security policy. If a document is trusted, then the caller may turn off the NONET option via
35
+ # the ParseOptions to re-enable external entity resolution over a network connection.
29
36
  class Schema
30
37
  # Errors while parsing the schema file
31
38
  attr_accessor :errors
39
+ # The Nokogiri::XML::ParseOptions used to parse the schema
40
+ attr_accessor :parse_options
32
41
 
33
42
  ###
34
43
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
35
44
  # object.
36
- def self.new string_or_io
37
- from_document Nokogiri::XML(string_or_io)
45
+ def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
46
+ from_document(Nokogiri::XML(string_or_io), options)
38
47
  end
39
48
 
40
49
  ###
@@ -0,0 +1,239 @@
1
+ # frozen_string_literal: true
2
+ module Nokogiri
3
+ module XML
4
+ #
5
+ # The Searchable module declares the interface used for searching your DOM.
6
+ #
7
+ # It implements the public methods `search`, `css`, and `xpath`,
8
+ # as well as allowing specific implementations to specialize some
9
+ # of the important behaviors.
10
+ #
11
+ module Searchable
12
+ # Regular expression used by Searchable#search to determine if a query
13
+ # string is CSS or XPath
14
+ LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
15
+
16
+ # @!group Searching via XPath or CSS Queries
17
+
18
+ ###
19
+ # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
20
+ #
21
+ # Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
22
+ #
23
+ # node.search("div.employee", ".//title")
24
+ #
25
+ # A hash of namespace bindings may be appended:
26
+ #
27
+ # node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
28
+ # node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
29
+ #
30
+ # For XPath queries, a hash of variable bindings may also be
31
+ # appended to the namespace bindings. For example:
32
+ #
33
+ # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
34
+ #
35
+ # Custom XPath functions and CSS pseudo-selectors may also be
36
+ # defined. To define custom functions create a class and
37
+ # implement the function you want to define. The first argument
38
+ # to the method will be the current matching NodeSet. Any other
39
+ # arguments are ones that you pass in. Note that this class may
40
+ # appear anywhere in the argument list. For example:
41
+ #
42
+ # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")',
43
+ # Class.new {
44
+ # def regex node_set, regex
45
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
+ # end
47
+ # }.new
48
+ # )
49
+ #
50
+ # See Searchable#xpath and Searchable#css for further usage help.
51
+ def search(*args)
52
+ paths, handler, ns, binds = extract_params(args)
53
+
54
+ xpaths = paths.map(&:to_s).map do |path|
55
+ (path =~ LOOKS_LIKE_XPATH) ? path : xpath_query_from_css_rule(path, ns)
56
+ end.flatten.uniq
57
+
58
+ xpath(*(xpaths + [ns, handler, binds].compact))
59
+ end
60
+
61
+ alias :/ :search
62
+
63
+ ###
64
+ # call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
65
+ #
66
+ # Search this object for +paths+, and return only the first
67
+ # result. +paths+ must be one or more XPath or CSS queries.
68
+ #
69
+ # See Searchable#search for more information.
70
+ def at(*args)
71
+ search(*args).first
72
+ end
73
+
74
+ alias :% :at
75
+
76
+ ###
77
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
78
+ #
79
+ # Search this object for CSS +rules+. +rules+ must be one or more CSS
80
+ # selectors. For example:
81
+ #
82
+ # node.css('title')
83
+ # node.css('body h1.bold')
84
+ # node.css('div + p.green', 'div#one')
85
+ #
86
+ # A hash of namespace bindings may be appended. For example:
87
+ #
88
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
89
+ #
90
+ # Custom CSS pseudo classes may also be defined. To define
91
+ # custom pseudo classes, create a class and implement the custom
92
+ # pseudo class you want defined. The first argument to the
93
+ # method will be the current matching NodeSet. Any other
94
+ # arguments are ones that you pass in. For example:
95
+ #
96
+ # node.css('title:regex("\w+")', Class.new {
97
+ # def regex node_set, regex
98
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
99
+ # end
100
+ # }.new)
101
+ #
102
+ # Note that the CSS query string is case-sensitive with regards
103
+ # to your document type. That is, if you're looking for "H1" in
104
+ # an HTML document, you'll never find anything, since HTML tags
105
+ # will match only lowercase CSS queries. However, "H1" might be
106
+ # found in an XML document, where tag names are case-sensitive
107
+ # (e.g., "H1" is distinct from "h1").
108
+ #
109
+ def css(*args)
110
+ rules, handler, ns, _ = extract_params(args)
111
+
112
+ css_internal self, rules, handler, ns
113
+ end
114
+
115
+ ##
116
+ # call-seq: at_css *rules, [namespace-bindings, custom-pseudo-class]
117
+ #
118
+ # Search this object for CSS +rules+, and return only the first
119
+ # match. +rules+ must be one or more CSS selectors.
120
+ #
121
+ # See Searchable#css for more information.
122
+ def at_css(*args)
123
+ css(*args).first
124
+ end
125
+
126
+ ###
127
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
128
+ #
129
+ # Search this node for XPath +paths+. +paths+ must be one or more XPath
130
+ # queries.
131
+ #
132
+ # node.xpath('.//title')
133
+ #
134
+ # A hash of namespace bindings may be appended. For example:
135
+ #
136
+ # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
137
+ # node.xpath('.//xmlns:name', node.root.namespaces)
138
+ #
139
+ # A hash of variable bindings may also be appended to the namespace bindings. For example:
140
+ #
141
+ # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
142
+ #
143
+ # Custom XPath functions may also be defined. To define custom
144
+ # functions create a class and implement the function you want
145
+ # to define. The first argument to the method will be the
146
+ # current matching NodeSet. Any other arguments are ones that
147
+ # you pass in. Note that this class may appear anywhere in the
148
+ # argument list. For example:
149
+ #
150
+ # node.xpath('.//title[regex(., "\w+")]', Class.new {
151
+ # def regex node_set, regex
152
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
153
+ # end
154
+ # }.new)
155
+ #
156
+ def xpath(*args)
157
+ paths, handler, ns, binds = extract_params(args)
158
+
159
+ xpath_internal self, paths, handler, ns, binds
160
+ end
161
+
162
+ ##
163
+ # call-seq: at_xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
164
+ #
165
+ # Search this node for XPath +paths+, and return only the first
166
+ # match. +paths+ must be one or more XPath queries.
167
+ #
168
+ # See Searchable#xpath for more information.
169
+ def at_xpath(*args)
170
+ xpath(*args).first
171
+ end
172
+
173
+ # @!endgroup
174
+
175
+ private
176
+
177
+ def css_internal(node, rules, handler, ns)
178
+ xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
179
+ end
180
+
181
+ def xpath_internal(node, paths, handler, ns, binds)
182
+ document = node.document
183
+ return NodeSet.new(document) unless document
184
+
185
+ if paths.length == 1
186
+ return xpath_impl(node, paths.first, handler, ns, binds)
187
+ end
188
+
189
+ NodeSet.new(document) do |combined|
190
+ paths.each do |path|
191
+ xpath_impl(node, path, handler, ns, binds).each { |set| combined << set }
192
+ end
193
+ end
194
+ end
195
+
196
+ def xpath_impl(node, path, handler, ns, binds)
197
+ ctx = XPathContext.new(node)
198
+ ctx.register_namespaces(ns)
199
+ path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
200
+
201
+ binds.each do |key, value|
202
+ ctx.register_variable key.to_s, value
203
+ end if binds
204
+
205
+ ctx.evaluate(path, handler)
206
+ end
207
+
208
+ def css_rules_to_xpath(rules, ns)
209
+ rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
210
+ end
211
+
212
+ def xpath_query_from_css_rule(rule, ns)
213
+ visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
214
+ self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
215
+ CSS.xpath_for(rule.to_s, {:prefix => implied_xpath_context, :ns => ns,
216
+ :visitor => visitor})
217
+ end.join(" | ")
218
+ end
219
+
220
+ def extract_params(params) # :nodoc:
221
+ handler = params.find do |param|
222
+ ![Hash, String, Symbol].include?(param.class)
223
+ end
224
+ params -= [handler] if handler
225
+
226
+ hashes = []
227
+ while Hash === params.last || params.last.nil?
228
+ hashes << params.pop
229
+ break if params.empty?
230
+ end
231
+ ns, binds = hashes.reverse
232
+
233
+ ns ||= document.root ? document.root.namespaces : {}
234
+
235
+ [params, handler, ns, binds]
236
+ end
237
+ end
238
+ end
239
+ end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -40,7 +41,30 @@ module Nokogiri
40
41
  end
41
42
 
42
43
  def to_s
43
- super.chomp
44
+ message = super.chomp
45
+ [location_to_s, level_to_s, message].
46
+ compact.join(": ").
47
+ force_encoding(message.encoding)
48
+ end
49
+
50
+ private
51
+
52
+ def level_to_s
53
+ case level
54
+ when 3 then "FATAL"
55
+ when 2 then "ERROR"
56
+ when 1 then "WARNING"
57
+ else nil
58
+ end
59
+ end
60
+
61
+ def nil_or_zero?(attribute)
62
+ attribute.nil? || attribute.zero?
63
+ end
64
+
65
+ def location_to_s
66
+ return nil if nil_or_zero?(line) && nil_or_zero?(column)
67
+ "#{line}:#{column}"
44
68
  end
45
69
  end
46
70
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Text < Nokogiri::XML::CharacterData
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
- class XPath
4
+ module XPath
4
5
  class SyntaxError < XML::SyntaxError
5
6
  def to_s
6
7
  [super.chomp, str1].compact.join(': ')
@@ -1,10 +1,9 @@
1
- require 'nokogiri/xml/xpath/syntax_error'
2
-
1
+ # frozen_string_literal: true
3
2
  module Nokogiri
4
3
  module XML
5
- class XPath
6
- # The Nokogiri::XML::Document tied to this XPath instance
7
- attr_accessor :document
4
+ module XPath
8
5
  end
9
6
  end
10
7
  end
8
+
9
+ require_relative "xpath/syntax_error"
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class XPathContext
data/lib/nokogiri/xml.rb CHANGED
@@ -1,35 +1,9 @@
1
- require 'nokogiri/xml/pp'
2
- require 'nokogiri/xml/parse_options'
3
- require 'nokogiri/xml/sax'
4
- require 'nokogiri/xml/node'
5
- require 'nokogiri/xml/attribute_decl'
6
- require 'nokogiri/xml/element_decl'
7
- require 'nokogiri/xml/element_content'
8
- require 'nokogiri/xml/character_data'
9
- require 'nokogiri/xml/namespace'
10
- require 'nokogiri/xml/attr'
11
- require 'nokogiri/xml/dtd'
12
- require 'nokogiri/xml/cdata'
13
- require 'nokogiri/xml/text'
14
- require 'nokogiri/xml/document'
15
- require 'nokogiri/xml/document_fragment'
16
- require 'nokogiri/xml/processing_instruction'
17
- require 'nokogiri/xml/node_set'
18
- require 'nokogiri/xml/syntax_error'
19
- require 'nokogiri/xml/xpath'
20
- require 'nokogiri/xml/xpath_context'
21
- require 'nokogiri/xml/builder'
22
- require 'nokogiri/xml/reader'
23
- require 'nokogiri/xml/notation'
24
- require 'nokogiri/xml/entity_decl'
25
- require 'nokogiri/xml/schema'
26
- require 'nokogiri/xml/relax_ng'
27
-
1
+ # frozen_string_literal: true
28
2
  module Nokogiri
29
3
  class << self
30
4
  ###
31
5
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
32
- def XML thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block
6
+ def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
33
7
  Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
34
8
  end
35
9
  end
@@ -38,20 +12,19 @@ module Nokogiri
38
12
  # Original C14N 1.0 spec canonicalization
39
13
  XML_C14N_1_0 = 0
40
14
  # Exclusive C14N 1.0 spec canonicalization
41
- XML_C14N_EXCLUSIVE_1_0 = 1
15
+ XML_C14N_EXCLUSIVE_1_0 = 1
42
16
  # C14N 1.1 spec canonicalization
43
17
  XML_C14N_1_1 = 2
44
18
  class << self
45
19
  ###
46
20
  # Parse an XML document using the Nokogiri::XML::Reader API. See
47
21
  # Nokogiri::XML::Reader for more information
48
- def Reader string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT
49
-
50
- options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
22
+ def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
23
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
51
24
  # Give the options to the user
52
25
  yield options if block_given?
53
26
 
54
- if string_or_io.respond_to? :read
27
+ if string_or_io.respond_to?(:read)
55
28
  return Reader.from_io(string_or_io, url, encoding, options.to_i)
56
29
  end
57
30
  Reader.from_memory(string_or_io, url, encoding, options.to_i)
@@ -59,15 +32,44 @@ module Nokogiri
59
32
 
60
33
  ###
61
34
  # Parse XML. Convenience method for Nokogiri::XML::Document.parse
62
- def parse thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
35
+ def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
63
36
  Document.parse(thing, url, encoding, options, &block)
64
37
  end
65
38
 
66
39
  ####
67
40
  # Parse a fragment from +string+ into a NodeSet.
68
- def fragment string
41
+ def fragment(string)
69
42
  XML::DocumentFragment.parse(string)
70
43
  end
71
44
  end
72
45
  end
73
46
  end
47
+
48
+ require_relative "xml/pp"
49
+ require_relative "xml/parse_options"
50
+ require_relative "xml/sax"
51
+ require_relative "xml/searchable"
52
+ require_relative "xml/node"
53
+ require_relative "xml/attribute_decl"
54
+ require_relative "xml/element_decl"
55
+ require_relative "xml/element_content"
56
+ require_relative "xml/character_data"
57
+ require_relative "xml/namespace"
58
+ require_relative "xml/attr"
59
+ require_relative "xml/dtd"
60
+ require_relative "xml/cdata"
61
+ require_relative "xml/text"
62
+ require_relative "xml/document"
63
+ require_relative "xml/document_fragment"
64
+ require_relative "xml/processing_instruction"
65
+ require_relative "xml/node_set"
66
+ require_relative "xml/syntax_error"
67
+ require_relative "xml/xpath"
68
+ require_relative "xml/xpath_context"
69
+ require_relative "xml/builder"
70
+ require_relative "xml/reader"
71
+ require_relative "xml/notation"
72
+ require_relative "xml/entity_decl"
73
+ require_relative "xml/entity_reference"
74
+ require_relative "xml/schema"
75
+ require_relative "xml/relax_ng"
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XSLT
3
4
  ###
@@ -17,7 +18,7 @@ module Nokogiri
17
18
  # Apply an XSLT stylesheet to an XML::Document.
18
19
  # +params+ is an array of strings used as XSLT parameters.
19
20
  # returns serialized document
20
- def apply_to document, params = []
21
+ def apply_to(document, params = [])
21
22
  serialize(transform(document, params))
22
23
  end
23
24
  end
data/lib/nokogiri/xslt.rb CHANGED
@@ -1,5 +1,4 @@
1
- require 'nokogiri/xslt/stylesheet'
2
-
1
+ # frozen_string_literal: true
3
2
  module Nokogiri
4
3
  class << self
5
4
  ###
@@ -21,32 +20,32 @@ module Nokogiri
21
20
  class << self
22
21
  ###
23
22
  # Parse the stylesheet in +string+, register any +modules+
24
- def parse string, modules = {}
23
+ def parse(string, modules = {})
25
24
  modules.each do |url, klass|
26
- XSLT.register url, klass
25
+ XSLT.register(url, klass)
27
26
  end
28
27
 
28
+ doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
29
29
  if Nokogiri.jruby?
30
- Stylesheet.parse_stylesheet_doc(XML.parse(string), string)
30
+ Stylesheet.parse_stylesheet_doc(doc, string)
31
31
  else
32
- Stylesheet.parse_stylesheet_doc(XML.parse(string))
32
+ Stylesheet.parse_stylesheet_doc(doc)
33
33
  end
34
34
  end
35
35
 
36
36
  ###
37
37
  # Quote parameters in +params+ for stylesheet safety
38
- def quote_params params
38
+ def quote_params(params)
39
39
  parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
40
- parray.each_with_index do |v,i|
41
- if i % 2 > 0
42
- parray[i]=
43
- if v =~ /'/
44
- "concat('#{ v.gsub(/'/, %q{', "'", '}) }')"
45
- else
46
- "'#{v}'";
47
- end
40
+ parray.each_with_index do |v, i|
41
+ parray[i] = if i % 2 > 0
42
+ if v =~ /'/
43
+ "concat('#{v.gsub(/'/, %q{', "'", '})}')"
44
+ else
45
+ "'#{v}'"
46
+ end
48
47
  else
49
- parray[i] = v.to_s
48
+ v.to_s
50
49
  end
51
50
  end
52
51
  parray.flatten
@@ -54,3 +53,5 @@ module Nokogiri
54
53
  end
55
54
  end
56
55
  end
56
+
57
+ require_relative "xslt/stylesheet"