nokogiri 1.6.0 → 1.13.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (340) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -19
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +23 -4
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +952 -132
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +231 -96
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +269 -177
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +407 -357
  94. data/lib/nokogiri/css/parser.y +265 -246
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +8 -7
  99. data/lib/nokogiri/css/xpath_visitor.rb +266 -80
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -105
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +270 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +278 -362
  177. data/.autotest +0 -26
  178. data/.gemtest +0 -0
  179. data/.travis.yml +0 -27
  180. data/CHANGELOG.ja.rdoc +0 -819
  181. data/CHANGELOG.rdoc +0 -819
  182. data/C_CODING_STYLE.rdoc +0 -33
  183. data/Manifest.txt +0 -315
  184. data/README.ja.rdoc +0 -106
  185. data/README.rdoc +0 -175
  186. data/ROADMAP.md +0 -90
  187. data/Rakefile +0 -246
  188. data/STANDARD_RESPONSES.md +0 -47
  189. data/Y_U_NO_GEMSPEC.md +0 -155
  190. data/build_all +0 -105
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -56
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -13
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -14
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document.rb +0 -254
  232. data/lib/nokogiri/html/document_fragment.rb +0 -41
  233. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  234. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  235. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  236. data/ports/archives/libxml2-2.8.0.tar.gz +0 -0
  237. data/ports/archives/libxslt-1.1.26.tar.gz +0 -0
  238. data/tasks/cross_compile.rb +0 -132
  239. data/tasks/nokogiri.org.rb +0 -24
  240. data/tasks/test.rb +0 -95
  241. data/test/css/test_nthiness.rb +0 -159
  242. data/test/css/test_parser.rb +0 -341
  243. data/test/css/test_tokenizer.rb +0 -198
  244. data/test/css/test_xpath_visitor.rb +0 -91
  245. data/test/decorators/test_slop.rb +0 -16
  246. data/test/files/2ch.html +0 -108
  247. data/test/files/address_book.rlx +0 -12
  248. data/test/files/address_book.xml +0 -10
  249. data/test/files/bar/bar.xsd +0 -4
  250. data/test/files/bogus.xml +0 -0
  251. data/test/files/dont_hurt_em_why.xml +0 -422
  252. data/test/files/encoding.html +0 -82
  253. data/test/files/encoding.xhtml +0 -84
  254. data/test/files/exslt.xml +0 -8
  255. data/test/files/exslt.xslt +0 -35
  256. data/test/files/foo/foo.xsd +0 -4
  257. data/test/files/metacharset.html +0 -10
  258. data/test/files/noencoding.html +0 -47
  259. data/test/files/po.xml +0 -32
  260. data/test/files/po.xsd +0 -66
  261. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  262. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  263. data/test/files/saml/xenc_schema.xsd +0 -146
  264. data/test/files/saml/xmldsig_schema.xsd +0 -318
  265. data/test/files/shift_jis.html +0 -10
  266. data/test/files/shift_jis.xml +0 -5
  267. data/test/files/snuggles.xml +0 -3
  268. data/test/files/staff.dtd +0 -10
  269. data/test/files/staff.xml +0 -59
  270. data/test/files/staff.xslt +0 -32
  271. data/test/files/test_document_url/bar.xml +0 -2
  272. data/test/files/test_document_url/document.dtd +0 -4
  273. data/test/files/test_document_url/document.xml +0 -6
  274. data/test/files/tlm.html +0 -850
  275. data/test/files/to_be_xincluded.xml +0 -2
  276. data/test/files/valid_bar.xml +0 -2
  277. data/test/files/xinclude.xml +0 -4
  278. data/test/helper.rb +0 -154
  279. data/test/html/sax/test_parser.rb +0 -141
  280. data/test/html/sax/test_parser_context.rb +0 -46
  281. data/test/html/test_builder.rb +0 -164
  282. data/test/html/test_document.rb +0 -552
  283. data/test/html/test_document_encoding.rb +0 -138
  284. data/test/html/test_document_fragment.rb +0 -261
  285. data/test/html/test_element_description.rb +0 -105
  286. data/test/html/test_named_characters.rb +0 -14
  287. data/test/html/test_node.rb +0 -196
  288. data/test/html/test_node_encoding.rb +0 -27
  289. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  290. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  291. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  292. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  293. data/test/test_convert_xpath.rb +0 -135
  294. data/test/test_css_cache.rb +0 -45
  295. data/test/test_encoding_handler.rb +0 -46
  296. data/test/test_memory_leak.rb +0 -156
  297. data/test/test_nokogiri.rb +0 -132
  298. data/test/test_reader.rb +0 -555
  299. data/test/test_soap4r_sax.rb +0 -52
  300. data/test/test_xslt_transforms.rb +0 -254
  301. data/test/xml/node/test_save_options.rb +0 -28
  302. data/test/xml/node/test_subclass.rb +0 -44
  303. data/test/xml/sax/test_parser.rb +0 -366
  304. data/test/xml/sax/test_parser_context.rb +0 -106
  305. data/test/xml/sax/test_push_parser.rb +0 -157
  306. data/test/xml/test_attr.rb +0 -64
  307. data/test/xml/test_attribute_decl.rb +0 -86
  308. data/test/xml/test_builder.rb +0 -306
  309. data/test/xml/test_c14n.rb +0 -151
  310. data/test/xml/test_cdata.rb +0 -48
  311. data/test/xml/test_comment.rb +0 -29
  312. data/test/xml/test_document.rb +0 -828
  313. data/test/xml/test_document_encoding.rb +0 -28
  314. data/test/xml/test_document_fragment.rb +0 -223
  315. data/test/xml/test_dtd.rb +0 -103
  316. data/test/xml/test_dtd_encoding.rb +0 -33
  317. data/test/xml/test_element_content.rb +0 -56
  318. data/test/xml/test_element_decl.rb +0 -73
  319. data/test/xml/test_entity_decl.rb +0 -122
  320. data/test/xml/test_entity_reference.rb +0 -245
  321. data/test/xml/test_namespace.rb +0 -95
  322. data/test/xml/test_node.rb +0 -1137
  323. data/test/xml/test_node_attributes.rb +0 -96
  324. data/test/xml/test_node_encoding.rb +0 -107
  325. data/test/xml/test_node_inheritance.rb +0 -32
  326. data/test/xml/test_node_reparenting.rb +0 -374
  327. data/test/xml/test_node_set.rb +0 -755
  328. data/test/xml/test_parse_options.rb +0 -64
  329. data/test/xml/test_processing_instruction.rb +0 -30
  330. data/test/xml/test_reader_encoding.rb +0 -142
  331. data/test/xml/test_relax_ng.rb +0 -60
  332. data/test/xml/test_schema.rb +0 -103
  333. data/test/xml/test_syntax_error.rb +0 -12
  334. data/test/xml/test_text.rb +0 -45
  335. data/test/xml/test_unparented_node.rb +0 -422
  336. data/test/xml/test_xinclude.rb +0 -83
  337. data/test/xml/test_xpath.rb +0 -295
  338. data/test/xslt/test_custom_functions.rb +0 -133
  339. data/test/xslt/test_exception_handling.rb +0 -37
  340. data/test_all +0 -81
@@ -0,0 +1,121 @@
1
+ #ifndef HAVE_XMLFIRSTELEMENTCHILD
2
+ #include <nokogiri.h>
3
+ /**
4
+ * xmlFirstElementChild:
5
+ * @parent: the parent node
6
+ *
7
+ * Finds the first child node of that element which is a Element node
8
+ * Note the handling of entities references is different than in
9
+ * the W3C DOM element traversal spec since we don't have back reference
10
+ * from entities content to entities references.
11
+ *
12
+ * Returns the first element child or NULL if not available
13
+ */
14
+ xmlNodePtr
15
+ xmlFirstElementChild(xmlNodePtr parent)
16
+ {
17
+ xmlNodePtr cur = NULL;
18
+
19
+ if (parent == NULL) {
20
+ return (NULL);
21
+ }
22
+ switch (parent->type) {
23
+ case XML_ELEMENT_NODE:
24
+ case XML_ENTITY_NODE:
25
+ case XML_DOCUMENT_NODE:
26
+ case XML_HTML_DOCUMENT_NODE:
27
+ cur = parent->children;
28
+ break;
29
+ default:
30
+ return (NULL);
31
+ }
32
+ while (cur != NULL) {
33
+ if (cur->type == XML_ELEMENT_NODE) {
34
+ return (cur);
35
+ }
36
+ cur = cur->next;
37
+ }
38
+ return (NULL);
39
+ }
40
+
41
+ /**
42
+ * xmlNextElementSibling:
43
+ * @node: the current node
44
+ *
45
+ * Finds the first closest next sibling of the node which is an
46
+ * element node.
47
+ * Note the handling of entities references is different than in
48
+ * the W3C DOM element traversal spec since we don't have back reference
49
+ * from entities content to entities references.
50
+ *
51
+ * Returns the next element sibling or NULL if not available
52
+ */
53
+ xmlNodePtr
54
+ xmlNextElementSibling(xmlNodePtr node)
55
+ {
56
+ if (node == NULL) {
57
+ return (NULL);
58
+ }
59
+ switch (node->type) {
60
+ case XML_ELEMENT_NODE:
61
+ case XML_TEXT_NODE:
62
+ case XML_CDATA_SECTION_NODE:
63
+ case XML_ENTITY_REF_NODE:
64
+ case XML_ENTITY_NODE:
65
+ case XML_PI_NODE:
66
+ case XML_COMMENT_NODE:
67
+ case XML_DTD_NODE:
68
+ case XML_XINCLUDE_START:
69
+ case XML_XINCLUDE_END:
70
+ node = node->next;
71
+ break;
72
+ default:
73
+ return (NULL);
74
+ }
75
+ while (node != NULL) {
76
+ if (node->type == XML_ELEMENT_NODE) {
77
+ return (node);
78
+ }
79
+ node = node->next;
80
+ }
81
+ return (NULL);
82
+ }
83
+
84
+ /**
85
+ * xmlLastElementChild:
86
+ * @parent: the parent node
87
+ *
88
+ * Finds the last child node of that element which is a Element node
89
+ * Note the handling of entities references is different than in
90
+ * the W3C DOM element traversal spec since we don't have back reference
91
+ * from entities content to entities references.
92
+ *
93
+ * Returns the last element child or NULL if not available
94
+ */
95
+ xmlNodePtr
96
+ xmlLastElementChild(xmlNodePtr parent)
97
+ {
98
+ xmlNodePtr cur = NULL;
99
+
100
+ if (parent == NULL) {
101
+ return (NULL);
102
+ }
103
+ switch (parent->type) {
104
+ case XML_ELEMENT_NODE:
105
+ case XML_ENTITY_NODE:
106
+ case XML_DOCUMENT_NODE:
107
+ case XML_HTML_DOCUMENT_NODE:
108
+ cur = parent->last;
109
+ break;
110
+ default:
111
+ return (NULL);
112
+ }
113
+ while (cur != NULL) {
114
+ if (cur->type == XML_ELEMENT_NODE) {
115
+ return (cur);
116
+ }
117
+ cur = cur->prev;
118
+ }
119
+ return (NULL);
120
+ }
121
+ #endif
@@ -1,103 +1,189 @@
1
1
  #include <nokogiri.h>
2
2
 
3
3
  VALUE mNokogiri ;
4
+ VALUE mNokogiriGumbo ;
5
+ VALUE mNokogiriHtml4 ;
6
+ VALUE mNokogiriHtml4Sax ;
7
+ VALUE mNokogiriHtml5 ;
4
8
  VALUE mNokogiriXml ;
5
- VALUE mNokogiriHtml ;
6
- VALUE mNokogiriXslt ;
7
9
  VALUE mNokogiriXmlSax ;
8
- VALUE mNokogiriHtmlSax ;
10
+ VALUE mNokogiriXmlXpath ;
11
+ VALUE mNokogiriXslt ;
12
+
13
+ VALUE cNokogiriSyntaxError;
14
+ VALUE cNokogiriXmlCharacterData;
15
+ VALUE cNokogiriXmlElement;
16
+ VALUE cNokogiriXmlXpathSyntaxError;
17
+
18
+ void noko_init_xml_attr(void);
19
+ void noko_init_xml_attribute_decl(void);
20
+ void noko_init_xml_cdata(void);
21
+ void noko_init_xml_comment(void);
22
+ void noko_init_xml_document(void);
23
+ void noko_init_xml_document_fragment(void);
24
+ void noko_init_xml_dtd(void);
25
+ void noko_init_xml_element_content(void);
26
+ void noko_init_xml_element_decl(void);
27
+ void noko_init_xml_encoding_handler(void);
28
+ void noko_init_xml_entity_decl(void);
29
+ void noko_init_xml_entity_reference(void);
30
+ void noko_init_xml_namespace(void);
31
+ void noko_init_xml_node(void);
32
+ void noko_init_xml_node_set(void);
33
+ void noko_init_xml_processing_instruction(void);
34
+ void noko_init_xml_reader(void);
35
+ void noko_init_xml_relax_ng(void);
36
+ void noko_init_xml_sax_parser(void);
37
+ void noko_init_xml_sax_parser_context(void);
38
+ void noko_init_xml_sax_push_parser(void);
39
+ void noko_init_xml_schema(void);
40
+ void noko_init_xml_syntax_error(void);
41
+ void noko_init_xml_text(void);
42
+ void noko_init_xml_xpath_context(void);
43
+ void noko_init_xslt_stylesheet(void);
44
+ void noko_init_html_document(void);
45
+ void noko_init_html_element_description(void);
46
+ void noko_init_html_entity_lookup(void);
47
+ void noko_init_html_sax_parser_context(void);
48
+ void noko_init_html_sax_push_parser(void);
49
+ void noko_init_gumbo(void);
50
+ void noko_init_test_global_handlers(void);
9
51
 
10
- #ifdef USE_INCLUDED_VASPRINTF
52
+ static ID id_read, id_write;
53
+
54
+
55
+ #ifndef HAVE_VASPRINTF
11
56
  /*
12
- * I srsly hate windows. it doesn't have vasprintf.
13
57
  * Thank you Geoffroy Couprie for this implementation of vasprintf!
14
58
  */
15
- int vasprintf (char **strp, const char *fmt, va_list ap)
59
+ int
60
+ vasprintf(char **strp, const char *fmt, va_list ap)
16
61
  {
17
- int len = vsnprintf (NULL, 0, fmt, ap) + 1;
62
+ /* Mingw32/64 have a broken vsnprintf implementation that fails when
63
+ * using a zero-byte limit in order to retrieve the required size for malloc.
64
+ * So we use a one byte buffer instead.
65
+ */
66
+ char tmp[1];
67
+ int len = vsnprintf(tmp, 1, fmt, ap) + 1;
18
68
  char *res = (char *)malloc((unsigned int)len);
19
- if (res == NULL)
20
- return -1;
69
+ if (res == NULL) {
70
+ return -1;
71
+ }
21
72
  *strp = res;
22
73
  return vsnprintf(res, (unsigned int)len, fmt, ap);
23
74
  }
24
75
  #endif
25
76
 
26
- #ifdef USING_SYSTEM_ALLOCATOR_LIBRARY /* Ruby Enterprise Edition with tcmalloc */
27
- void vasprintf_free (void *p)
77
+
78
+ static VALUE
79
+ read_check(VALUE val)
28
80
  {
29
- system_free(p);
81
+ VALUE *args = (VALUE *)val;
82
+ return rb_funcall(args[0], id_read, 1, args[1]);
30
83
  }
31
- #else
32
- void vasprintf_free (void *p)
84
+
85
+
86
+ static VALUE
87
+ read_failed(VALUE arg, VALUE exc)
33
88
  {
34
- free(p);
89
+ return Qundef;
35
90
  }
36
- #endif
37
91
 
38
- #ifdef HAVE_RUBY_UTIL_H
39
- #include "ruby/util.h"
40
- #else
41
- #ifndef __MACRUBY__
42
- #include "util.h"
43
- #endif
44
- #endif
45
92
 
46
- void nokogiri_root_node(xmlNodePtr node)
93
+ int
94
+ noko_io_read(void *ctx, char *buffer, int len)
47
95
  {
48
- xmlDocPtr doc;
49
- nokogiriTuplePtr tuple;
96
+ VALUE string, args[2];
97
+ size_t str_len, safe_len;
50
98
 
51
- doc = node->doc;
52
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
53
- tuple = (nokogiriTuplePtr)doc->_private;
54
- st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
99
+ args[0] = (VALUE)ctx;
100
+ args[1] = INT2NUM(len);
101
+
102
+ string = rb_rescue(read_check, (VALUE)args, read_failed, 0);
103
+
104
+ if (NIL_P(string)) { return 0; }
105
+ if (string == Qundef) { return -1; }
106
+ if (TYPE(string) != T_STRING) { return -1; }
107
+
108
+ str_len = (size_t)RSTRING_LEN(string);
109
+ safe_len = str_len > (size_t)len ? (size_t)len : str_len;
110
+ memcpy(buffer, StringValuePtr(string), safe_len);
111
+
112
+ return (int)safe_len;
55
113
  }
56
114
 
57
- void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
115
+
116
+ static VALUE
117
+ write_check(VALUE val)
58
118
  {
59
- nokogiriTuplePtr tuple;
119
+ VALUE *args = (VALUE *)val;
120
+ return rb_funcall(args[0], id_write, 1, args[1]);
121
+ }
122
+
60
123
 
61
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
62
- tuple = (nokogiriTuplePtr)doc->_private;
63
- st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
124
+ static VALUE
125
+ write_failed(VALUE arg, VALUE exc)
126
+ {
127
+ return Qundef;
64
128
  }
65
129
 
66
- void Init_nokogiri()
130
+
131
+ int
132
+ noko_io_write(void *ctx, char *buffer, int len)
67
133
  {
68
- #ifndef __MACRUBY__
69
- xmlMemSetup(
70
- (xmlFreeFunc)ruby_xfree,
71
- (xmlMallocFunc)ruby_xmalloc,
72
- (xmlReallocFunc)ruby_xrealloc,
73
- ruby_strdup
74
- );
75
- #endif
134
+ VALUE args[2], size;
135
+
136
+ args[0] = (VALUE)ctx;
137
+ args[1] = rb_str_new(buffer, (long)len);
138
+
139
+ size = rb_rescue(write_check, (VALUE)args, write_failed, 0);
140
+
141
+ if (size == Qundef) { return -1; }
76
142
 
143
+ return NUM2INT(size);
144
+ }
145
+
146
+
147
+ int
148
+ noko_io_close(void *ctx)
149
+ {
150
+ return 0;
151
+ }
152
+
153
+
154
+ void
155
+ Init_nokogiri()
156
+ {
77
157
  mNokogiri = rb_define_module("Nokogiri");
158
+ mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
159
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
160
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
161
+ mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
78
162
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
79
- mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
80
- mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
81
163
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
82
- mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
83
-
84
- rb_const_set( mNokogiri,
85
- rb_intern("LIBXML_VERSION"),
86
- NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
87
- );
88
- rb_const_set( mNokogiri,
89
- rb_intern("LIBXML_PARSER_VERSION"),
90
- NOKOGIRI_STR_NEW2(xmlParserVersion)
91
- );
92
-
93
- #ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES
94
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue);
95
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH));
96
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH));
164
+ mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
165
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
166
+
167
+ rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
168
+ rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
169
+
170
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
171
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
172
+
173
+ #ifdef NOKOGIRI_PACKAGED_LIBRARIES
174
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
175
+ # ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
176
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qtrue);
177
+ # else
178
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
179
+ # endif
180
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
181
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
97
182
  #else
98
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse);
99
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil);
100
- rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil);
183
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qfalse);
184
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
185
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), Qnil);
186
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), Qnil);
101
187
  #endif
102
188
 
103
189
  #ifdef LIBXML_ICONV_ENABLED
@@ -106,38 +192,87 @@ void Init_nokogiri()
106
192
  rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
107
193
  #endif
108
194
 
195
+ #ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
196
+ rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
197
+ #endif
198
+
199
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
200
+ /*
201
+ * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
202
+ * issue in libxml 2.9.12:
203
+ *
204
+ * https://github.com/sparklemotion/nokogiri/issues/2241
205
+ *
206
+ * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
207
+ * this config only for the specific libxml2 versions 2.9.12.
208
+ *
209
+ * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
210
+ * default memory management functions (recall that this config was introduced to reduce memory
211
+ * bloat and allow Ruby to GC more often); but we should *really* test with production workloads
212
+ * before making that kind of a potentially-invasive change.
213
+ */
214
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
215
+ #else
216
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
217
+ xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
218
+ #endif
219
+
109
220
  xmlInitParser();
221
+ exsltRegisterAll();
222
+
223
+ if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
224
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
225
+ } else {
226
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
227
+ }
228
+
229
+ cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
230
+ noko_init_xml_syntax_error();
231
+ assert(cNokogiriXmlSyntaxError);
232
+ cNokogiriXmlXpathSyntaxError = rb_define_class_under(mNokogiriXmlXpath, "SyntaxError", cNokogiriXmlSyntaxError);
233
+
234
+ noko_init_xml_element_content();
235
+ noko_init_xml_encoding_handler();
236
+ noko_init_xml_namespace();
237
+ noko_init_xml_node_set();
238
+ noko_init_xml_reader();
239
+ noko_init_xml_sax_parser();
240
+ noko_init_xml_xpath_context();
241
+ noko_init_xslt_stylesheet();
242
+ noko_init_html_element_description();
243
+ noko_init_html_entity_lookup();
244
+
245
+ noko_init_xml_schema();
246
+ noko_init_xml_relax_ng();
247
+
248
+ noko_init_xml_sax_parser_context();
249
+ noko_init_html_sax_parser_context();
250
+
251
+ noko_init_xml_sax_push_parser();
252
+ noko_init_html_sax_push_parser();
253
+
254
+ noko_init_xml_node();
255
+ noko_init_xml_attr();
256
+ noko_init_xml_attribute_decl();
257
+ noko_init_xml_dtd();
258
+ noko_init_xml_element_decl();
259
+ noko_init_xml_entity_decl();
260
+ noko_init_xml_entity_reference();
261
+ noko_init_xml_processing_instruction();
262
+ assert(cNokogiriXmlNode);
263
+ cNokogiriXmlElement = rb_define_class_under(mNokogiriXml, "Element", cNokogiriXmlNode);
264
+ cNokogiriXmlCharacterData = rb_define_class_under(mNokogiriXml, "CharacterData", cNokogiriXmlNode);
265
+ noko_init_xml_comment();
266
+ noko_init_xml_text();
267
+ noko_init_xml_cdata();
268
+
269
+ noko_init_xml_document_fragment();
270
+ noko_init_xml_document();
271
+ noko_init_html_document();
272
+ noko_init_gumbo();
273
+
274
+ noko_init_test_global_handlers();
110
275
 
111
- init_xml_document();
112
- init_html_document();
113
- init_xml_node();
114
- init_xml_document_fragment();
115
- init_xml_text();
116
- init_xml_cdata();
117
- init_xml_processing_instruction();
118
- init_xml_attr();
119
- init_xml_entity_reference();
120
- init_xml_comment();
121
- init_xml_node_set();
122
- init_xml_xpath_context();
123
- init_xml_sax_parser_context();
124
- init_xml_sax_parser();
125
- init_xml_sax_push_parser();
126
- init_xml_reader();
127
- init_xml_dtd();
128
- init_xml_element_content();
129
- init_xml_attribute_decl();
130
- init_xml_element_decl();
131
- init_xml_entity_decl();
132
- init_xml_namespace();
133
- init_html_sax_parser_context();
134
- init_html_sax_push_parser();
135
- init_xslt_stylesheet();
136
- init_xml_syntax_error();
137
- init_html_entity_lookup();
138
- init_html_element_description();
139
- init_xml_schema();
140
- init_xml_relax_ng();
141
- init_nokogiri_io();
142
- init_xml_encoding_handler();
276
+ id_read = rb_intern("read");
277
+ id_write = rb_intern("write");
143
278
  }