nokogiri 1.5.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (328) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +278 -0
  6. data/bin/nokogiri +50 -10
  7. data/dependencies.yml +74 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +944 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +305 -201
  25. data/ext/nokogiri/xml_document_fragment.c +13 -15
  26. data/ext/nokogiri/xml_dtd.c +54 -48
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +30 -19
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +808 -503
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +198 -186
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +162 -98
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4886 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/css/node.rb +1 -50
  92. data/lib/nokogiri/css/parser.rb +317 -286
  93. data/lib/nokogiri/css/parser.y +57 -43
  94. data/lib/nokogiri/css/parser_extras.rb +39 -36
  95. data/lib/nokogiri/css/syntax_error.rb +2 -1
  96. data/lib/nokogiri/css/tokenizer.rb +105 -103
  97. data/lib/nokogiri/css/tokenizer.rex +5 -5
  98. data/lib/nokogiri/css/xpath_visitor.rb +137 -48
  99. data/lib/nokogiri/css.rb +15 -14
  100. data/lib/nokogiri/decorators/slop.rb +13 -5
  101. data/lib/nokogiri/extension.rb +31 -0
  102. data/lib/nokogiri/gumbo.rb +14 -0
  103. data/lib/nokogiri/html.rb +32 -27
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +118 -50
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  113. data/lib/nokogiri/html4.rb +40 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/html5.rb +473 -0
  118. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  119. data/lib/nokogiri/syntax_error.rb +1 -0
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/version.rb +3 -91
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +75 -33
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +157 -54
  129. data/lib/nokogiri/xml/document_fragment.rb +55 -8
  130. data/lib/nokogiri/xml/dtd.rb +15 -4
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  137. data/lib/nokogiri/xml/node.rb +712 -431
  138. data/lib/nokogiri/xml/node_set.rb +140 -123
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +31 -0
  141. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  142. data/lib/nokogiri/xml/pp/node.rb +1 -0
  143. data/lib/nokogiri/xml/pp.rb +3 -2
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax/document.rb +25 -30
  148. data/lib/nokogiri/xml/sax/parser.rb +8 -8
  149. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  150. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  151. data/lib/nokogiri/xml/sax.rb +5 -4
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +239 -0
  154. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  157. data/lib/nokogiri/xml/xpath.rb +4 -5
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xml.rb +37 -35
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/nokogiri/xslt.rb +17 -16
  162. data/lib/nokogiri.rb +55 -58
  163. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  164. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  165. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  166. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  167. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  171. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  172. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  173. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  174. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  175. metadata +307 -459
  176. data/.autotest +0 -26
  177. data/.gemtest +0 -0
  178. data/CHANGELOG.ja.rdoc +0 -785
  179. data/CHANGELOG.rdoc +0 -783
  180. data/C_CODING_STYLE.rdoc +0 -33
  181. data/Manifest.txt +0 -303
  182. data/README.ja.rdoc +0 -106
  183. data/README.rdoc +0 -175
  184. data/ROADMAP.md +0 -90
  185. data/Rakefile +0 -228
  186. data/STANDARD_RESPONSES.md +0 -47
  187. data/Y_U_NO_GEMSPEC.md +0 -155
  188. data/build_all +0 -105
  189. data/ext/nokogiri/html_document.c +0 -170
  190. data/ext/nokogiri/html_document.h +0 -10
  191. data/ext/nokogiri/html_element_description.c +0 -279
  192. data/ext/nokogiri/html_element_description.h +0 -10
  193. data/ext/nokogiri/html_entity_lookup.c +0 -32
  194. data/ext/nokogiri/html_entity_lookup.h +0 -8
  195. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  196. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  197. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  198. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  199. data/ext/nokogiri/xml_attr.h +0 -9
  200. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  201. data/ext/nokogiri/xml_cdata.h +0 -9
  202. data/ext/nokogiri/xml_comment.h +0 -9
  203. data/ext/nokogiri/xml_document.h +0 -23
  204. data/ext/nokogiri/xml_document_fragment.h +0 -10
  205. data/ext/nokogiri/xml_dtd.h +0 -10
  206. data/ext/nokogiri/xml_element_content.h +0 -10
  207. data/ext/nokogiri/xml_element_decl.h +0 -9
  208. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  209. data/ext/nokogiri/xml_entity_decl.h +0 -10
  210. data/ext/nokogiri/xml_entity_reference.h +0 -9
  211. data/ext/nokogiri/xml_io.c +0 -56
  212. data/ext/nokogiri/xml_io.h +0 -11
  213. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  214. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  215. data/ext/nokogiri/xml_namespace.h +0 -13
  216. data/ext/nokogiri/xml_node.h +0 -13
  217. data/ext/nokogiri/xml_node_set.h +0 -14
  218. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  219. data/ext/nokogiri/xml_reader.h +0 -10
  220. data/ext/nokogiri/xml_relax_ng.h +0 -9
  221. data/ext/nokogiri/xml_sax_parser.h +0 -39
  222. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  223. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  224. data/ext/nokogiri/xml_schema.h +0 -9
  225. data/ext/nokogiri/xml_syntax_error.h +0 -13
  226. data/ext/nokogiri/xml_text.h +0 -9
  227. data/ext/nokogiri/xml_xpath_context.h +0 -10
  228. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  229. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  230. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  231. data/tasks/cross_compile.rb +0 -150
  232. data/tasks/nokogiri.org.rb +0 -24
  233. data/tasks/test.rb +0 -95
  234. data/test/css/test_nthiness.rb +0 -159
  235. data/test/css/test_parser.rb +0 -341
  236. data/test/css/test_tokenizer.rb +0 -198
  237. data/test/css/test_xpath_visitor.rb +0 -91
  238. data/test/decorators/test_slop.rb +0 -16
  239. data/test/files/2ch.html +0 -108
  240. data/test/files/address_book.rlx +0 -12
  241. data/test/files/address_book.xml +0 -10
  242. data/test/files/bar/bar.xsd +0 -4
  243. data/test/files/dont_hurt_em_why.xml +0 -422
  244. data/test/files/encoding.html +0 -82
  245. data/test/files/encoding.xhtml +0 -84
  246. data/test/files/exslt.xml +0 -8
  247. data/test/files/exslt.xslt +0 -35
  248. data/test/files/foo/foo.xsd +0 -4
  249. data/test/files/metacharset.html +0 -10
  250. data/test/files/noencoding.html +0 -47
  251. data/test/files/po.xml +0 -32
  252. data/test/files/po.xsd +0 -66
  253. data/test/files/shift_jis.html +0 -10
  254. data/test/files/shift_jis.xml +0 -5
  255. data/test/files/snuggles.xml +0 -3
  256. data/test/files/staff.dtd +0 -10
  257. data/test/files/staff.xml +0 -59
  258. data/test/files/staff.xslt +0 -32
  259. data/test/files/test_document_url/bar.xml +0 -2
  260. data/test/files/test_document_url/document.dtd +0 -4
  261. data/test/files/test_document_url/document.xml +0 -6
  262. data/test/files/tlm.html +0 -850
  263. data/test/files/to_be_xincluded.xml +0 -2
  264. data/test/files/valid_bar.xml +0 -2
  265. data/test/files/xinclude.xml +0 -4
  266. data/test/helper.rb +0 -154
  267. data/test/html/sax/test_parser.rb +0 -141
  268. data/test/html/sax/test_parser_context.rb +0 -46
  269. data/test/html/test_builder.rb +0 -164
  270. data/test/html/test_document.rb +0 -552
  271. data/test/html/test_document_encoding.rb +0 -138
  272. data/test/html/test_document_fragment.rb +0 -261
  273. data/test/html/test_element_description.rb +0 -105
  274. data/test/html/test_named_characters.rb +0 -14
  275. data/test/html/test_node.rb +0 -196
  276. data/test/html/test_node_encoding.rb +0 -27
  277. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  278. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  279. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  280. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  281. data/test/test_convert_xpath.rb +0 -135
  282. data/test/test_css_cache.rb +0 -45
  283. data/test/test_encoding_handler.rb +0 -46
  284. data/test/test_memory_leak.rb +0 -156
  285. data/test/test_nokogiri.rb +0 -132
  286. data/test/test_reader.rb +0 -555
  287. data/test/test_soap4r_sax.rb +0 -52
  288. data/test/test_xslt_transforms.rb +0 -254
  289. data/test/xml/node/test_save_options.rb +0 -28
  290. data/test/xml/node/test_subclass.rb +0 -44
  291. data/test/xml/sax/test_parser.rb +0 -366
  292. data/test/xml/sax/test_parser_context.rb +0 -106
  293. data/test/xml/sax/test_push_parser.rb +0 -157
  294. data/test/xml/test_attr.rb +0 -64
  295. data/test/xml/test_attribute_decl.rb +0 -86
  296. data/test/xml/test_builder.rb +0 -306
  297. data/test/xml/test_c14n.rb +0 -151
  298. data/test/xml/test_cdata.rb +0 -48
  299. data/test/xml/test_comment.rb +0 -29
  300. data/test/xml/test_document.rb +0 -828
  301. data/test/xml/test_document_encoding.rb +0 -28
  302. data/test/xml/test_document_fragment.rb +0 -223
  303. data/test/xml/test_dtd.rb +0 -103
  304. data/test/xml/test_dtd_encoding.rb +0 -33
  305. data/test/xml/test_element_content.rb +0 -56
  306. data/test/xml/test_element_decl.rb +0 -73
  307. data/test/xml/test_entity_decl.rb +0 -122
  308. data/test/xml/test_entity_reference.rb +0 -245
  309. data/test/xml/test_namespace.rb +0 -95
  310. data/test/xml/test_node.rb +0 -1137
  311. data/test/xml/test_node_attributes.rb +0 -96
  312. data/test/xml/test_node_encoding.rb +0 -107
  313. data/test/xml/test_node_inheritance.rb +0 -32
  314. data/test/xml/test_node_reparenting.rb +0 -374
  315. data/test/xml/test_node_set.rb +0 -755
  316. data/test/xml/test_parse_options.rb +0 -64
  317. data/test/xml/test_processing_instruction.rb +0 -30
  318. data/test/xml/test_reader_encoding.rb +0 -142
  319. data/test/xml/test_relax_ng.rb +0 -60
  320. data/test/xml/test_schema.rb +0 -103
  321. data/test/xml/test_syntax_error.rb +0 -12
  322. data/test/xml/test_text.rb +0 -45
  323. data/test/xml/test_unparented_node.rb +0 -422
  324. data/test/xml/test_xinclude.rb +0 -83
  325. data/test/xml/test_xpath.rb +0 -295
  326. data/test/xslt/test_custom_functions.rb +0 -133
  327. data/test/xslt/test_exception_handling.rb +0 -37
  328. data/test_all +0 -81
@@ -0,0 +1,121 @@
1
+ #ifndef HAVE_XMLFIRSTELEMENTCHILD
2
+ #include <nokogiri.h>
3
+ /**
4
+ * xmlFirstElementChild:
5
+ * @parent: the parent node
6
+ *
7
+ * Finds the first child node of that element which is a Element node
8
+ * Note the handling of entities references is different than in
9
+ * the W3C DOM element traversal spec since we don't have back reference
10
+ * from entities content to entities references.
11
+ *
12
+ * Returns the first element child or NULL if not available
13
+ */
14
+ xmlNodePtr
15
+ xmlFirstElementChild(xmlNodePtr parent)
16
+ {
17
+ xmlNodePtr cur = NULL;
18
+
19
+ if (parent == NULL) {
20
+ return (NULL);
21
+ }
22
+ switch (parent->type) {
23
+ case XML_ELEMENT_NODE:
24
+ case XML_ENTITY_NODE:
25
+ case XML_DOCUMENT_NODE:
26
+ case XML_HTML_DOCUMENT_NODE:
27
+ cur = parent->children;
28
+ break;
29
+ default:
30
+ return (NULL);
31
+ }
32
+ while (cur != NULL) {
33
+ if (cur->type == XML_ELEMENT_NODE) {
34
+ return (cur);
35
+ }
36
+ cur = cur->next;
37
+ }
38
+ return (NULL);
39
+ }
40
+
41
+ /**
42
+ * xmlNextElementSibling:
43
+ * @node: the current node
44
+ *
45
+ * Finds the first closest next sibling of the node which is an
46
+ * element node.
47
+ * Note the handling of entities references is different than in
48
+ * the W3C DOM element traversal spec since we don't have back reference
49
+ * from entities content to entities references.
50
+ *
51
+ * Returns the next element sibling or NULL if not available
52
+ */
53
+ xmlNodePtr
54
+ xmlNextElementSibling(xmlNodePtr node)
55
+ {
56
+ if (node == NULL) {
57
+ return (NULL);
58
+ }
59
+ switch (node->type) {
60
+ case XML_ELEMENT_NODE:
61
+ case XML_TEXT_NODE:
62
+ case XML_CDATA_SECTION_NODE:
63
+ case XML_ENTITY_REF_NODE:
64
+ case XML_ENTITY_NODE:
65
+ case XML_PI_NODE:
66
+ case XML_COMMENT_NODE:
67
+ case XML_DTD_NODE:
68
+ case XML_XINCLUDE_START:
69
+ case XML_XINCLUDE_END:
70
+ node = node->next;
71
+ break;
72
+ default:
73
+ return (NULL);
74
+ }
75
+ while (node != NULL) {
76
+ if (node->type == XML_ELEMENT_NODE) {
77
+ return (node);
78
+ }
79
+ node = node->next;
80
+ }
81
+ return (NULL);
82
+ }
83
+
84
+ /**
85
+ * xmlLastElementChild:
86
+ * @parent: the parent node
87
+ *
88
+ * Finds the last child node of that element which is a Element node
89
+ * Note the handling of entities references is different than in
90
+ * the W3C DOM element traversal spec since we don't have back reference
91
+ * from entities content to entities references.
92
+ *
93
+ * Returns the last element child or NULL if not available
94
+ */
95
+ xmlNodePtr
96
+ xmlLastElementChild(xmlNodePtr parent)
97
+ {
98
+ xmlNodePtr cur = NULL;
99
+
100
+ if (parent == NULL) {
101
+ return (NULL);
102
+ }
103
+ switch (parent->type) {
104
+ case XML_ELEMENT_NODE:
105
+ case XML_ENTITY_NODE:
106
+ case XML_DOCUMENT_NODE:
107
+ case XML_HTML_DOCUMENT_NODE:
108
+ cur = parent->last;
109
+ break;
110
+ default:
111
+ return (NULL);
112
+ }
113
+ while (cur != NULL) {
114
+ if (cur->type == XML_ELEMENT_NODE) {
115
+ return (cur);
116
+ }
117
+ cur = cur->prev;
118
+ }
119
+ return (NULL);
120
+ }
121
+ #endif
@@ -1,94 +1,190 @@
1
1
  #include <nokogiri.h>
2
2
 
3
3
  VALUE mNokogiri ;
4
+ VALUE mNokogiriGumbo ;
5
+ VALUE mNokogiriHtml4 ;
6
+ VALUE mNokogiriHtml4Sax ;
7
+ VALUE mNokogiriHtml5 ;
4
8
  VALUE mNokogiriXml ;
5
- VALUE mNokogiriHtml ;
6
- VALUE mNokogiriXslt ;
7
9
  VALUE mNokogiriXmlSax ;
8
- VALUE mNokogiriHtmlSax ;
10
+ VALUE mNokogiriXmlXpath ;
11
+ VALUE mNokogiriXslt ;
12
+
13
+ VALUE cNokogiriSyntaxError;
14
+ VALUE cNokogiriXmlCharacterData;
15
+ VALUE cNokogiriXmlElement;
16
+ VALUE cNokogiriXmlXpathSyntaxError;
17
+
18
+ void noko_init_xml_attr(void);
19
+ void noko_init_xml_attribute_decl(void);
20
+ void noko_init_xml_cdata(void);
21
+ void noko_init_xml_comment(void);
22
+ void noko_init_xml_document(void);
23
+ void noko_init_xml_document_fragment(void);
24
+ void noko_init_xml_dtd(void);
25
+ void noko_init_xml_element_content(void);
26
+ void noko_init_xml_element_decl(void);
27
+ void noko_init_xml_encoding_handler(void);
28
+ void noko_init_xml_entity_decl(void);
29
+ void noko_init_xml_entity_reference(void);
30
+ void noko_init_xml_namespace(void);
31
+ void noko_init_xml_node(void);
32
+ void noko_init_xml_node_set(void);
33
+ void noko_init_xml_processing_instruction(void);
34
+ void noko_init_xml_reader(void);
35
+ void noko_init_xml_relax_ng(void);
36
+ void noko_init_xml_sax_parser(void);
37
+ void noko_init_xml_sax_parser_context(void);
38
+ void noko_init_xml_sax_push_parser(void);
39
+ void noko_init_xml_schema(void);
40
+ void noko_init_xml_syntax_error(void);
41
+ void noko_init_xml_text(void);
42
+ void noko_init_xml_xpath_context(void);
43
+ void noko_init_xslt_stylesheet(void);
44
+ void noko_init_html_document(void);
45
+ void noko_init_html_element_description(void);
46
+ void noko_init_html_entity_lookup(void);
47
+ void noko_init_html_sax_parser_context(void);
48
+ void noko_init_html_sax_push_parser(void);
49
+ void noko_init_gumbo(void);
50
+ void noko_init_test_global_handlers(void);
9
51
 
10
- #ifdef USE_INCLUDED_VASPRINTF
52
+ static ID id_read, id_write;
53
+
54
+
55
+ #ifndef HAVE_VASPRINTF
11
56
  /*
12
- * I srsly hate windows. it doesn't have vasprintf.
13
57
  * Thank you Geoffroy Couprie for this implementation of vasprintf!
14
58
  */
15
- int vasprintf (char **strp, const char *fmt, va_list ap)
59
+ int
60
+ vasprintf(char **strp, const char *fmt, va_list ap)
16
61
  {
17
- int len = vsnprintf (NULL, 0, fmt, ap) + 1;
62
+ /* Mingw32/64 have a broken vsnprintf implementation that fails when
63
+ * using a zero-byte limit in order to retrieve the required size for malloc.
64
+ * So we use a one byte buffer instead.
65
+ */
66
+ char tmp[1];
67
+ int len = vsnprintf(tmp, 1, fmt, ap) + 1;
18
68
  char *res = (char *)malloc((unsigned int)len);
19
- if (res == NULL)
20
- return -1;
69
+ if (res == NULL) {
70
+ return -1;
71
+ }
21
72
  *strp = res;
22
73
  return vsnprintf(res, (unsigned int)len, fmt, ap);
23
74
  }
24
75
  #endif
25
76
 
26
- #ifdef USING_SYSTEM_ALLOCATOR_LIBRARY /* Ruby Enterprise Edition with tcmalloc */
27
- void vasprintf_free (void *p)
77
+
78
+ static VALUE
79
+ read_check(VALUE val)
28
80
  {
29
- system_free(p);
81
+ VALUE *args = (VALUE *)val;
82
+ return rb_funcall(args[0], id_read, 1, args[1]);
30
83
  }
31
- #else
32
- void vasprintf_free (void *p)
84
+
85
+
86
+ static VALUE
87
+ read_failed(VALUE arg, VALUE exc)
33
88
  {
34
- free(p);
89
+ return Qundef;
35
90
  }
36
- #endif
37
91
 
38
- #ifdef HAVE_RUBY_UTIL_H
39
- #include "ruby/util.h"
40
- #else
41
- #ifndef __MACRUBY__
42
- #include "util.h"
43
- #endif
44
- #endif
45
92
 
46
- void nokogiri_root_node(xmlNodePtr node)
93
+ int
94
+ noko_io_read(void *ctx, char *buffer, int len)
47
95
  {
48
- xmlDocPtr doc;
49
- nokogiriTuplePtr tuple;
96
+ VALUE string, args[2];
97
+ size_t str_len, safe_len;
50
98
 
51
- doc = node->doc;
52
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
53
- tuple = (nokogiriTuplePtr)doc->_private;
54
- st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
99
+ args[0] = (VALUE)ctx;
100
+ args[1] = INT2NUM(len);
101
+
102
+ string = rb_rescue(read_check, (VALUE)args, read_failed, 0);
103
+
104
+ if (NIL_P(string)) { return 0; }
105
+ if (string == Qundef) { return -1; }
106
+ if (TYPE(string) != T_STRING) { return -1; }
107
+
108
+ str_len = (size_t)RSTRING_LEN(string);
109
+ safe_len = str_len > (size_t)len ? (size_t)len : str_len;
110
+ memcpy(buffer, StringValuePtr(string), safe_len);
111
+
112
+ return (int)safe_len;
55
113
  }
56
114
 
57
- void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
115
+
116
+ static VALUE
117
+ write_check(VALUE val)
58
118
  {
59
- nokogiriTuplePtr tuple;
119
+ VALUE *args = (VALUE *)val;
120
+ return rb_funcall(args[0], id_write, 1, args[1]);
121
+ }
122
+
60
123
 
61
- if (doc->type == XML_DOCUMENT_FRAG_NODE) doc = doc->doc;
62
- tuple = (nokogiriTuplePtr)doc->_private;
63
- st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
124
+ static VALUE
125
+ write_failed(VALUE arg, VALUE exc)
126
+ {
127
+ return Qundef;
64
128
  }
65
129
 
66
- void Init_nokogiri()
130
+
131
+ int
132
+ noko_io_write(void *ctx, char *buffer, int len)
67
133
  {
68
- #ifndef __MACRUBY__
69
- xmlMemSetup(
70
- (xmlFreeFunc)ruby_xfree,
71
- (xmlMallocFunc)ruby_xmalloc,
72
- (xmlReallocFunc)ruby_xrealloc,
73
- ruby_strdup
74
- );
75
- #endif
134
+ VALUE args[2], size;
135
+
136
+ args[0] = (VALUE)ctx;
137
+ args[1] = rb_str_new(buffer, (long)len);
76
138
 
139
+ size = rb_rescue(write_check, (VALUE)args, write_failed, 0);
140
+
141
+ if (size == Qundef) { return -1; }
142
+
143
+ return NUM2INT(size);
144
+ }
145
+
146
+
147
+ int
148
+ noko_io_close(void *ctx)
149
+ {
150
+ return 0;
151
+ }
152
+
153
+
154
+ void
155
+ Init_nokogiri()
156
+ {
77
157
  mNokogiri = rb_define_module("Nokogiri");
158
+ mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
159
+ mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
160
+ mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
161
+ mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
78
162
  mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
79
- mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
80
- mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
81
163
  mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
82
- mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
164
+ mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
165
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
83
166
 
84
- rb_const_set( mNokogiri,
85
- rb_intern("LIBXML_VERSION"),
86
- NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)
87
- );
88
- rb_const_set( mNokogiri,
89
- rb_intern("LIBXML_PARSER_VERSION"),
90
- NOKOGIRI_STR_NEW2(xmlParserVersion)
91
- );
167
+ rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
168
+ rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
169
+
170
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXSLT_DOTTED_VERSION));
171
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xsltEngineVersion));
172
+
173
+ #ifdef NOKOGIRI_PACKAGED_LIBRARIES
174
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qtrue);
175
+ # ifdef NOKOGIRI_PRECOMPILED_LIBRARIES
176
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qtrue);
177
+ # else
178
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
179
+ # endif
180
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
181
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
182
+ #else
183
+ rb_const_set(mNokogiri, rb_intern("PACKAGED_LIBRARIES"), Qfalse);
184
+ rb_const_set(mNokogiri, rb_intern("PRECOMPILED_LIBRARIES"), Qfalse);
185
+ rb_const_set(mNokogiri, rb_intern("LIBXML2_PATCHES"), Qnil);
186
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_PATCHES"), Qnil);
187
+ #endif
92
188
 
93
189
  #ifdef LIBXML_ICONV_ENABLED
94
190
  rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qtrue);
@@ -96,38 +192,87 @@ void Init_nokogiri()
96
192
  rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qfalse);
97
193
  #endif
98
194
 
195
+ #ifdef NOKOGIRI_OTHER_LIBRARY_VERSIONS
196
+ rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
197
+ #endif
198
+
199
+ #if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
200
+ /*
201
+ * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
202
+ * issue in libxml 2.9.12:
203
+ *
204
+ * https://github.com/sparklemotion/nokogiri/issues/2241
205
+ *
206
+ * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
207
+ * this config only for the specific libxml2 versions 2.9.12.
208
+ *
209
+ * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
210
+ * default memory management functions (recall that this config was introduced to reduce memory
211
+ * bloat and allow Ruby to GC more often); but we should *really* test with production workloads
212
+ * before making that kind of a potentially-invasive change.
213
+ */
214
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
215
+ #else
216
+ rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
217
+ xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
218
+ #endif
219
+
99
220
  xmlInitParser();
221
+ exsltRegisterAll();
222
+
223
+ if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
224
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
225
+ } else {
226
+ rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse);
227
+ }
228
+
229
+ cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError);
230
+ noko_init_xml_syntax_error();
231
+ assert(cNokogiriXmlSyntaxError);
232
+ cNokogiriXmlXpathSyntaxError = rb_define_class_under(mNokogiriXmlXpath, "SyntaxError", cNokogiriXmlSyntaxError);
233
+
234
+ noko_init_xml_element_content();
235
+ noko_init_xml_encoding_handler();
236
+ noko_init_xml_namespace();
237
+ noko_init_xml_node_set();
238
+ noko_init_xml_reader();
239
+ noko_init_xml_sax_parser();
240
+ noko_init_xml_xpath_context();
241
+ noko_init_xslt_stylesheet();
242
+ noko_init_html_element_description();
243
+ noko_init_html_entity_lookup();
244
+
245
+ noko_init_xml_schema();
246
+ noko_init_xml_relax_ng();
247
+
248
+ noko_init_xml_sax_parser_context();
249
+ noko_init_html_sax_parser_context();
250
+
251
+ noko_init_xml_sax_push_parser();
252
+ noko_init_html_sax_push_parser();
253
+
254
+ noko_init_xml_node();
255
+ noko_init_xml_attr();
256
+ noko_init_xml_attribute_decl();
257
+ noko_init_xml_dtd();
258
+ noko_init_xml_element_decl();
259
+ noko_init_xml_entity_decl();
260
+ noko_init_xml_entity_reference();
261
+ noko_init_xml_processing_instruction();
262
+ assert(cNokogiriXmlNode);
263
+ cNokogiriXmlElement = rb_define_class_under(mNokogiriXml, "Element", cNokogiriXmlNode);
264
+ cNokogiriXmlCharacterData = rb_define_class_under(mNokogiriXml, "CharacterData", cNokogiriXmlNode);
265
+ noko_init_xml_comment();
266
+ noko_init_xml_text();
267
+ noko_init_xml_cdata();
268
+
269
+ noko_init_xml_document_fragment();
270
+ noko_init_xml_document();
271
+ noko_init_html_document();
272
+ noko_init_gumbo();
273
+
274
+ noko_init_test_global_handlers();
100
275
 
101
- init_xml_document();
102
- init_html_document();
103
- init_xml_node();
104
- init_xml_document_fragment();
105
- init_xml_text();
106
- init_xml_cdata();
107
- init_xml_processing_instruction();
108
- init_xml_attr();
109
- init_xml_entity_reference();
110
- init_xml_comment();
111
- init_xml_node_set();
112
- init_xml_xpath_context();
113
- init_xml_sax_parser_context();
114
- init_xml_sax_parser();
115
- init_xml_sax_push_parser();
116
- init_xml_reader();
117
- init_xml_dtd();
118
- init_xml_element_content();
119
- init_xml_attribute_decl();
120
- init_xml_element_decl();
121
- init_xml_entity_decl();
122
- init_xml_namespace();
123
- init_html_sax_parser_context();
124
- init_html_sax_push_parser();
125
- init_xslt_stylesheet();
126
- init_xml_syntax_error();
127
- init_html_entity_lookup();
128
- init_html_element_description();
129
- init_xml_schema();
130
- init_xml_relax_ng();
131
- init_nokogiri_io();
132
- init_xml_encoding_handler();
276
+ id_read = rb_intern("read");
277
+ id_write = rb_intern("write");
133
278
  }