nokogiri 1.6.0 → 1.13.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (340) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -19
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +23 -4
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +952 -132
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +231 -96
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +269 -177
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +407 -357
  94. data/lib/nokogiri/css/parser.y +265 -246
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +8 -7
  99. data/lib/nokogiri/css/xpath_visitor.rb +266 -80
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -105
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +270 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +278 -362
  177. data/.autotest +0 -26
  178. data/.gemtest +0 -0
  179. data/.travis.yml +0 -27
  180. data/CHANGELOG.ja.rdoc +0 -819
  181. data/CHANGELOG.rdoc +0 -819
  182. data/C_CODING_STYLE.rdoc +0 -33
  183. data/Manifest.txt +0 -315
  184. data/README.ja.rdoc +0 -106
  185. data/README.rdoc +0 -175
  186. data/ROADMAP.md +0 -90
  187. data/Rakefile +0 -246
  188. data/STANDARD_RESPONSES.md +0 -47
  189. data/Y_U_NO_GEMSPEC.md +0 -155
  190. data/build_all +0 -105
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -56
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -13
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -14
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document.rb +0 -254
  232. data/lib/nokogiri/html/document_fragment.rb +0 -41
  233. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  234. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  235. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  236. data/ports/archives/libxml2-2.8.0.tar.gz +0 -0
  237. data/ports/archives/libxslt-1.1.26.tar.gz +0 -0
  238. data/tasks/cross_compile.rb +0 -132
  239. data/tasks/nokogiri.org.rb +0 -24
  240. data/tasks/test.rb +0 -95
  241. data/test/css/test_nthiness.rb +0 -159
  242. data/test/css/test_parser.rb +0 -341
  243. data/test/css/test_tokenizer.rb +0 -198
  244. data/test/css/test_xpath_visitor.rb +0 -91
  245. data/test/decorators/test_slop.rb +0 -16
  246. data/test/files/2ch.html +0 -108
  247. data/test/files/address_book.rlx +0 -12
  248. data/test/files/address_book.xml +0 -10
  249. data/test/files/bar/bar.xsd +0 -4
  250. data/test/files/bogus.xml +0 -0
  251. data/test/files/dont_hurt_em_why.xml +0 -422
  252. data/test/files/encoding.html +0 -82
  253. data/test/files/encoding.xhtml +0 -84
  254. data/test/files/exslt.xml +0 -8
  255. data/test/files/exslt.xslt +0 -35
  256. data/test/files/foo/foo.xsd +0 -4
  257. data/test/files/metacharset.html +0 -10
  258. data/test/files/noencoding.html +0 -47
  259. data/test/files/po.xml +0 -32
  260. data/test/files/po.xsd +0 -66
  261. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  262. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  263. data/test/files/saml/xenc_schema.xsd +0 -146
  264. data/test/files/saml/xmldsig_schema.xsd +0 -318
  265. data/test/files/shift_jis.html +0 -10
  266. data/test/files/shift_jis.xml +0 -5
  267. data/test/files/snuggles.xml +0 -3
  268. data/test/files/staff.dtd +0 -10
  269. data/test/files/staff.xml +0 -59
  270. data/test/files/staff.xslt +0 -32
  271. data/test/files/test_document_url/bar.xml +0 -2
  272. data/test/files/test_document_url/document.dtd +0 -4
  273. data/test/files/test_document_url/document.xml +0 -6
  274. data/test/files/tlm.html +0 -850
  275. data/test/files/to_be_xincluded.xml +0 -2
  276. data/test/files/valid_bar.xml +0 -2
  277. data/test/files/xinclude.xml +0 -4
  278. data/test/helper.rb +0 -154
  279. data/test/html/sax/test_parser.rb +0 -141
  280. data/test/html/sax/test_parser_context.rb +0 -46
  281. data/test/html/test_builder.rb +0 -164
  282. data/test/html/test_document.rb +0 -552
  283. data/test/html/test_document_encoding.rb +0 -138
  284. data/test/html/test_document_fragment.rb +0 -261
  285. data/test/html/test_element_description.rb +0 -105
  286. data/test/html/test_named_characters.rb +0 -14
  287. data/test/html/test_node.rb +0 -196
  288. data/test/html/test_node_encoding.rb +0 -27
  289. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  290. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  291. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  292. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  293. data/test/test_convert_xpath.rb +0 -135
  294. data/test/test_css_cache.rb +0 -45
  295. data/test/test_encoding_handler.rb +0 -46
  296. data/test/test_memory_leak.rb +0 -156
  297. data/test/test_nokogiri.rb +0 -132
  298. data/test/test_reader.rb +0 -555
  299. data/test/test_soap4r_sax.rb +0 -52
  300. data/test/test_xslt_transforms.rb +0 -254
  301. data/test/xml/node/test_save_options.rb +0 -28
  302. data/test/xml/node/test_subclass.rb +0 -44
  303. data/test/xml/sax/test_parser.rb +0 -366
  304. data/test/xml/sax/test_parser_context.rb +0 -106
  305. data/test/xml/sax/test_push_parser.rb +0 -157
  306. data/test/xml/test_attr.rb +0 -64
  307. data/test/xml/test_attribute_decl.rb +0 -86
  308. data/test/xml/test_builder.rb +0 -306
  309. data/test/xml/test_c14n.rb +0 -151
  310. data/test/xml/test_cdata.rb +0 -48
  311. data/test/xml/test_comment.rb +0 -29
  312. data/test/xml/test_document.rb +0 -828
  313. data/test/xml/test_document_encoding.rb +0 -28
  314. data/test/xml/test_document_fragment.rb +0 -223
  315. data/test/xml/test_dtd.rb +0 -103
  316. data/test/xml/test_dtd_encoding.rb +0 -33
  317. data/test/xml/test_element_content.rb +0 -56
  318. data/test/xml/test_element_decl.rb +0 -73
  319. data/test/xml/test_entity_decl.rb +0 -122
  320. data/test/xml/test_entity_reference.rb +0 -245
  321. data/test/xml/test_namespace.rb +0 -95
  322. data/test/xml/test_node.rb +0 -1137
  323. data/test/xml/test_node_attributes.rb +0 -96
  324. data/test/xml/test_node_encoding.rb +0 -107
  325. data/test/xml/test_node_inheritance.rb +0 -32
  326. data/test/xml/test_node_reparenting.rb +0 -374
  327. data/test/xml/test_node_set.rb +0 -755
  328. data/test/xml/test_parse_options.rb +0 -64
  329. data/test/xml/test_processing_instruction.rb +0 -30
  330. data/test/xml/test_reader_encoding.rb +0 -142
  331. data/test/xml/test_relax_ng.rb +0 -60
  332. data/test/xml/test_schema.rb +0 -103
  333. data/test/xml/test_syntax_error.rb +0 -12
  334. data/test/xml/test_text.rb +0 -45
  335. data/test/xml/test_unparented_node.rb +0 -422
  336. data/test/xml/test_xinclude.rb +0 -83
  337. data/test/xml/test_xpath.rb +0 -295
  338. data/test/xslt/test_custom_functions.rb +0 -133
  339. data/test/xslt/test_exception_handling.rb +0 -37
  340. data/test_all +0 -81
@@ -0,0 +1,166 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtml4Document ;
4
+
5
+ static ID id_encoding_found;
6
+ static ID id_to_s;
7
+
8
+ /*
9
+ * call-seq:
10
+ * new
11
+ *
12
+ * Create a new document
13
+ */
14
+ static VALUE
15
+ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
16
+ {
17
+ VALUE uri, external_id, rest, rb_doc;
18
+ htmlDocPtr doc;
19
+
20
+ rb_scan_args(argc, argv, "0*", &rest);
21
+ uri = rb_ary_entry(rest, (long)0);
22
+ external_id = rb_ary_entry(rest, (long)1);
23
+
24
+ doc = htmlNewDoc(
25
+ RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
26
+ RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
27
+ );
28
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
29
+ return rb_doc ;
30
+ }
31
+
32
+ /*
33
+ * call-seq:
34
+ * read_io(io, url, encoding, options)
35
+ *
36
+ * Read the HTML document from +io+ with given +url+, +encoding+,
37
+ * and +options+. See Nokogiri::HTML4.parse
38
+ */
39
+ static VALUE
40
+ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_encoding, VALUE rb_options)
41
+ {
42
+ VALUE rb_doc;
43
+ VALUE rb_error_list = rb_ary_new();
44
+ htmlDocPtr c_doc;
45
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
46
+ const char *c_encoding = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
47
+ int options = NUM2INT(rb_options);
48
+
49
+ xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher);
50
+
51
+ c_doc = htmlReadIO(noko_io_read, noko_io_close, (void *)rb_io, c_url, c_encoding, options);
52
+
53
+ xmlSetStructuredErrorFunc(NULL, NULL);
54
+
55
+ /*
56
+ * If EncodingFound has occurred in EncodingReader, make sure to do
57
+ * a cleanup and propagate the error.
58
+ */
59
+ if (rb_respond_to(rb_io, id_encoding_found)) {
60
+ VALUE encoding_found = rb_funcall(rb_io, id_encoding_found, 0);
61
+ if (!NIL_P(encoding_found)) {
62
+ xmlFreeDoc(c_doc);
63
+ rb_exc_raise(encoding_found);
64
+ }
65
+ }
66
+
67
+ if ((c_doc == NULL) || (!(options & XML_PARSE_RECOVER) && (RARRAY_LEN(rb_error_list) > 0))) {
68
+ VALUE rb_error ;
69
+
70
+ xmlFreeDoc(c_doc);
71
+
72
+ rb_error = rb_ary_entry(rb_error_list, 0);
73
+ if (rb_error == Qnil) {
74
+ rb_raise(rb_eRuntimeError, "Could not parse document");
75
+ } else {
76
+ VALUE exception_message = rb_funcall(rb_error, id_to_s, 0);
77
+ exception_message = rb_str_concat(rb_str_new2("Parser without recover option encountered error or warning: "),
78
+ exception_message);
79
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
80
+ }
81
+
82
+ return Qnil;
83
+ }
84
+
85
+ rb_doc = noko_xml_document_wrap(klass, c_doc);
86
+ rb_iv_set(rb_doc, "@errors", rb_error_list);
87
+ return rb_doc;
88
+ }
89
+
90
+ /*
91
+ * call-seq:
92
+ * read_memory(string, url, encoding, options)
93
+ *
94
+ * Read the HTML document contained in +string+ with given +url+, +encoding+,
95
+ * and +options+. See Nokogiri::HTML4.parse
96
+ */
97
+ static VALUE
98
+ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE rb_encoding, VALUE rb_options)
99
+ {
100
+ VALUE rb_doc;
101
+ VALUE rb_error_list = rb_ary_new();
102
+ htmlDocPtr c_doc;
103
+ const char *c_buffer = StringValuePtr(rb_html);
104
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
105
+ const char *c_encoding = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
106
+ int html_len = (int)RSTRING_LEN(rb_html);
107
+ int options = NUM2INT(rb_options);
108
+
109
+ xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher);
110
+
111
+ c_doc = htmlReadMemory(c_buffer, html_len, c_url, c_encoding, options);
112
+
113
+ xmlSetStructuredErrorFunc(NULL, NULL);
114
+
115
+ if ((c_doc == NULL) || (!(options & XML_PARSE_RECOVER) && (RARRAY_LEN(rb_error_list) > 0))) {
116
+ VALUE rb_error ;
117
+
118
+ xmlFreeDoc(c_doc);
119
+
120
+ rb_error = rb_ary_entry(rb_error_list, 0);
121
+ if (rb_error == Qnil) {
122
+ rb_raise(rb_eRuntimeError, "Could not parse document");
123
+ } else {
124
+ VALUE exception_message = rb_funcall(rb_error, id_to_s, 0);
125
+ exception_message = rb_str_concat(rb_str_new2("Parser without recover option encountered error or warning: "),
126
+ exception_message);
127
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
128
+ }
129
+
130
+ return Qnil;
131
+ }
132
+
133
+ rb_doc = noko_xml_document_wrap(klass, c_doc);
134
+ rb_iv_set(rb_doc, "@errors", rb_error_list);
135
+ return rb_doc;
136
+ }
137
+
138
+ /*
139
+ * call-seq:
140
+ * type
141
+ *
142
+ * The type for this document
143
+ */
144
+ static VALUE
145
+ rb_html_document_type(VALUE self)
146
+ {
147
+ htmlDocPtr doc;
148
+ Data_Get_Struct(self, xmlDoc, doc);
149
+ return INT2NUM((long)doc->type);
150
+ }
151
+
152
+ void
153
+ noko_init_html_document()
154
+ {
155
+ assert(cNokogiriXmlDocument);
156
+ cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument);
157
+
158
+ rb_define_singleton_method(cNokogiriHtml4Document, "read_memory", rb_html_document_s_read_memory, 4);
159
+ rb_define_singleton_method(cNokogiriHtml4Document, "read_io", rb_html_document_s_read_io, 4);
160
+ rb_define_singleton_method(cNokogiriHtml4Document, "new", rb_html_document_s_new, -1);
161
+
162
+ rb_define_method(cNokogiriHtml4Document, "type", rb_html_document_type, 0);
163
+
164
+ id_encoding_found = rb_intern("encoding_found");
165
+ id_to_s = rb_intern("to_s");
166
+ }
@@ -0,0 +1,294 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtml4ElementDescription ;
4
+
5
+ /*
6
+ * call-seq:
7
+ * required_attributes
8
+ *
9
+ * A list of required attributes for this element
10
+ */
11
+ static VALUE
12
+ required_attributes(VALUE self)
13
+ {
14
+ const htmlElemDesc *description;
15
+ VALUE list;
16
+ int i;
17
+
18
+ Data_Get_Struct(self, htmlElemDesc, description);
19
+
20
+ list = rb_ary_new();
21
+
22
+ if (NULL == description->attrs_req) { return list; }
23
+
24
+ for (i = 0; description->attrs_depr[i]; i++) {
25
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
26
+ }
27
+
28
+ return list;
29
+ }
30
+
31
+ /*
32
+ * call-seq:
33
+ * deprecated_attributes
34
+ *
35
+ * A list of deprecated attributes for this element
36
+ */
37
+ static VALUE
38
+ deprecated_attributes(VALUE self)
39
+ {
40
+ const htmlElemDesc *description;
41
+ VALUE list;
42
+ int i;
43
+
44
+ Data_Get_Struct(self, htmlElemDesc, description);
45
+
46
+ list = rb_ary_new();
47
+
48
+ if (NULL == description->attrs_depr) { return list; }
49
+
50
+ for (i = 0; description->attrs_depr[i]; i++) {
51
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
52
+ }
53
+
54
+ return list;
55
+ }
56
+
57
+ /*
58
+ * call-seq:
59
+ * optional_attributes
60
+ *
61
+ * A list of optional attributes for this element
62
+ */
63
+ static VALUE
64
+ optional_attributes(VALUE self)
65
+ {
66
+ const htmlElemDesc *description;
67
+ VALUE list;
68
+ int i;
69
+
70
+ Data_Get_Struct(self, htmlElemDesc, description);
71
+
72
+ list = rb_ary_new();
73
+
74
+ if (NULL == description->attrs_opt) { return list; }
75
+
76
+ for (i = 0; description->attrs_opt[i]; i++) {
77
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
78
+ }
79
+
80
+ return list;
81
+ }
82
+
83
+ /*
84
+ * call-seq:
85
+ * default_sub_element
86
+ *
87
+ * The default sub element for this element
88
+ */
89
+ static VALUE
90
+ default_sub_element(VALUE self)
91
+ {
92
+ const htmlElemDesc *description;
93
+ Data_Get_Struct(self, htmlElemDesc, description);
94
+
95
+ if (description->defaultsubelt) {
96
+ return NOKOGIRI_STR_NEW2(description->defaultsubelt);
97
+ }
98
+
99
+ return Qnil;
100
+ }
101
+
102
+ /*
103
+ * call-seq:
104
+ * sub_elements
105
+ *
106
+ * A list of allowed sub elements for this element.
107
+ */
108
+ static VALUE
109
+ sub_elements(VALUE self)
110
+ {
111
+ const htmlElemDesc *description;
112
+ VALUE list;
113
+ int i;
114
+
115
+ Data_Get_Struct(self, htmlElemDesc, description);
116
+
117
+ list = rb_ary_new();
118
+
119
+ if (NULL == description->subelts) { return list; }
120
+
121
+ for (i = 0; description->subelts[i]; i++) {
122
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
123
+ }
124
+
125
+ return list;
126
+ }
127
+
128
+ /*
129
+ * call-seq:
130
+ * description
131
+ *
132
+ * The description for this element
133
+ */
134
+ static VALUE
135
+ description(VALUE self)
136
+ {
137
+ const htmlElemDesc *description;
138
+ Data_Get_Struct(self, htmlElemDesc, description);
139
+
140
+ return NOKOGIRI_STR_NEW2(description->desc);
141
+ }
142
+
143
+ /*
144
+ * call-seq:
145
+ * inline?
146
+ *
147
+ * Is this element an inline element?
148
+ */
149
+ static VALUE
150
+ inline_eh(VALUE self)
151
+ {
152
+ const htmlElemDesc *description;
153
+ Data_Get_Struct(self, htmlElemDesc, description);
154
+
155
+ if (description->isinline) { return Qtrue; }
156
+ return Qfalse;
157
+ }
158
+
159
+ /*
160
+ * call-seq:
161
+ * deprecated?
162
+ *
163
+ * Is this element deprecated?
164
+ */
165
+ static VALUE
166
+ deprecated_eh(VALUE self)
167
+ {
168
+ const htmlElemDesc *description;
169
+ Data_Get_Struct(self, htmlElemDesc, description);
170
+
171
+ if (description->depr) { return Qtrue; }
172
+ return Qfalse;
173
+ }
174
+
175
+ /*
176
+ * call-seq:
177
+ * empty?
178
+ *
179
+ * Is this an empty element?
180
+ */
181
+ static VALUE
182
+ empty_eh(VALUE self)
183
+ {
184
+ const htmlElemDesc *description;
185
+ Data_Get_Struct(self, htmlElemDesc, description);
186
+
187
+ if (description->empty) { return Qtrue; }
188
+ return Qfalse;
189
+ }
190
+
191
+ /*
192
+ * call-seq:
193
+ * save_end_tag?
194
+ *
195
+ * Should the end tag be saved?
196
+ */
197
+ static VALUE
198
+ save_end_tag_eh(VALUE self)
199
+ {
200
+ const htmlElemDesc *description;
201
+ Data_Get_Struct(self, htmlElemDesc, description);
202
+
203
+ if (description->saveEndTag) { return Qtrue; }
204
+ return Qfalse;
205
+ }
206
+
207
+ /*
208
+ * call-seq:
209
+ * implied_end_tag?
210
+ *
211
+ * Can the end tag be implied for this tag?
212
+ */
213
+ static VALUE
214
+ implied_end_tag_eh(VALUE self)
215
+ {
216
+ const htmlElemDesc *description;
217
+ Data_Get_Struct(self, htmlElemDesc, description);
218
+
219
+ if (description->endTag) { return Qtrue; }
220
+ return Qfalse;
221
+ }
222
+
223
+ /*
224
+ * call-seq:
225
+ * implied_start_tag?
226
+ *
227
+ * Can the start tag be implied for this tag?
228
+ */
229
+ static VALUE
230
+ implied_start_tag_eh(VALUE self)
231
+ {
232
+ const htmlElemDesc *description;
233
+ Data_Get_Struct(self, htmlElemDesc, description);
234
+
235
+ if (description->startTag) { return Qtrue; }
236
+ return Qfalse;
237
+ }
238
+
239
+ /*
240
+ * call-seq:
241
+ * name
242
+ *
243
+ * Get the tag name for this ElemementDescription
244
+ */
245
+ static VALUE
246
+ name(VALUE self)
247
+ {
248
+ const htmlElemDesc *description;
249
+ Data_Get_Struct(self, htmlElemDesc, description);
250
+
251
+ if (NULL == description->name) { return Qnil; }
252
+ return NOKOGIRI_STR_NEW2(description->name);
253
+ }
254
+
255
+ /*
256
+ * call-seq:
257
+ * [](tag_name)
258
+ *
259
+ * Get ElemementDescription for +tag_name+
260
+ */
261
+ static VALUE
262
+ get_description(VALUE klass, VALUE tag_name)
263
+ {
264
+ const htmlElemDesc *description = htmlTagLookup(
265
+ (const xmlChar *)StringValueCStr(tag_name)
266
+ );
267
+
268
+ if (NULL == description) { return Qnil; }
269
+ return Data_Wrap_Struct(klass, 0, 0, DISCARD_CONST_QUAL(void *, description));
270
+ }
271
+
272
+ void
273
+ noko_init_html_element_description()
274
+ {
275
+ cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject);
276
+
277
+ rb_undef_alloc_func(cNokogiriHtml4ElementDescription);
278
+
279
+ rb_define_singleton_method(cNokogiriHtml4ElementDescription, "[]", get_description, 1);
280
+
281
+ rb_define_method(cNokogiriHtml4ElementDescription, "name", name, 0);
282
+ rb_define_method(cNokogiriHtml4ElementDescription, "implied_start_tag?", implied_start_tag_eh, 0);
283
+ rb_define_method(cNokogiriHtml4ElementDescription, "implied_end_tag?", implied_end_tag_eh, 0);
284
+ rb_define_method(cNokogiriHtml4ElementDescription, "save_end_tag?", save_end_tag_eh, 0);
285
+ rb_define_method(cNokogiriHtml4ElementDescription, "empty?", empty_eh, 0);
286
+ rb_define_method(cNokogiriHtml4ElementDescription, "deprecated?", deprecated_eh, 0);
287
+ rb_define_method(cNokogiriHtml4ElementDescription, "inline?", inline_eh, 0);
288
+ rb_define_method(cNokogiriHtml4ElementDescription, "description", description, 0);
289
+ rb_define_method(cNokogiriHtml4ElementDescription, "sub_elements", sub_elements, 0);
290
+ rb_define_method(cNokogiriHtml4ElementDescription, "default_sub_element", default_sub_element, 0);
291
+ rb_define_method(cNokogiriHtml4ElementDescription, "optional_attributes", optional_attributes, 0);
292
+ rb_define_method(cNokogiriHtml4ElementDescription, "deprecated_attributes", deprecated_attributes, 0);
293
+ rb_define_method(cNokogiriHtml4ElementDescription, "required_attributes", required_attributes, 0);
294
+ }
@@ -0,0 +1,37 @@
1
+ #include <nokogiri.h>
2
+
3
+ static VALUE cNokogiriHtml4EntityLookup;
4
+
5
+ /*
6
+ * call-seq:
7
+ * get(key)
8
+ *
9
+ * Get the HTML4::EntityDescription for +key+
10
+ */
11
+ static VALUE
12
+ get(VALUE _, VALUE rb_entity_name)
13
+ {
14
+ VALUE cNokogiriHtml4EntityDescription;
15
+ const htmlEntityDesc *c_entity_desc;
16
+ VALUE rb_constructor_args[3];
17
+
18
+ c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name));
19
+ if (NULL == c_entity_desc) {
20
+ return Qnil;
21
+ }
22
+
23
+ rb_constructor_args[0] = INT2NUM((long)c_entity_desc->value);
24
+ rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
25
+ rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
26
+
27
+ cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription"));
28
+ return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription);
29
+ }
30
+
31
+ void
32
+ noko_init_html_entity_lookup()
33
+ {
34
+ cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
35
+
36
+ rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1);
37
+ }
@@ -0,0 +1,120 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtml4SaxParserContext ;
4
+
5
+ static void
6
+ deallocate(xmlParserCtxtPtr ctxt)
7
+ {
8
+ NOKOGIRI_DEBUG_START(ctxt);
9
+
10
+ ctxt->sax = NULL;
11
+
12
+ htmlFreeParserCtxt(ctxt);
13
+
14
+ NOKOGIRI_DEBUG_END(ctxt);
15
+ }
16
+
17
+ static VALUE
18
+ parse_memory(VALUE klass, VALUE data, VALUE encoding)
19
+ {
20
+ htmlParserCtxtPtr ctxt;
21
+
22
+ if (NIL_P(data)) {
23
+ rb_raise(rb_eArgError, "data cannot be nil");
24
+ }
25
+ if (!(int)RSTRING_LEN(data)) {
26
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
27
+ }
28
+
29
+ ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
30
+ (int)RSTRING_LEN(data));
31
+ if (ctxt->sax) {
32
+ xmlFree(ctxt->sax);
33
+ ctxt->sax = NULL;
34
+ }
35
+
36
+ if (RTEST(encoding)) {
37
+ xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
38
+ if (enc != NULL) {
39
+ xmlSwitchToEncoding(ctxt, enc);
40
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
41
+ rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
42
+ StringValueCStr(encoding));
43
+ }
44
+ }
45
+ }
46
+
47
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
48
+ }
49
+
50
+ static VALUE
51
+ parse_file(VALUE klass, VALUE filename, VALUE encoding)
52
+ {
53
+ htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
54
+ StringValueCStr(filename),
55
+ StringValueCStr(encoding)
56
+ );
57
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
58
+ }
59
+
60
+ static VALUE
61
+ parse_doc(VALUE ctxt_val)
62
+ {
63
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
64
+ htmlParseDocument(ctxt);
65
+ return Qnil;
66
+ }
67
+
68
+ static VALUE
69
+ parse_doc_finalize(VALUE ctxt_val)
70
+ {
71
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
72
+
73
+ if (ctxt->myDoc) {
74
+ xmlFreeDoc(ctxt->myDoc);
75
+ }
76
+
77
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
78
+ return Qnil;
79
+ }
80
+
81
+ static VALUE
82
+ parse_with(VALUE self, VALUE sax_handler)
83
+ {
84
+ htmlParserCtxtPtr ctxt;
85
+ htmlSAXHandlerPtr sax;
86
+
87
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
88
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
89
+ }
90
+
91
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
92
+ Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
93
+
94
+ /* Free the sax handler since we'll assign our own */
95
+ if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
96
+ xmlFree(ctxt->sax);
97
+ }
98
+
99
+ ctxt->sax = sax;
100
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
101
+
102
+ xmlSetStructuredErrorFunc(NULL, NULL);
103
+
104
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
105
+
106
+ return self;
107
+ }
108
+
109
+ void
110
+ noko_init_html_sax_parser_context()
111
+ {
112
+ assert(cNokogiriXmlSaxParserContext);
113
+ cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext",
114
+ cNokogiriXmlSaxParserContext);
115
+
116
+ rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2);
117
+ rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2);
118
+
119
+ rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1);
120
+ }
@@ -0,0 +1,95 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtml4SaxPushParser;
4
+
5
+ /*
6
+ * call-seq:
7
+ * native_write(chunk, last_chunk)
8
+ *
9
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
10
+ */
11
+ static VALUE
12
+ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
13
+ {
14
+ xmlParserCtxtPtr ctx;
15
+ const char *chunk = NULL;
16
+ int size = 0;
17
+ int status = 0;
18
+ libxmlStructuredErrorHandlerState handler_state;
19
+
20
+ Data_Get_Struct(self, xmlParserCtxt, ctx);
21
+
22
+ if (Qnil != _chunk) {
23
+ chunk = StringValuePtr(_chunk);
24
+ size = (int)RSTRING_LEN(_chunk);
25
+ }
26
+
27
+ Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);
28
+
29
+ status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);
30
+
31
+ Nokogiri_structured_error_func_restore(&handler_state);
32
+
33
+ if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
34
+ // TODO: there appear to be no tests for this block
35
+ xmlErrorPtr e = xmlCtxtGetLastError(ctx);
36
+ Nokogiri_error_raise(NULL, e);
37
+ }
38
+
39
+ return self;
40
+ }
41
+
42
+ /*
43
+ * call-seq:
44
+ * initialize_native(xml_sax, filename)
45
+ *
46
+ * Initialize the push parser with +xml_sax+ using +filename+
47
+ */
48
+ static VALUE
49
+ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
50
+ VALUE encoding)
51
+ {
52
+ htmlSAXHandlerPtr sax;
53
+ const char *filename = NULL;
54
+ htmlParserCtxtPtr ctx;
55
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
56
+
57
+ Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
58
+
59
+ if (_filename != Qnil) { filename = StringValueCStr(_filename); }
60
+
61
+ if (!NIL_P(encoding)) {
62
+ enc = xmlParseCharEncoding(StringValueCStr(encoding));
63
+ if (enc == XML_CHAR_ENCODING_ERROR) {
64
+ rb_raise(rb_eArgError, "Unsupported Encoding");
65
+ }
66
+ }
67
+
68
+ ctx = htmlCreatePushParserCtxt(
69
+ sax,
70
+ NULL,
71
+ NULL,
72
+ 0,
73
+ filename,
74
+ enc
75
+ );
76
+ if (ctx == NULL) {
77
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
78
+ }
79
+
80
+ ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
81
+
82
+ ctx->sax2 = 1;
83
+ DATA_PTR(self) = ctx;
84
+ return self;
85
+ }
86
+
87
+ void
88
+ noko_init_html_sax_push_parser()
89
+ {
90
+ assert(cNokogiriXmlSaxPushParser);
91
+ cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
92
+
93
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3);
94
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2);
95
+ }