nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -0,0 +1,165 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtml4Document ;
4
+
5
+ static ID id_encoding_found;
6
+ static ID id_to_s;
7
+
8
+ /*
9
+ * call-seq:
10
+ * new
11
+ *
12
+ * Create a new document
13
+ */
14
+ static VALUE
15
+ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
16
+ {
17
+ VALUE uri, external_id, rest, rb_doc;
18
+ htmlDocPtr doc;
19
+
20
+ rb_scan_args(argc, argv, "0*", &rest);
21
+ uri = rb_ary_entry(rest, (long)0);
22
+ external_id = rb_ary_entry(rest, (long)1);
23
+
24
+ doc = htmlNewDoc(
25
+ RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
26
+ RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
27
+ );
28
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
29
+ return rb_doc ;
30
+ }
31
+
32
+ /*
33
+ * call-seq:
34
+ * read_io(io, url, encoding, options)
35
+ *
36
+ * Read the HTML document from +io+ with given +url+, +encoding+,
37
+ * and +options+. See Nokogiri::HTML4.parse
38
+ */
39
+ static VALUE
40
+ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_encoding, VALUE rb_options)
41
+ {
42
+ VALUE rb_doc;
43
+ VALUE rb_error_list = rb_ary_new();
44
+ htmlDocPtr c_doc;
45
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
46
+ const char *c_encoding = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
47
+ int options = NUM2INT(rb_options);
48
+
49
+ xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher);
50
+
51
+ c_doc = htmlReadIO(noko_io_read, noko_io_close, (void *)rb_io, c_url, c_encoding, options);
52
+
53
+ xmlSetStructuredErrorFunc(NULL, NULL);
54
+
55
+ /*
56
+ * If EncodingFound has occurred in EncodingReader, make sure to do
57
+ * a cleanup and propagate the error.
58
+ */
59
+ if (rb_respond_to(rb_io, id_encoding_found)) {
60
+ VALUE encoding_found = rb_funcall(rb_io, id_encoding_found, 0);
61
+ if (!NIL_P(encoding_found)) {
62
+ xmlFreeDoc(c_doc);
63
+ rb_exc_raise(encoding_found);
64
+ }
65
+ }
66
+
67
+ if ((c_doc == NULL) || (!(options & XML_PARSE_RECOVER) && (RARRAY_LEN(rb_error_list) > 0))) {
68
+ VALUE rb_error ;
69
+
70
+ xmlFreeDoc(c_doc);
71
+
72
+ rb_error = rb_ary_entry(rb_error_list, 0);
73
+ if (rb_error == Qnil) {
74
+ rb_raise(rb_eRuntimeError, "Could not parse document");
75
+ } else {
76
+ VALUE exception_message = rb_funcall(rb_error, id_to_s, 0);
77
+ exception_message = rb_str_concat(rb_str_new2("Parser without recover option encountered error or warning: "),
78
+ exception_message);
79
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
80
+ }
81
+
82
+ return Qnil;
83
+ }
84
+
85
+ rb_doc = noko_xml_document_wrap(klass, c_doc);
86
+ rb_iv_set(rb_doc, "@errors", rb_error_list);
87
+ return rb_doc;
88
+ }
89
+
90
+ /*
91
+ * call-seq:
92
+ * read_memory(string, url, encoding, options)
93
+ *
94
+ * Read the HTML document contained in +string+ with given +url+, +encoding+,
95
+ * and +options+. See Nokogiri::HTML4.parse
96
+ */
97
+ static VALUE
98
+ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE rb_encoding, VALUE rb_options)
99
+ {
100
+ VALUE rb_doc;
101
+ VALUE rb_error_list = rb_ary_new();
102
+ htmlDocPtr c_doc;
103
+ const char *c_buffer = StringValuePtr(rb_html);
104
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
105
+ const char *c_encoding = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
106
+ int html_len = (int)RSTRING_LEN(rb_html);
107
+ int options = NUM2INT(rb_options);
108
+
109
+ xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher);
110
+
111
+ c_doc = htmlReadMemory(c_buffer, html_len, c_url, c_encoding, options);
112
+
113
+ xmlSetStructuredErrorFunc(NULL, NULL);
114
+
115
+ if ((c_doc == NULL) || (!(options & XML_PARSE_RECOVER) && (RARRAY_LEN(rb_error_list) > 0))) {
116
+ VALUE rb_error ;
117
+
118
+ xmlFreeDoc(c_doc);
119
+
120
+ rb_error = rb_ary_entry(rb_error_list, 0);
121
+ if (rb_error == Qnil) {
122
+ rb_raise(rb_eRuntimeError, "Could not parse document");
123
+ } else {
124
+ VALUE exception_message = rb_funcall(rb_error, id_to_s, 0);
125
+ exception_message = rb_str_concat(rb_str_new2("Parser without recover option encountered error or warning: "),
126
+ exception_message);
127
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
128
+ }
129
+
130
+ return Qnil;
131
+ }
132
+
133
+ rb_doc = noko_xml_document_wrap(klass, c_doc);
134
+ rb_iv_set(rb_doc, "@errors", rb_error_list);
135
+ return rb_doc;
136
+ }
137
+
138
+ /*
139
+ * call-seq:
140
+ * type
141
+ *
142
+ * The type for this document
143
+ */
144
+ static VALUE
145
+ rb_html_document_type(VALUE self)
146
+ {
147
+ htmlDocPtr doc = noko_xml_document_unwrap(self);
148
+ return INT2NUM(doc->type);
149
+ }
150
+
151
+ void
152
+ noko_init_html_document(void)
153
+ {
154
+ assert(cNokogiriXmlDocument);
155
+ cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument);
156
+
157
+ rb_define_singleton_method(cNokogiriHtml4Document, "read_memory", rb_html_document_s_read_memory, 4);
158
+ rb_define_singleton_method(cNokogiriHtml4Document, "read_io", rb_html_document_s_read_io, 4);
159
+ rb_define_singleton_method(cNokogiriHtml4Document, "new", rb_html_document_s_new, -1);
160
+
161
+ rb_define_method(cNokogiriHtml4Document, "type", rb_html_document_type, 0);
162
+
163
+ id_encoding_found = rb_intern("encoding_found");
164
+ id_to_s = rb_intern("to_s");
165
+ }
@@ -0,0 +1,299 @@
1
+ #include <nokogiri.h>
2
+
3
+ static const rb_data_type_t html4_element_description_type = {
4
+ .wrap_struct_name = "Nokogiri::HTML4::ElementDescription",
5
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
6
+ };
7
+
8
+ VALUE cNokogiriHtml4ElementDescription ;
9
+
10
+ /*
11
+ * call-seq:
12
+ * required_attributes
13
+ *
14
+ * A list of required attributes for this element
15
+ */
16
+ static VALUE
17
+ required_attributes(VALUE self)
18
+ {
19
+ const htmlElemDesc *description;
20
+ VALUE list;
21
+ int i;
22
+
23
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
24
+
25
+ list = rb_ary_new();
26
+
27
+ if (NULL == description->attrs_req) { return list; }
28
+
29
+ for (i = 0; description->attrs_depr[i]; i++) {
30
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
31
+ }
32
+
33
+ return list;
34
+ }
35
+
36
+ /*
37
+ * call-seq:
38
+ * deprecated_attributes
39
+ *
40
+ * A list of deprecated attributes for this element
41
+ */
42
+ static VALUE
43
+ deprecated_attributes(VALUE self)
44
+ {
45
+ const htmlElemDesc *description;
46
+ VALUE list;
47
+ int i;
48
+
49
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
50
+
51
+ list = rb_ary_new();
52
+
53
+ if (NULL == description->attrs_depr) { return list; }
54
+
55
+ for (i = 0; description->attrs_depr[i]; i++) {
56
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
57
+ }
58
+
59
+ return list;
60
+ }
61
+
62
+ /*
63
+ * call-seq:
64
+ * optional_attributes
65
+ *
66
+ * A list of optional attributes for this element
67
+ */
68
+ static VALUE
69
+ optional_attributes(VALUE self)
70
+ {
71
+ const htmlElemDesc *description;
72
+ VALUE list;
73
+ int i;
74
+
75
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
76
+
77
+ list = rb_ary_new();
78
+
79
+ if (NULL == description->attrs_opt) { return list; }
80
+
81
+ for (i = 0; description->attrs_opt[i]; i++) {
82
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
83
+ }
84
+
85
+ return list;
86
+ }
87
+
88
+ /*
89
+ * call-seq:
90
+ * default_sub_element
91
+ *
92
+ * The default sub element for this element
93
+ */
94
+ static VALUE
95
+ default_sub_element(VALUE self)
96
+ {
97
+ const htmlElemDesc *description;
98
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
99
+
100
+ if (description->defaultsubelt) {
101
+ return NOKOGIRI_STR_NEW2(description->defaultsubelt);
102
+ }
103
+
104
+ return Qnil;
105
+ }
106
+
107
+ /*
108
+ * call-seq:
109
+ * sub_elements
110
+ *
111
+ * A list of allowed sub elements for this element.
112
+ */
113
+ static VALUE
114
+ sub_elements(VALUE self)
115
+ {
116
+ const htmlElemDesc *description;
117
+ VALUE list;
118
+ int i;
119
+
120
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
121
+
122
+ list = rb_ary_new();
123
+
124
+ if (NULL == description->subelts) { return list; }
125
+
126
+ for (i = 0; description->subelts[i]; i++) {
127
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
128
+ }
129
+
130
+ return list;
131
+ }
132
+
133
+ /*
134
+ * call-seq:
135
+ * description
136
+ *
137
+ * The description for this element
138
+ */
139
+ static VALUE
140
+ description(VALUE self)
141
+ {
142
+ const htmlElemDesc *description;
143
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
144
+
145
+ return NOKOGIRI_STR_NEW2(description->desc);
146
+ }
147
+
148
+ /*
149
+ * call-seq:
150
+ * inline?
151
+ *
152
+ * Is this element an inline element?
153
+ */
154
+ static VALUE
155
+ inline_eh(VALUE self)
156
+ {
157
+ const htmlElemDesc *description;
158
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
159
+
160
+ if (description->isinline) { return Qtrue; }
161
+ return Qfalse;
162
+ }
163
+
164
+ /*
165
+ * call-seq:
166
+ * deprecated?
167
+ *
168
+ * Is this element deprecated?
169
+ */
170
+ static VALUE
171
+ deprecated_eh(VALUE self)
172
+ {
173
+ const htmlElemDesc *description;
174
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
175
+
176
+ if (description->depr) { return Qtrue; }
177
+ return Qfalse;
178
+ }
179
+
180
+ /*
181
+ * call-seq:
182
+ * empty?
183
+ *
184
+ * Is this an empty element?
185
+ */
186
+ static VALUE
187
+ empty_eh(VALUE self)
188
+ {
189
+ const htmlElemDesc *description;
190
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
191
+
192
+ if (description->empty) { return Qtrue; }
193
+ return Qfalse;
194
+ }
195
+
196
+ /*
197
+ * call-seq:
198
+ * save_end_tag?
199
+ *
200
+ * Should the end tag be saved?
201
+ */
202
+ static VALUE
203
+ save_end_tag_eh(VALUE self)
204
+ {
205
+ const htmlElemDesc *description;
206
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
207
+
208
+ if (description->saveEndTag) { return Qtrue; }
209
+ return Qfalse;
210
+ }
211
+
212
+ /*
213
+ * call-seq:
214
+ * implied_end_tag?
215
+ *
216
+ * Can the end tag be implied for this tag?
217
+ */
218
+ static VALUE
219
+ implied_end_tag_eh(VALUE self)
220
+ {
221
+ const htmlElemDesc *description;
222
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
223
+
224
+ if (description->endTag) { return Qtrue; }
225
+ return Qfalse;
226
+ }
227
+
228
+ /*
229
+ * call-seq:
230
+ * implied_start_tag?
231
+ *
232
+ * Can the start tag be implied for this tag?
233
+ */
234
+ static VALUE
235
+ implied_start_tag_eh(VALUE self)
236
+ {
237
+ const htmlElemDesc *description;
238
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
239
+
240
+ if (description->startTag) { return Qtrue; }
241
+ return Qfalse;
242
+ }
243
+
244
+ /*
245
+ * call-seq:
246
+ * name
247
+ *
248
+ * Get the tag name for this ElemementDescription
249
+ */
250
+ static VALUE
251
+ name(VALUE self)
252
+ {
253
+ const htmlElemDesc *description;
254
+ TypedData_Get_Struct(self, htmlElemDesc, &html4_element_description_type, description);
255
+
256
+ if (NULL == description->name) { return Qnil; }
257
+ return NOKOGIRI_STR_NEW2(description->name);
258
+ }
259
+
260
+ /*
261
+ * call-seq:
262
+ * [](tag_name)
263
+ *
264
+ * Get ElemementDescription for +tag_name+
265
+ */
266
+ static VALUE
267
+ get_description(VALUE klass, VALUE tag_name)
268
+ {
269
+ const htmlElemDesc *description = htmlTagLookup(
270
+ (const xmlChar *)StringValueCStr(tag_name)
271
+ );
272
+
273
+ if (NULL == description) { return Qnil; }
274
+ return TypedData_Wrap_Struct(klass, &html4_element_description_type, DISCARD_CONST_QUAL(void *, description));
275
+ }
276
+
277
+ void
278
+ noko_init_html_element_description(void)
279
+ {
280
+ cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject);
281
+
282
+ rb_undef_alloc_func(cNokogiriHtml4ElementDescription);
283
+
284
+ rb_define_singleton_method(cNokogiriHtml4ElementDescription, "[]", get_description, 1);
285
+
286
+ rb_define_method(cNokogiriHtml4ElementDescription, "name", name, 0);
287
+ rb_define_method(cNokogiriHtml4ElementDescription, "implied_start_tag?", implied_start_tag_eh, 0);
288
+ rb_define_method(cNokogiriHtml4ElementDescription, "implied_end_tag?", implied_end_tag_eh, 0);
289
+ rb_define_method(cNokogiriHtml4ElementDescription, "save_end_tag?", save_end_tag_eh, 0);
290
+ rb_define_method(cNokogiriHtml4ElementDescription, "empty?", empty_eh, 0);
291
+ rb_define_method(cNokogiriHtml4ElementDescription, "deprecated?", deprecated_eh, 0);
292
+ rb_define_method(cNokogiriHtml4ElementDescription, "inline?", inline_eh, 0);
293
+ rb_define_method(cNokogiriHtml4ElementDescription, "description", description, 0);
294
+ rb_define_method(cNokogiriHtml4ElementDescription, "sub_elements", sub_elements, 0);
295
+ rb_define_method(cNokogiriHtml4ElementDescription, "default_sub_element", default_sub_element, 0);
296
+ rb_define_method(cNokogiriHtml4ElementDescription, "optional_attributes", optional_attributes, 0);
297
+ rb_define_method(cNokogiriHtml4ElementDescription, "deprecated_attributes", deprecated_attributes, 0);
298
+ rb_define_method(cNokogiriHtml4ElementDescription, "required_attributes", required_attributes, 0);
299
+ }
@@ -0,0 +1,37 @@
1
+ #include <nokogiri.h>
2
+
3
+ static VALUE cNokogiriHtml4EntityLookup;
4
+
5
+ /*
6
+ * call-seq:
7
+ * get(key)
8
+ *
9
+ * Get the HTML4::EntityDescription for +key+
10
+ */
11
+ static VALUE
12
+ get(VALUE _, VALUE rb_entity_name)
13
+ {
14
+ VALUE cNokogiriHtml4EntityDescription;
15
+ const htmlEntityDesc *c_entity_desc;
16
+ VALUE rb_constructor_args[3];
17
+
18
+ c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name));
19
+ if (NULL == c_entity_desc) {
20
+ return Qnil;
21
+ }
22
+
23
+ rb_constructor_args[0] = UINT2NUM(c_entity_desc->value);
24
+ rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
25
+ rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
26
+
27
+ cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription"));
28
+ return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription);
29
+ }
30
+
31
+ void
32
+ noko_init_html_entity_lookup(void)
33
+ {
34
+ cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
35
+
36
+ rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1);
37
+ }
@@ -0,0 +1,108 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtml4SaxParserContext ;
4
+
5
+ static VALUE
6
+ parse_memory(VALUE klass, VALUE data, VALUE encoding)
7
+ {
8
+ htmlParserCtxtPtr ctxt;
9
+
10
+ Check_Type(data, T_STRING);
11
+
12
+ if (!(int)RSTRING_LEN(data)) {
13
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
14
+ }
15
+
16
+ ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
17
+ (int)RSTRING_LEN(data));
18
+ if (ctxt->sax) {
19
+ xmlFree(ctxt->sax);
20
+ ctxt->sax = NULL;
21
+ }
22
+
23
+ if (RTEST(encoding)) {
24
+ xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
25
+ if (enc != NULL) {
26
+ xmlSwitchToEncoding(ctxt, enc);
27
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
28
+ rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
29
+ StringValueCStr(encoding));
30
+ }
31
+ }
32
+ }
33
+
34
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
35
+ }
36
+
37
+ static VALUE
38
+ parse_file(VALUE klass, VALUE filename, VALUE encoding)
39
+ {
40
+ htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
41
+ StringValueCStr(filename),
42
+ StringValueCStr(encoding)
43
+ );
44
+
45
+ if (ctxt->sax) {
46
+ xmlFree(ctxt->sax);
47
+ ctxt->sax = NULL;
48
+ }
49
+
50
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
51
+ }
52
+
53
+ static VALUE
54
+ parse_doc(VALUE ctxt_val)
55
+ {
56
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
57
+ htmlParseDocument(ctxt);
58
+ return Qnil;
59
+ }
60
+
61
+ static VALUE
62
+ parse_doc_finalize(VALUE ctxt_val)
63
+ {
64
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
65
+
66
+ if (ctxt->myDoc) {
67
+ xmlFreeDoc(ctxt->myDoc);
68
+ }
69
+
70
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
71
+ return Qnil;
72
+ }
73
+
74
+ static VALUE
75
+ parse_with(VALUE self, VALUE sax_handler)
76
+ {
77
+ htmlParserCtxtPtr ctxt;
78
+ htmlSAXHandlerPtr sax;
79
+
80
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
81
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
82
+ }
83
+
84
+ ctxt = noko_xml_sax_parser_context_unwrap(self);
85
+ sax = noko_sax_handler_unwrap(sax_handler);
86
+
87
+ ctxt->sax = sax;
88
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
89
+
90
+ xmlSetStructuredErrorFunc(NULL, NULL);
91
+
92
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
93
+
94
+ return self;
95
+ }
96
+
97
+ void
98
+ noko_init_html_sax_parser_context(void)
99
+ {
100
+ assert(cNokogiriXmlSaxParserContext);
101
+ cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext",
102
+ cNokogiriXmlSaxParserContext);
103
+
104
+ rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2);
105
+ rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2);
106
+
107
+ rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1);
108
+ }
@@ -0,0 +1,95 @@
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriHtml4SaxPushParser;
4
+
5
+ /*
6
+ * call-seq:
7
+ * native_write(chunk, last_chunk)
8
+ *
9
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
10
+ */
11
+ static VALUE
12
+ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
13
+ {
14
+ xmlParserCtxtPtr ctx;
15
+ const char *chunk = NULL;
16
+ int size = 0;
17
+ int status = 0;
18
+ libxmlStructuredErrorHandlerState handler_state;
19
+
20
+ ctx = noko_xml_sax_push_parser_unwrap(self);
21
+
22
+ if (Qnil != _chunk) {
23
+ chunk = StringValuePtr(_chunk);
24
+ size = (int)RSTRING_LEN(_chunk);
25
+ }
26
+
27
+ Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);
28
+
29
+ status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);
30
+
31
+ Nokogiri_structured_error_func_restore(&handler_state);
32
+
33
+ if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
34
+ // TODO: there appear to be no tests for this block
35
+ xmlErrorPtr e = xmlCtxtGetLastError(ctx);
36
+ Nokogiri_error_raise(NULL, e);
37
+ }
38
+
39
+ return self;
40
+ }
41
+
42
+ /*
43
+ * call-seq:
44
+ * initialize_native(xml_sax, filename)
45
+ *
46
+ * Initialize the push parser with +xml_sax+ using +filename+
47
+ */
48
+ static VALUE
49
+ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
50
+ VALUE encoding)
51
+ {
52
+ htmlSAXHandlerPtr sax;
53
+ const char *filename = NULL;
54
+ htmlParserCtxtPtr ctx;
55
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
56
+
57
+ sax = noko_sax_handler_unwrap(_xml_sax);
58
+
59
+ if (_filename != Qnil) { filename = StringValueCStr(_filename); }
60
+
61
+ if (!NIL_P(encoding)) {
62
+ enc = xmlParseCharEncoding(StringValueCStr(encoding));
63
+ if (enc == XML_CHAR_ENCODING_ERROR) {
64
+ rb_raise(rb_eArgError, "Unsupported Encoding");
65
+ }
66
+ }
67
+
68
+ ctx = htmlCreatePushParserCtxt(
69
+ sax,
70
+ NULL,
71
+ NULL,
72
+ 0,
73
+ filename,
74
+ enc
75
+ );
76
+ if (ctx == NULL) {
77
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
78
+ }
79
+
80
+ ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
81
+
82
+ ctx->sax2 = 1;
83
+ DATA_PTR(self) = ctx;
84
+ return self;
85
+ }
86
+
87
+ void
88
+ noko_init_html_sax_push_parser(void)
89
+ {
90
+ assert(cNokogiriXmlSaxPushParser);
91
+ cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser);
92
+
93
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3);
94
+ rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2);
95
+ }