nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,170 +0,0 @@
1
- #include <html_document.h>
2
-
3
- static ID id_encoding_found;
4
-
5
- /*
6
- * call-seq:
7
- * new
8
- *
9
- * Create a new document
10
- */
11
- static VALUE new(int argc, VALUE *argv, VALUE klass)
12
- {
13
- VALUE uri, external_id, rest, rb_doc;
14
- htmlDocPtr doc;
15
-
16
- rb_scan_args(argc, argv, "0*", &rest);
17
- uri = rb_ary_entry(rest, (long)0);
18
- external_id = rb_ary_entry(rest, (long)1);
19
-
20
- doc = htmlNewDoc(
21
- RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
22
- RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
23
- );
24
- rb_doc = Nokogiri_wrap_xml_document(klass, doc);
25
- rb_obj_call_init(rb_doc, argc, argv);
26
- return rb_doc ;
27
- }
28
-
29
- /*
30
- * call-seq:
31
- * read_io(io, url, encoding, options)
32
- *
33
- * Read the HTML document from +io+ with given +url+, +encoding+,
34
- * and +options+. See Nokogiri::HTML.parse
35
- */
36
- static VALUE read_io( VALUE klass,
37
- VALUE io,
38
- VALUE url,
39
- VALUE encoding,
40
- VALUE options )
41
- {
42
- const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
43
- const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
44
- VALUE error_list = rb_ary_new();
45
- VALUE document;
46
- htmlDocPtr doc;
47
-
48
- xmlResetLastError();
49
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
50
-
51
- doc = htmlReadIO(
52
- io_read_callback,
53
- io_close_callback,
54
- (void *)io,
55
- c_url,
56
- c_enc,
57
- (int)NUM2INT(options)
58
- );
59
- xmlSetStructuredErrorFunc(NULL, NULL);
60
-
61
- /*
62
- * If EncodingFound has occurred in EncodingReader, make sure to do
63
- * a cleanup and propagate the error.
64
- */
65
- if (rb_respond_to(io, id_encoding_found)) {
66
- VALUE encoding_found = rb_funcall(io, id_encoding_found, 0);
67
- if (!NIL_P(encoding_found)) {
68
- xmlFreeDoc(doc);
69
- rb_exc_raise(encoding_found);
70
- }
71
- }
72
-
73
- if(doc == NULL) {
74
- xmlErrorPtr error;
75
-
76
- xmlFreeDoc(doc);
77
-
78
- error = xmlGetLastError();
79
- if(error)
80
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
81
- else
82
- rb_raise(rb_eRuntimeError, "Could not parse document");
83
-
84
- return Qnil;
85
- }
86
-
87
- document = Nokogiri_wrap_xml_document(klass, doc);
88
- rb_iv_set(document, "@errors", error_list);
89
- return document;
90
- }
91
-
92
- /*
93
- * call-seq:
94
- * read_memory(string, url, encoding, options)
95
- *
96
- * Read the HTML document contained in +string+ with given +url+, +encoding+,
97
- * and +options+. See Nokogiri::HTML.parse
98
- */
99
- static VALUE read_memory( VALUE klass,
100
- VALUE string,
101
- VALUE url,
102
- VALUE encoding,
103
- VALUE options )
104
- {
105
- const char * c_buffer = StringValuePtr(string);
106
- const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
107
- const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
108
- int len = (int)RSTRING_LEN(string);
109
- VALUE error_list = rb_ary_new();
110
- VALUE document;
111
- htmlDocPtr doc;
112
-
113
- xmlResetLastError();
114
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
115
-
116
- doc = htmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
117
- xmlSetStructuredErrorFunc(NULL, NULL);
118
-
119
- if(doc == NULL) {
120
- xmlErrorPtr error;
121
-
122
- xmlFreeDoc(doc);
123
-
124
- error = xmlGetLastError();
125
- if(error)
126
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
127
- else
128
- rb_raise(rb_eRuntimeError, "Could not parse document");
129
-
130
- return Qnil;
131
- }
132
-
133
- document = Nokogiri_wrap_xml_document(klass, doc);
134
- rb_iv_set(document, "@errors", error_list);
135
- return document;
136
- }
137
-
138
- /*
139
- * call-seq:
140
- * type
141
- *
142
- * The type for this document
143
- */
144
- static VALUE type(VALUE self)
145
- {
146
- htmlDocPtr doc;
147
- Data_Get_Struct(self, xmlDoc, doc);
148
- return INT2NUM((long)doc->type);
149
- }
150
-
151
- VALUE cNokogiriHtmlDocument ;
152
- void init_html_document()
153
- {
154
- VALUE nokogiri = rb_define_module("Nokogiri");
155
- VALUE html = rb_define_module_under(nokogiri, "HTML");
156
- VALUE xml = rb_define_module_under(nokogiri, "XML");
157
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
158
- VALUE xml_doc = rb_define_class_under(xml, "Document", node);
159
- VALUE klass = rb_define_class_under(html, "Document", xml_doc);
160
-
161
- cNokogiriHtmlDocument = klass;
162
-
163
- rb_define_singleton_method(klass, "read_memory", read_memory, 4);
164
- rb_define_singleton_method(klass, "read_io", read_io, 4);
165
- rb_define_singleton_method(klass, "new", new, -1);
166
-
167
- rb_define_method(klass, "type", type, 0);
168
-
169
- id_encoding_found = rb_intern("encoding_found");
170
- }
@@ -1,10 +0,0 @@
1
- #ifndef NOKOGIRI_HTML_DOCUMENT
2
- #define NOKOGIRI_HTML_DOCUMENT
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_html_document();
7
-
8
- extern VALUE cNokogiriHtmlDocument ;
9
-
10
- #endif
@@ -1,279 +0,0 @@
1
- #include <html_element_description.h>
2
-
3
- /*
4
- * call-seq:
5
- * required_attributes
6
- *
7
- * A list of required attributes for this element
8
- */
9
- static VALUE required_attributes(VALUE self)
10
- {
11
- const htmlElemDesc * description;
12
- VALUE list;
13
- int i;
14
-
15
- Data_Get_Struct(self, htmlElemDesc, description);
16
-
17
- list = rb_ary_new();
18
-
19
- if(NULL == description->attrs_req) return list;
20
-
21
- for(i = 0; description->attrs_depr[i]; i++) {
22
- rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
23
- }
24
-
25
- return list;
26
- }
27
-
28
- /*
29
- * call-seq:
30
- * deprecated_attributes
31
- *
32
- * A list of deprecated attributes for this element
33
- */
34
- static VALUE deprecated_attributes(VALUE self)
35
- {
36
- const htmlElemDesc * description;
37
- VALUE list;
38
- int i;
39
-
40
- Data_Get_Struct(self, htmlElemDesc, description);
41
-
42
- list = rb_ary_new();
43
-
44
- if(NULL == description->attrs_depr) return list;
45
-
46
- for(i = 0; description->attrs_depr[i]; i++) {
47
- rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
48
- }
49
-
50
- return list;
51
- }
52
-
53
- /*
54
- * call-seq:
55
- * optional_attributes
56
- *
57
- * A list of optional attributes for this element
58
- */
59
- static VALUE optional_attributes(VALUE self)
60
- {
61
- const htmlElemDesc * description;
62
- VALUE list;
63
- int i;
64
-
65
- Data_Get_Struct(self, htmlElemDesc, description);
66
-
67
- list = rb_ary_new();
68
-
69
- if(NULL == description->attrs_opt) return list;
70
-
71
- for(i = 0; description->attrs_opt[i]; i++) {
72
- rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
73
- }
74
-
75
- return list;
76
- }
77
-
78
- /*
79
- * call-seq:
80
- * default_sub_element
81
- *
82
- * The default sub element for this element
83
- */
84
- static VALUE default_sub_element(VALUE self)
85
- {
86
- const htmlElemDesc * description;
87
- Data_Get_Struct(self, htmlElemDesc, description);
88
-
89
- if (description->defaultsubelt)
90
- return NOKOGIRI_STR_NEW2(description->defaultsubelt);
91
-
92
- return Qnil;
93
- }
94
-
95
- /*
96
- * call-seq:
97
- * sub_elements
98
- *
99
- * A list of allowed sub elements for this element.
100
- */
101
- static VALUE sub_elements(VALUE self)
102
- {
103
- const htmlElemDesc * description;
104
- VALUE list;
105
- int i;
106
-
107
- Data_Get_Struct(self, htmlElemDesc, description);
108
-
109
- list = rb_ary_new();
110
-
111
- if(NULL == description->subelts) return list;
112
-
113
- for(i = 0; description->subelts[i]; i++) {
114
- rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
115
- }
116
-
117
- return list;
118
- }
119
-
120
- /*
121
- * call-seq:
122
- * description
123
- *
124
- * The description for this element
125
- */
126
- static VALUE description(VALUE self)
127
- {
128
- const htmlElemDesc * description;
129
- Data_Get_Struct(self, htmlElemDesc, description);
130
-
131
- return NOKOGIRI_STR_NEW2(description->desc);
132
- }
133
-
134
- /*
135
- * call-seq:
136
- * inline?
137
- *
138
- * Is this element an inline element?
139
- */
140
- static VALUE inline_eh(VALUE self)
141
- {
142
- const htmlElemDesc * description;
143
- Data_Get_Struct(self, htmlElemDesc, description);
144
-
145
- if(description->isinline) return Qtrue;
146
- return Qfalse;
147
- }
148
-
149
- /*
150
- * call-seq:
151
- * deprecated?
152
- *
153
- * Is this element deprecated?
154
- */
155
- static VALUE deprecated_eh(VALUE self)
156
- {
157
- const htmlElemDesc * description;
158
- Data_Get_Struct(self, htmlElemDesc, description);
159
-
160
- if(description->depr) return Qtrue;
161
- return Qfalse;
162
- }
163
-
164
- /*
165
- * call-seq:
166
- * empty?
167
- *
168
- * Is this an empty element?
169
- */
170
- static VALUE empty_eh(VALUE self)
171
- {
172
- const htmlElemDesc * description;
173
- Data_Get_Struct(self, htmlElemDesc, description);
174
-
175
- if(description->empty) return Qtrue;
176
- return Qfalse;
177
- }
178
-
179
- /*
180
- * call-seq:
181
- * save_end_tag?
182
- *
183
- * Should the end tag be saved?
184
- */
185
- static VALUE save_end_tag_eh(VALUE self)
186
- {
187
- const htmlElemDesc * description;
188
- Data_Get_Struct(self, htmlElemDesc, description);
189
-
190
- if(description->saveEndTag) return Qtrue;
191
- return Qfalse;
192
- }
193
-
194
- /*
195
- * call-seq:
196
- * implied_end_tag?
197
- *
198
- * Can the end tag be implied for this tag?
199
- */
200
- static VALUE implied_end_tag_eh(VALUE self)
201
- {
202
- const htmlElemDesc * description;
203
- Data_Get_Struct(self, htmlElemDesc, description);
204
-
205
- if(description->endTag) return Qtrue;
206
- return Qfalse;
207
- }
208
-
209
- /*
210
- * call-seq:
211
- * implied_start_tag?
212
- *
213
- * Can the start tag be implied for this tag?
214
- */
215
- static VALUE implied_start_tag_eh(VALUE self)
216
- {
217
- const htmlElemDesc * description;
218
- Data_Get_Struct(self, htmlElemDesc, description);
219
-
220
- if(description->startTag) return Qtrue;
221
- return Qfalse;
222
- }
223
-
224
- /*
225
- * call-seq:
226
- * name
227
- *
228
- * Get the tag name for this ElemementDescription
229
- */
230
- static VALUE name(VALUE self)
231
- {
232
- const htmlElemDesc * description;
233
- Data_Get_Struct(self, htmlElemDesc, description);
234
-
235
- if(NULL == description->name) return Qnil;
236
- return NOKOGIRI_STR_NEW2(description->name);
237
- }
238
-
239
- /*
240
- * call-seq:
241
- * [](tag_name)
242
- *
243
- * Get ElemementDescription for +tag_name+
244
- */
245
- static VALUE get_description(VALUE klass, VALUE tag_name)
246
- {
247
- const htmlElemDesc * description = htmlTagLookup(
248
- (const xmlChar *)StringValueCStr(tag_name)
249
- );
250
-
251
- if(NULL == description) return Qnil;
252
- return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description);
253
- }
254
-
255
- VALUE cNokogiriHtmlElementDescription ;
256
- void init_html_element_description()
257
- {
258
- VALUE nokogiri = rb_define_module("Nokogiri");
259
- VALUE html = rb_define_module_under(nokogiri, "HTML");
260
- VALUE klass = rb_define_class_under(html, "ElementDescription",rb_cObject);
261
-
262
- cNokogiriHtmlElementDescription = klass;
263
-
264
- rb_define_singleton_method(klass, "[]", get_description, 1);
265
-
266
- rb_define_method(klass, "name", name, 0);
267
- rb_define_method(klass, "implied_start_tag?", implied_start_tag_eh, 0);
268
- rb_define_method(klass, "implied_end_tag?", implied_end_tag_eh, 0);
269
- rb_define_method(klass, "save_end_tag?", save_end_tag_eh, 0);
270
- rb_define_method(klass, "empty?", empty_eh, 0);
271
- rb_define_method(klass, "deprecated?", deprecated_eh, 0);
272
- rb_define_method(klass, "inline?", inline_eh, 0);
273
- rb_define_method(klass, "description", description, 0);
274
- rb_define_method(klass, "sub_elements", sub_elements, 0);
275
- rb_define_method(klass, "default_sub_element", default_sub_element, 0);
276
- rb_define_method(klass, "optional_attributes", optional_attributes, 0);
277
- rb_define_method(klass, "deprecated_attributes", deprecated_attributes, 0);
278
- rb_define_method(klass, "required_attributes", required_attributes, 0);
279
- }
@@ -1,10 +0,0 @@
1
- #ifndef NOKOGIRI_HTML_ELEMENT_DESCRIPTION
2
- #define NOKOGIRI_HTML_ELEMENT_DESCRIPTION
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_html_element_description();
7
-
8
- extern VALUE cNokogiriHtmlElementDescription ;
9
-
10
- #endif
@@ -1,32 +0,0 @@
1
- #include <html_entity_lookup.h>
2
-
3
- /*
4
- * call-seq:
5
- * get(key)
6
- *
7
- * Get the HTML::EntityDescription for +key+
8
- */
9
- static VALUE get(VALUE self, VALUE key)
10
- {
11
- const htmlEntityDesc * desc =
12
- htmlEntityLookup((const xmlChar *)StringValueCStr(key));
13
- VALUE klass, args[3];
14
-
15
- if(NULL == desc) return Qnil;
16
- klass = rb_const_get(mNokogiriHtml, rb_intern("EntityDescription"));
17
-
18
- args[0] = INT2NUM((long)desc->value);
19
- args[1] = NOKOGIRI_STR_NEW2(desc->name);
20
- args[2] = NOKOGIRI_STR_NEW2(desc->desc);
21
-
22
- return rb_class_new_instance(3, args, klass);
23
- }
24
-
25
- void init_html_entity_lookup()
26
- {
27
- VALUE nokogiri = rb_define_module("Nokogiri");
28
- VALUE html = rb_define_module_under(nokogiri, "HTML");
29
- VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject);
30
-
31
- rb_define_method(klass, "get", get, 1);
32
- }
@@ -1,8 +0,0 @@
1
- #ifndef NOKOGIRI_HTML_ENTITY_LOOKUP
2
- #define NOKOGIRI_HTML_ENTITY_LOOKUP
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_html_entity_lookup();
7
-
8
- #endif
@@ -1,116 +0,0 @@
1
- #include <html_sax_parser_context.h>
2
-
3
- VALUE cNokogiriHtmlSaxParserContext ;
4
-
5
- static void deallocate(xmlParserCtxtPtr ctxt)
6
- {
7
- NOKOGIRI_DEBUG_START(handler);
8
-
9
- ctxt->sax = NULL;
10
-
11
- htmlFreeParserCtxt(ctxt);
12
-
13
- NOKOGIRI_DEBUG_END(handler);
14
- }
15
-
16
- static VALUE
17
- parse_memory(VALUE klass, VALUE data, VALUE encoding)
18
- {
19
- htmlParserCtxtPtr ctxt;
20
-
21
- if (NIL_P(data))
22
- rb_raise(rb_eArgError, "data cannot be nil");
23
- if (!(int)RSTRING_LEN(data))
24
- rb_raise(rb_eRuntimeError, "data cannot be empty");
25
-
26
- ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
27
- (int)RSTRING_LEN(data));
28
- if (ctxt->sax) {
29
- xmlFree(ctxt->sax);
30
- ctxt->sax = NULL;
31
- }
32
-
33
- if (RTEST(encoding)) {
34
- xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
35
- if (enc != NULL) {
36
- xmlSwitchToEncoding(ctxt, enc);
37
- if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
38
- rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
39
- StringValueCStr(encoding));
40
- }
41
- }
42
- }
43
-
44
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
45
- }
46
-
47
- static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
48
- {
49
- htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
50
- StringValueCStr(filename),
51
- StringValueCStr(encoding)
52
- );
53
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
54
- }
55
-
56
- static VALUE
57
- parse_doc(VALUE ctxt_val)
58
- {
59
- htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
60
- htmlParseDocument(ctxt);
61
- return Qnil;
62
- }
63
-
64
- static VALUE
65
- parse_doc_finalize(VALUE ctxt_val)
66
- {
67
- htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
68
-
69
- if (ctxt->myDoc)
70
- xmlFreeDoc(ctxt->myDoc);
71
-
72
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
73
- return Qnil;
74
- }
75
-
76
- static VALUE
77
- parse_with(VALUE self, VALUE sax_handler)
78
- {
79
- htmlParserCtxtPtr ctxt;
80
- htmlSAXHandlerPtr sax;
81
-
82
- if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
83
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
84
-
85
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
- Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
87
-
88
- /* Free the sax handler since we'll assign our own */
89
- if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
90
- xmlFree(ctxt->sax);
91
-
92
- ctxt->sax = sax;
93
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
94
-
95
- rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
96
-
97
- return self;
98
- }
99
-
100
- void init_html_sax_parser_context()
101
- {
102
- VALUE nokogiri = rb_define_module("Nokogiri");
103
- VALUE xml = rb_define_module_under(nokogiri, "XML");
104
- VALUE html = rb_define_module_under(nokogiri, "HTML");
105
- VALUE sax = rb_define_module_under(xml, "SAX");
106
- VALUE hsax = rb_define_module_under(html, "SAX");
107
- VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject);
108
- VALUE klass = rb_define_class_under(hsax, "ParserContext", pc);
109
-
110
- cNokogiriHtmlSaxParserContext = klass;
111
-
112
- rb_define_singleton_method(klass, "memory", parse_memory, 2);
113
- rb_define_singleton_method(klass, "file", parse_file, 2);
114
-
115
- rb_define_method(klass, "parse_with", parse_with, 1);
116
- }
@@ -1,11 +0,0 @@
1
- #ifndef NOKOGIRI_HTML_SAX_PARSER_CONTEXT
2
- #define NOKOGIRI_HTML_SAX_PARSER_CONTEXT
3
-
4
- #include <nokogiri.h>
5
-
6
- extern VALUE cNokogiriHtmlSaxParserContext;
7
-
8
- void init_html_sax_parser_context();
9
-
10
- #endif
11
-