nokogiri 1.8.5 → 1.13.6

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (356) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +750 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +191 -89
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +41 -36
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +13 -18
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +291 -216
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +61 -52
  33. data/ext/nokogiri/xml_node.c +1044 -616
  34. data/ext/nokogiri/xml_node_set.c +174 -162
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +226 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +13 -17
  44. data/ext/nokogiri/xml_xpath_context.c +223 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +220 -266
  177. data/.autotest +0 -22
  178. data/.cross_rubies +0 -8
  179. data/.editorconfig +0 -17
  180. data/.gemtest +0 -0
  181. data/.travis.yml +0 -63
  182. data/CHANGELOG.md +0 -1368
  183. data/CONTRIBUTING.md +0 -42
  184. data/C_CODING_STYLE.rdoc +0 -33
  185. data/Gemfile-libxml-ruby +0 -3
  186. data/Manifest.txt +0 -370
  187. data/ROADMAP.md +0 -111
  188. data/Rakefile +0 -348
  189. data/SECURITY.md +0 -19
  190. data/STANDARD_RESPONSES.md +0 -47
  191. data/Y_U_NO_GEMSPEC.md +0 -155
  192. data/appveyor.yml +0 -29
  193. data/build_all +0 -44
  194. data/ext/nokogiri/html_document.c +0 -170
  195. data/ext/nokogiri/html_document.h +0 -10
  196. data/ext/nokogiri/html_element_description.c +0 -279
  197. data/ext/nokogiri/html_element_description.h +0 -10
  198. data/ext/nokogiri/html_entity_lookup.c +0 -32
  199. data/ext/nokogiri/html_entity_lookup.h +0 -8
  200. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  201. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  202. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  203. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  204. data/ext/nokogiri/xml_attr.h +0 -9
  205. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  206. data/ext/nokogiri/xml_cdata.h +0 -9
  207. data/ext/nokogiri/xml_comment.h +0 -9
  208. data/ext/nokogiri/xml_document.h +0 -23
  209. data/ext/nokogiri/xml_document_fragment.h +0 -10
  210. data/ext/nokogiri/xml_dtd.h +0 -10
  211. data/ext/nokogiri/xml_element_content.h +0 -10
  212. data/ext/nokogiri/xml_element_decl.h +0 -9
  213. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  214. data/ext/nokogiri/xml_entity_decl.h +0 -10
  215. data/ext/nokogiri/xml_entity_reference.h +0 -9
  216. data/ext/nokogiri/xml_io.c +0 -61
  217. data/ext/nokogiri/xml_io.h +0 -11
  218. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  219. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  220. data/ext/nokogiri/xml_namespace.h +0 -15
  221. data/ext/nokogiri/xml_node.h +0 -13
  222. data/ext/nokogiri/xml_node_set.h +0 -12
  223. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  224. data/ext/nokogiri/xml_reader.h +0 -10
  225. data/ext/nokogiri/xml_relax_ng.h +0 -9
  226. data/ext/nokogiri/xml_sax_parser.h +0 -39
  227. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  228. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  229. data/ext/nokogiri/xml_schema.h +0 -9
  230. data/ext/nokogiri/xml_syntax_error.h +0 -13
  231. data/ext/nokogiri/xml_text.h +0 -9
  232. data/ext/nokogiri/xml_xpath_context.h +0 -10
  233. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  234. data/lib/nokogiri/html/document_fragment.rb +0 -49
  235. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  236. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  237. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  238. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  239. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  240. data/patches/sort-patches-by-date +0 -25
  241. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  242. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  243. data/suppressions/README.txt +0 -1
  244. data/suppressions/nokogiri_ruby-2.supp +0 -10
  245. data/tasks/test.rb +0 -100
  246. data/test/css/test_nthiness.rb +0 -226
  247. data/test/css/test_parser.rb +0 -386
  248. data/test/css/test_tokenizer.rb +0 -215
  249. data/test/css/test_xpath_visitor.rb +0 -96
  250. data/test/decorators/test_slop.rb +0 -23
  251. data/test/files/2ch.html +0 -108
  252. data/test/files/GH_1042.html +0 -18
  253. data/test/files/address_book.rlx +0 -12
  254. data/test/files/address_book.xml +0 -10
  255. data/test/files/atom.xml +0 -344
  256. data/test/files/bar/bar.xsd +0 -4
  257. data/test/files/bogus.xml +0 -0
  258. data/test/files/dont_hurt_em_why.xml +0 -422
  259. data/test/files/encoding.html +0 -82
  260. data/test/files/encoding.xhtml +0 -84
  261. data/test/files/exslt.xml +0 -8
  262. data/test/files/exslt.xslt +0 -35
  263. data/test/files/foo/foo.xsd +0 -4
  264. data/test/files/metacharset.html +0 -10
  265. data/test/files/namespace_pressure_test.xml +0 -1684
  266. data/test/files/noencoding.html +0 -47
  267. data/test/files/po.xml +0 -32
  268. data/test/files/po.xsd +0 -66
  269. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  270. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  271. data/test/files/saml/xenc_schema.xsd +0 -146
  272. data/test/files/saml/xmldsig_schema.xsd +0 -318
  273. data/test/files/shift_jis.html +0 -10
  274. data/test/files/shift_jis.xml +0 -5
  275. data/test/files/shift_jis_no_charset.html +0 -9
  276. data/test/files/slow-xpath.xml +0 -25509
  277. data/test/files/snuggles.xml +0 -3
  278. data/test/files/staff.dtd +0 -10
  279. data/test/files/staff.xml +0 -59
  280. data/test/files/staff.xslt +0 -32
  281. data/test/files/test_document_url/bar.xml +0 -2
  282. data/test/files/test_document_url/document.dtd +0 -4
  283. data/test/files/test_document_url/document.xml +0 -6
  284. data/test/files/tlm.html +0 -851
  285. data/test/files/to_be_xincluded.xml +0 -2
  286. data/test/files/valid_bar.xml +0 -2
  287. data/test/files/xinclude.xml +0 -4
  288. data/test/helper.rb +0 -271
  289. data/test/html/sax/test_parser.rb +0 -168
  290. data/test/html/sax/test_parser_context.rb +0 -46
  291. data/test/html/sax/test_parser_text.rb +0 -163
  292. data/test/html/sax/test_push_parser.rb +0 -87
  293. data/test/html/test_attributes.rb +0 -85
  294. data/test/html/test_builder.rb +0 -164
  295. data/test/html/test_document.rb +0 -712
  296. data/test/html/test_document_encoding.rb +0 -143
  297. data/test/html/test_document_fragment.rb +0 -310
  298. data/test/html/test_element_description.rb +0 -105
  299. data/test/html/test_named_characters.rb +0 -14
  300. data/test/html/test_node.rb +0 -212
  301. data/test/html/test_node_encoding.rb +0 -91
  302. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  303. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  304. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  305. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  306. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  308. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  309. data/test/test_convert_xpath.rb +0 -135
  310. data/test/test_css_cache.rb +0 -47
  311. data/test/test_encoding_handler.rb +0 -48
  312. data/test/test_memory_leak.rb +0 -156
  313. data/test/test_nokogiri.rb +0 -138
  314. data/test/test_soap4r_sax.rb +0 -52
  315. data/test/test_xslt_transforms.rb +0 -314
  316. data/test/xml/node/test_save_options.rb +0 -28
  317. data/test/xml/node/test_subclass.rb +0 -44
  318. data/test/xml/sax/test_parser.rb +0 -402
  319. data/test/xml/sax/test_parser_context.rb +0 -115
  320. data/test/xml/sax/test_parser_text.rb +0 -202
  321. data/test/xml/sax/test_push_parser.rb +0 -265
  322. data/test/xml/test_attr.rb +0 -74
  323. data/test/xml/test_attribute_decl.rb +0 -86
  324. data/test/xml/test_builder.rb +0 -341
  325. data/test/xml/test_c14n.rb +0 -180
  326. data/test/xml/test_cdata.rb +0 -54
  327. data/test/xml/test_comment.rb +0 -40
  328. data/test/xml/test_document.rb +0 -982
  329. data/test/xml/test_document_encoding.rb +0 -31
  330. data/test/xml/test_document_fragment.rb +0 -298
  331. data/test/xml/test_dtd.rb +0 -187
  332. data/test/xml/test_dtd_encoding.rb +0 -31
  333. data/test/xml/test_element_content.rb +0 -56
  334. data/test/xml/test_element_decl.rb +0 -73
  335. data/test/xml/test_entity_decl.rb +0 -122
  336. data/test/xml/test_entity_reference.rb +0 -262
  337. data/test/xml/test_namespace.rb +0 -96
  338. data/test/xml/test_node.rb +0 -1325
  339. data/test/xml/test_node_attributes.rb +0 -115
  340. data/test/xml/test_node_encoding.rb +0 -75
  341. data/test/xml/test_node_inheritance.rb +0 -32
  342. data/test/xml/test_node_reparenting.rb +0 -592
  343. data/test/xml/test_node_set.rb +0 -809
  344. data/test/xml/test_parse_options.rb +0 -64
  345. data/test/xml/test_processing_instruction.rb +0 -30
  346. data/test/xml/test_reader.rb +0 -620
  347. data/test/xml/test_reader_encoding.rb +0 -134
  348. data/test/xml/test_relax_ng.rb +0 -60
  349. data/test/xml/test_schema.rb +0 -142
  350. data/test/xml/test_syntax_error.rb +0 -36
  351. data/test/xml/test_text.rb +0 -60
  352. data/test/xml/test_unparented_node.rb +0 -483
  353. data/test/xml/test_xinclude.rb +0 -83
  354. data/test/xml/test_xpath.rb +0 -470
  355. data/test/xslt/test_custom_functions.rb +0 -133
  356. data/test/xslt/test_exception_handling.rb +0 -37
@@ -0,0 +1,578 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module HTML4
5
+ class ElementDescription
6
+ # Method definitions are guarded by method_defined? because at
7
+ # this point the C-library or Java library is already loaded,
8
+ # and we don't want to clobber any methods that have been
9
+ # defined there.
10
+
11
+ Desc = Struct.new("HTMLElementDescription", :name,
12
+ :startTag, :endTag, :saveEndTag,
13
+ :empty, :depr, :dtd, :isinline,
14
+ :desc,
15
+ :subelts, :defaultsubelt,
16
+ :attrs_opt, :attrs_depr, :attrs_req)
17
+
18
+ # This is filled in down below.
19
+ DefaultDescriptions = {} # rubocop:disable Naming/ConstantName
20
+
21
+ def default_desc
22
+ DefaultDescriptions[name.downcase]
23
+ end
24
+ private :default_desc
25
+
26
+ unless method_defined?(:implied_start_tag?)
27
+ def implied_start_tag?
28
+ d = default_desc
29
+ d ? d.startTag : nil
30
+ end
31
+ end
32
+
33
+ unless method_defined?(:implied_end_tag?)
34
+ def implied_end_tag?
35
+ d = default_desc
36
+ d ? d.endTag : nil
37
+ end
38
+ end
39
+
40
+ unless method_defined?(:save_end_tag?)
41
+ def save_end_tag?
42
+ d = default_desc
43
+ d ? d.saveEndTag : nil
44
+ end
45
+ end
46
+
47
+ unless method_defined?(:deprecated?)
48
+ def deprecated?
49
+ d = default_desc
50
+ d ? d.depr : nil
51
+ end
52
+ end
53
+
54
+ unless method_defined?(:description)
55
+ def description
56
+ d = default_desc
57
+ d ? d.desc : nil
58
+ end
59
+ end
60
+
61
+ unless method_defined?(:default_sub_element)
62
+ def default_sub_element
63
+ d = default_desc
64
+ d ? d.defaultsubelt : nil
65
+ end
66
+ end
67
+
68
+ unless method_defined?(:optional_attributes)
69
+ def optional_attributes
70
+ d = default_desc
71
+ d ? d.attrs_opt : []
72
+ end
73
+ end
74
+
75
+ unless method_defined?(:deprecated_attributes)
76
+ def deprecated_attributes
77
+ d = default_desc
78
+ d ? d.attrs_depr : []
79
+ end
80
+ end
81
+
82
+ unless method_defined?(:required_attributes)
83
+ def required_attributes
84
+ d = default_desc
85
+ d ? d.attrs_req : []
86
+ end
87
+ end
88
+
89
+ ###
90
+ # Default Element Descriptions (HTML 4.0) copied from
91
+ # libxml2/HTMLparser.c and libxml2/include/libxml/HTMLparser.h
92
+ #
93
+ # The copyright notice for those files and the following list of
94
+ # element and attribute descriptions is reproduced here:
95
+ #
96
+ # Except where otherwise noted in the source code (e.g. the
97
+ # files hash.c, list.c and the trio files, which are covered by
98
+ # a similar licence but with different Copyright notices) all
99
+ # the files are:
100
+ #
101
+ # Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
102
+ #
103
+ # Permission is hereby granted, free of charge, to any person
104
+ # obtaining a copy of this software and associated documentation
105
+ # files (the "Software"), to deal in the Software without
106
+ # restriction, including without limitation the rights to use,
107
+ # copy, modify, merge, publish, distribute, sublicense, and/or
108
+ # sell copies of the Software, and to permit persons to whom the
109
+ # Software is furnished to do so, subject to the following
110
+ # conditions:
111
+
112
+ # The above copyright notice and this permission notice shall be
113
+ # included in all copies or substantial portions of the
114
+ # Software.
115
+
116
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
117
+ # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
118
+ # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
119
+ # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE DANIEL
120
+ # VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
121
+ # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
122
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
123
+ # OR OTHER DEALINGS IN THE SOFTWARE.
124
+
125
+ # Except as contained in this notice, the name of Daniel
126
+ # Veillard shall not be used in advertising or otherwise to
127
+ # promote the sale, use or other dealings in this Software
128
+ # without prior written authorization from him.
129
+
130
+ # Element and attribute groups, defined and categorized
131
+ FONTSTYLE = ["tt", "i", "b", "u", "s", "strike", "big", "small"]
132
+ PHRASE = ["em", "strong", "dfn", "code", "samp",
133
+ "kbd", "var", "cite", "abbr", "acronym",]
134
+ SPECIAL = ["a", "img", "applet", "embed", "object", "font", "basefont",
135
+ "br", "script", "map", "q", "sub", "sup", "span", "bdo",
136
+ "iframe",]
137
+ PCDATA = []
138
+ HEADING = ["h1", "h2", "h3", "h4", "h5", "h6"]
139
+ LIST = ["ul", "ol", "dir", "menu"]
140
+ FORMCTRL = ["input", "select", "textarea", "label", "button"]
141
+ BLOCK = [HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript",
142
+ "noframes", "blockquote", "form", "isindex", "hr", "table",
143
+ "fieldset", "address",]
144
+ INLINE = [PCDATA, FONTSTYLE, PHRASE, SPECIAL, FORMCTRL]
145
+ FLOW = [BLOCK, INLINE]
146
+ MODIFIER = []
147
+ EMPTY = []
148
+
149
+ HTML_FLOW = FLOW
150
+ HTML_INLINE = INLINE
151
+ HTML_PCDATA = PCDATA
152
+ HTML_CDATA = HTML_PCDATA
153
+
154
+ COREATTRS = ["id", "class", "style", "title"]
155
+ I18N = ["lang", "dir"]
156
+ EVENTS = ["onclick", "ondblclick", "onmousedown", "onmouseup",
157
+ "onmouseover", "onmouseout", "onkeypress", "onkeydown",
158
+ "onkeyup",]
159
+ ATTRS = [COREATTRS, I18N, EVENTS]
160
+ CELLHALIGN = ["align", "char", "charoff"]
161
+ CELLVALIGN = ["valign"]
162
+
163
+ HTML_ATTRS = ATTRS
164
+ CORE_I18N_ATTRS = [COREATTRS, I18N]
165
+ CORE_ATTRS = COREATTRS
166
+ I18N_ATTRS = I18N
167
+
168
+ A_ATTRS = [ATTRS, "charset", "type", "name",
169
+ "href", "hreflang", "rel", "rev", "accesskey", "shape",
170
+ "coords", "tabindex", "onfocus", "onblur",]
171
+ TARGET_ATTR = ["target"]
172
+ ROWS_COLS_ATTR = ["rows", "cols"]
173
+ ALT_ATTR = ["alt"]
174
+ SRC_ALT_ATTRS = ["src", "alt"]
175
+ HREF_ATTRS = ["href"]
176
+ CLEAR_ATTRS = ["clear"]
177
+ INLINE_P = [INLINE, "p"]
178
+
179
+ FLOW_PARAM = [FLOW, "param"]
180
+ APPLET_ATTRS = [COREATTRS, "codebase",
181
+ "archive", "alt", "name", "height", "width", "align",
182
+ "hspace", "vspace",]
183
+ AREA_ATTRS = ["shape", "coords", "href", "nohref",
184
+ "tabindex", "accesskey", "onfocus", "onblur",]
185
+ BASEFONT_ATTRS = ["id", "size", "color", "face"]
186
+ QUOTE_ATTRS = [ATTRS, "cite"]
187
+ BODY_CONTENTS = [FLOW, "ins", "del"]
188
+ BODY_ATTRS = [ATTRS, "onload", "onunload"]
189
+ BODY_DEPR = ["background", "bgcolor", "text",
190
+ "link", "vlink", "alink",]
191
+ BUTTON_ATTRS = [ATTRS, "name", "value", "type",
192
+ "disabled", "tabindex", "accesskey", "onfocus", "onblur",]
193
+
194
+ COL_ATTRS = [ATTRS, "span", "width", CELLHALIGN, CELLVALIGN]
195
+ COL_ELT = ["col"]
196
+ EDIT_ATTRS = [ATTRS, "datetime", "cite"]
197
+ COMPACT_ATTRS = [ATTRS, "compact"]
198
+ DL_CONTENTS = ["dt", "dd"]
199
+ COMPACT_ATTR = ["compact"]
200
+ LABEL_ATTR = ["label"]
201
+ FIELDSET_CONTENTS = [FLOW, "legend"]
202
+ FONT_ATTRS = [COREATTRS, I18N, "size", "color", "face"]
203
+ FORM_CONTENTS = [HEADING, LIST, INLINE, "pre", "p", "div", "center",
204
+ "noscript", "noframes", "blockquote", "isindex", "hr",
205
+ "table", "fieldset", "address",]
206
+ FORM_ATTRS = [ATTRS, "method", "enctype", "accept", "name", "onsubmit",
207
+ "onreset", "accept-charset",]
208
+ FRAME_ATTRS = [COREATTRS, "longdesc", "name", "src", "frameborder",
209
+ "marginwidth", "marginheight", "noresize", "scrolling",]
210
+ FRAMESET_ATTRS = [COREATTRS, "rows", "cols", "onload", "onunload"]
211
+ FRAMESET_CONTENTS = ["frameset", "frame", "noframes"]
212
+ HEAD_ATTRS = [I18N, "profile"]
213
+ HEAD_CONTENTS = ["title", "isindex", "base", "script", "style", "meta",
214
+ "link", "object",]
215
+ HR_DEPR = ["align", "noshade", "size", "width"]
216
+ VERSION_ATTR = ["version"]
217
+ HTML_CONTENT = ["head", "body", "frameset"]
218
+ IFRAME_ATTRS = [COREATTRS, "longdesc", "name", "src", "frameborder",
219
+ "marginwidth", "marginheight", "scrolling", "align",
220
+ "height", "width",]
221
+ IMG_ATTRS = [ATTRS, "longdesc", "name", "height", "width", "usemap",
222
+ "ismap",]
223
+ EMBED_ATTRS = [COREATTRS, "align", "alt", "border", "code", "codebase",
224
+ "frameborder", "height", "hidden", "hspace", "name",
225
+ "palette", "pluginspace", "pluginurl", "src", "type",
226
+ "units", "vspace", "width",]
227
+ INPUT_ATTRS = [ATTRS, "type", "name", "value", "checked", "disabled",
228
+ "readonly", "size", "maxlength", "src", "alt", "usemap",
229
+ "ismap", "tabindex", "accesskey", "onfocus", "onblur",
230
+ "onselect", "onchange", "accept",]
231
+ PROMPT_ATTRS = [COREATTRS, I18N, "prompt"]
232
+ LABEL_ATTRS = [ATTRS, "for", "accesskey", "onfocus", "onblur"]
233
+ LEGEND_ATTRS = [ATTRS, "accesskey"]
234
+ ALIGN_ATTR = ["align"]
235
+ LINK_ATTRS = [ATTRS, "charset", "href", "hreflang", "type", "rel", "rev",
236
+ "media",]
237
+ MAP_CONTENTS = [BLOCK, "area"]
238
+ NAME_ATTR = ["name"]
239
+ ACTION_ATTR = ["action"]
240
+ BLOCKLI_ELT = [BLOCK, "li"]
241
+ META_ATTRS = [I18N, "http-equiv", "name", "scheme"]
242
+ CONTENT_ATTR = ["content"]
243
+ TYPE_ATTR = ["type"]
244
+ NOFRAMES_CONTENT = ["body", FLOW, MODIFIER]
245
+ OBJECT_CONTENTS = [FLOW, "param"]
246
+ OBJECT_ATTRS = [ATTRS, "declare", "classid", "codebase", "data", "type",
247
+ "codetype", "archive", "standby", "height", "width",
248
+ "usemap", "name", "tabindex",]
249
+ OBJECT_DEPR = ["align", "border", "hspace", "vspace"]
250
+ OL_ATTRS = ["type", "compact", "start"]
251
+ OPTION_ELT = ["option"]
252
+ OPTGROUP_ATTRS = [ATTRS, "disabled"]
253
+ OPTION_ATTRS = [ATTRS, "disabled", "label", "selected", "value"]
254
+ PARAM_ATTRS = ["id", "value", "valuetype", "type"]
255
+ WIDTH_ATTR = ["width"]
256
+ PRE_CONTENT = [PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br",
257
+ "script", "map", "q", "span", "bdo", "iframe",]
258
+ SCRIPT_ATTRS = ["charset", "src", "defer", "event", "for"]
259
+ LANGUAGE_ATTR = ["language"]
260
+ SELECT_CONTENT = ["optgroup", "option"]
261
+ SELECT_ATTRS = [ATTRS, "name", "size", "multiple", "disabled", "tabindex",
262
+ "onfocus", "onblur", "onchange",]
263
+ STYLE_ATTRS = [I18N, "media", "title"]
264
+ TABLE_ATTRS = [ATTRS, "summary", "width", "border", "frame", "rules",
265
+ "cellspacing", "cellpadding", "datapagesize",]
266
+ TABLE_DEPR = ["align", "bgcolor"]
267
+ TABLE_CONTENTS = ["caption", "col", "colgroup", "thead", "tfoot", "tbody",
268
+ "tr",]
269
+ TR_ELT = ["tr"]
270
+ TALIGN_ATTRS = [ATTRS, CELLHALIGN, CELLVALIGN]
271
+ TH_TD_DEPR = ["nowrap", "bgcolor", "width", "height"]
272
+ TH_TD_ATTR = [ATTRS, "abbr", "axis", "headers", "scope", "rowspan",
273
+ "colspan", CELLHALIGN, CELLVALIGN,]
274
+ TEXTAREA_ATTRS = [ATTRS, "name", "disabled", "readonly", "tabindex",
275
+ "accesskey", "onfocus", "onblur", "onselect",
276
+ "onchange",]
277
+ TR_CONTENTS = ["th", "td"]
278
+ BGCOLOR_ATTR = ["bgcolor"]
279
+ LI_ELT = ["li"]
280
+ UL_DEPR = ["type", "compact"]
281
+ DIR_ATTR = ["dir"]
282
+
283
+ [
284
+ ["a", false, false, false, false, false, :any, true,
285
+ "anchor ",
286
+ HTML_INLINE, nil, A_ATTRS, TARGET_ATTR, [],],
287
+ ["abbr", false, false, false, false, false, :any, true,
288
+ "abbreviated form",
289
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
290
+ ["acronym", false, false, false, false, false, :any, true, "",
291
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
292
+ ["address", false, false, false, false, false, :any, false,
293
+ "information on author",
294
+ INLINE_P, nil, HTML_ATTRS, [], [],],
295
+ ["applet", false, false, false, false, true, :loose, true,
296
+ "java applet ",
297
+ FLOW_PARAM, nil, [], APPLET_ATTRS, [],],
298
+ ["area", false, true, true, true, false, :any, false,
299
+ "client-side image map area ",
300
+ EMPTY, nil, AREA_ATTRS, TARGET_ATTR, ALT_ATTR,],
301
+ ["b", false, true, false, false, false, :any, true,
302
+ "bold text style",
303
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
304
+ ["base", false, true, true, true, false, :any, false,
305
+ "document base uri ",
306
+ EMPTY, nil, [], TARGET_ATTR, HREF_ATTRS,],
307
+ ["basefont", false, true, true, true, true, :loose, true,
308
+ "base font size ",
309
+ EMPTY, nil, [], BASEFONT_ATTRS, [],],
310
+ ["bdo", false, false, false, false, false, :any, true,
311
+ "i18n bidi over-ride ",
312
+ HTML_INLINE, nil, CORE_I18N_ATTRS, [], DIR_ATTR,],
313
+ ["big", false, true, false, false, false, :any, true,
314
+ "large text style",
315
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
316
+ ["blockquote", false, false, false, false, false, :any, false,
317
+ "long quotation ",
318
+ HTML_FLOW, nil, QUOTE_ATTRS, [], [],],
319
+ ["body", true, true, false, false, false, :any, false,
320
+ "document body ",
321
+ BODY_CONTENTS, "div", BODY_ATTRS, BODY_DEPR, [],],
322
+ ["br", false, true, true, true, false, :any, true,
323
+ "forced line break ",
324
+ EMPTY, nil, CORE_ATTRS, CLEAR_ATTRS, [],],
325
+ ["button", false, false, false, false, false, :any, true,
326
+ "push button ",
327
+ [HTML_FLOW, MODIFIER], nil, BUTTON_ATTRS, [], [],],
328
+ ["caption", false, false, false, false, false, :any, false,
329
+ "table caption ",
330
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
331
+ ["center", false, true, false, false, true, :loose, false,
332
+ "shorthand for div align=center ",
333
+ HTML_FLOW, nil, [], HTML_ATTRS, [],],
334
+ ["cite", false, false, false, false, false, :any, true, "citation",
335
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
336
+ ["code", false, false, false, false, false, :any, true,
337
+ "computer code fragment",
338
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
339
+ ["col", false, true, true, true, false, :any, false, "table column ",
340
+ EMPTY, nil, COL_ATTRS, [], [],],
341
+ ["colgroup", false, true, false, false, false, :any, false,
342
+ "table column group ",
343
+ COL_ELT, "col", COL_ATTRS, [], [],],
344
+ ["dd", false, true, false, false, false, :any, false,
345
+ "definition description ",
346
+ HTML_FLOW, nil, HTML_ATTRS, [], [],],
347
+ ["del", false, false, false, false, false, :any, true,
348
+ "deleted text ",
349
+ HTML_FLOW, nil, EDIT_ATTRS, [], [],],
350
+ ["dfn", false, false, false, false, false, :any, true,
351
+ "instance definition",
352
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
353
+ ["dir", false, false, false, false, true, :loose, false,
354
+ "directory list",
355
+ BLOCKLI_ELT, "li", [], COMPACT_ATTRS, [],],
356
+ ["div", false, false, false, false, false, :any, false,
357
+ "generic language/style container",
358
+ HTML_FLOW, nil, HTML_ATTRS, ALIGN_ATTR, [],],
359
+ ["dl", false, false, false, false, false, :any, false,
360
+ "definition list ",
361
+ DL_CONTENTS, "dd", HTML_ATTRS, COMPACT_ATTR, [],],
362
+ ["dt", false, true, false, false, false, :any, false,
363
+ "definition term ",
364
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
365
+ ["em", false, true, false, false, false, :any, true,
366
+ "emphasis",
367
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
368
+ ["embed", false, true, false, false, true, :loose, true,
369
+ "generic embedded object ",
370
+ EMPTY, nil, EMBED_ATTRS, [], [],],
371
+ ["fieldset", false, false, false, false, false, :any, false,
372
+ "form control group ",
373
+ FIELDSET_CONTENTS, nil, HTML_ATTRS, [], [],],
374
+ ["font", false, true, false, false, true, :loose, true,
375
+ "local change to font ",
376
+ HTML_INLINE, nil, [], FONT_ATTRS, [],],
377
+ ["form", false, false, false, false, false, :any, false,
378
+ "interactive form ",
379
+ FORM_CONTENTS, "fieldset", FORM_ATTRS, TARGET_ATTR, ACTION_ATTR,],
380
+ ["frame", false, true, true, true, false, :frameset, false,
381
+ "subwindow ",
382
+ EMPTY, nil, [], FRAME_ATTRS, [],],
383
+ ["frameset", false, false, false, false, false, :frameset, false,
384
+ "window subdivision",
385
+ FRAMESET_CONTENTS, "noframes", [], FRAMESET_ATTRS, [],],
386
+ ["h1", false, false, false, false, false, :any, false,
387
+ "heading ",
388
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
389
+ ["h2", false, false, false, false, false, :any, false,
390
+ "heading ",
391
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
392
+ ["h3", false, false, false, false, false, :any, false,
393
+ "heading ",
394
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
395
+ ["h4", false, false, false, false, false, :any, false,
396
+ "heading ",
397
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
398
+ ["h5", false, false, false, false, false, :any, false,
399
+ "heading ",
400
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
401
+ ["h6", false, false, false, false, false, :any, false,
402
+ "heading ",
403
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
404
+ ["head", true, true, false, false, false, :any, false,
405
+ "document head ",
406
+ HEAD_CONTENTS, nil, HEAD_ATTRS, [], [],],
407
+ ["hr", false, true, true, true, false, :any, false,
408
+ "horizontal rule ",
409
+ EMPTY, nil, HTML_ATTRS, HR_DEPR, [],],
410
+ ["html", true, true, false, false, false, :any, false,
411
+ "document root element ",
412
+ HTML_CONTENT, nil, I18N_ATTRS, VERSION_ATTR, [],],
413
+ ["i", false, true, false, false, false, :any, true,
414
+ "italic text style",
415
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
416
+ ["iframe", false, false, false, false, false, :any, true,
417
+ "inline subwindow ",
418
+ HTML_FLOW, nil, [], IFRAME_ATTRS, [],],
419
+ ["img", false, true, true, true, false, :any, true,
420
+ "embedded image ",
421
+ EMPTY, nil, IMG_ATTRS, ALIGN_ATTR, SRC_ALT_ATTRS,],
422
+ ["input", false, true, true, true, false, :any, true,
423
+ "form control ",
424
+ EMPTY, nil, INPUT_ATTRS, ALIGN_ATTR, [],],
425
+ ["ins", false, false, false, false, false, :any, true,
426
+ "inserted text",
427
+ HTML_FLOW, nil, EDIT_ATTRS, [], [],],
428
+ ["isindex", false, true, true, true, true, :loose, false,
429
+ "single line prompt ",
430
+ EMPTY, nil, [], PROMPT_ATTRS, [],],
431
+ ["kbd", false, false, false, false, false, :any, true,
432
+ "text to be entered by the user",
433
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
434
+ ["label", false, false, false, false, false, :any, true,
435
+ "form field label text ",
436
+ [HTML_INLINE, MODIFIER], nil, LABEL_ATTRS, [], [],],
437
+ ["legend", false, false, false, false, false, :any, false,
438
+ "fieldset legend ",
439
+ HTML_INLINE, nil, LEGEND_ATTRS, ALIGN_ATTR, [],],
440
+ ["li", false, true, true, false, false, :any, false,
441
+ "list item ",
442
+ HTML_FLOW, nil, HTML_ATTRS, [], [],],
443
+ ["link", false, true, true, true, false, :any, false,
444
+ "a media-independent link ",
445
+ EMPTY, nil, LINK_ATTRS, TARGET_ATTR, [],],
446
+ ["map", false, false, false, false, false, :any, true,
447
+ "client-side image map ",
448
+ MAP_CONTENTS, nil, HTML_ATTRS, [], NAME_ATTR,],
449
+ ["menu", false, false, false, false, true, :loose, false,
450
+ "menu list ",
451
+ BLOCKLI_ELT, nil, [], COMPACT_ATTRS, [],],
452
+ ["meta", false, true, true, true, false, :any, false,
453
+ "generic metainformation ",
454
+ EMPTY, nil, META_ATTRS, [], CONTENT_ATTR,],
455
+ ["noframes", false, false, false, false, false, :frameset, false,
456
+ "alternate content container for non frame-based rendering ",
457
+ NOFRAMES_CONTENT, "body", HTML_ATTRS, [], [],],
458
+ ["noscript", false, false, false, false, false, :any, false,
459
+ "alternate content container for non script-based rendering ",
460
+ HTML_FLOW, "div", HTML_ATTRS, [], [],],
461
+ ["object", false, false, false, false, false, :any, true,
462
+ "generic embedded object ",
463
+ OBJECT_CONTENTS, "div", OBJECT_ATTRS, OBJECT_DEPR, [],],
464
+ ["ol", false, false, false, false, false, :any, false,
465
+ "ordered list ",
466
+ LI_ELT, "li", HTML_ATTRS, OL_ATTRS, [],],
467
+ ["optgroup", false, false, false, false, false, :any, false,
468
+ "option group ",
469
+ OPTION_ELT, "option", OPTGROUP_ATTRS, [], LABEL_ATTR,],
470
+ ["option", false, true, false, false, false, :any, false,
471
+ "selectable choice ",
472
+ HTML_PCDATA, nil, OPTION_ATTRS, [], [],],
473
+ ["p", false, true, false, false, false, :any, false,
474
+ "paragraph ",
475
+ HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [],],
476
+ ["param", false, true, true, true, false, :any, false,
477
+ "named property value ",
478
+ EMPTY, nil, PARAM_ATTRS, [], NAME_ATTR,],
479
+ ["pre", false, false, false, false, false, :any, false,
480
+ "preformatted text ",
481
+ PRE_CONTENT, nil, HTML_ATTRS, WIDTH_ATTR, [],],
482
+ ["q", false, false, false, false, false, :any, true,
483
+ "short inline quotation ",
484
+ HTML_INLINE, nil, QUOTE_ATTRS, [], [],],
485
+ ["s", false, true, false, false, true, :loose, true,
486
+ "strike-through text style",
487
+ HTML_INLINE, nil, [], HTML_ATTRS, [],],
488
+ ["samp", false, false, false, false, false, :any, true,
489
+ "sample program output, scripts, etc.",
490
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
491
+ ["script", false, false, false, false, false, :any, true,
492
+ "script statements ",
493
+ HTML_CDATA, nil, SCRIPT_ATTRS, LANGUAGE_ATTR, TYPE_ATTR,],
494
+ ["select", false, false, false, false, false, :any, true,
495
+ "option selector ",
496
+ SELECT_CONTENT, nil, SELECT_ATTRS, [], [],],
497
+ ["small", false, true, false, false, false, :any, true,
498
+ "small text style",
499
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
500
+ ["span", false, false, false, false, false, :any, true,
501
+ "generic language/style container ",
502
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
503
+ ["strike", false, true, false, false, true, :loose, true,
504
+ "strike-through text",
505
+ HTML_INLINE, nil, [], HTML_ATTRS, [],],
506
+ ["strong", false, true, false, false, false, :any, true,
507
+ "strong emphasis",
508
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
509
+ ["style", false, false, false, false, false, :any, false,
510
+ "style info ",
511
+ HTML_CDATA, nil, STYLE_ATTRS, [], TYPE_ATTR,],
512
+ ["sub", false, true, false, false, false, :any, true,
513
+ "subscript",
514
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
515
+ ["sup", false, true, false, false, false, :any, true,
516
+ "superscript ",
517
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
518
+ ["table", false, false, false, false, false, :any, false,
519
+ "",
520
+ TABLE_CONTENTS, "tr", TABLE_ATTRS, TABLE_DEPR, [],],
521
+ ["tbody", true, false, false, false, false, :any, false,
522
+ "table body ",
523
+ TR_ELT, "tr", TALIGN_ATTRS, [], [],],
524
+ ["td", false, false, false, false, false, :any, false,
525
+ "table data cell",
526
+ HTML_FLOW, nil, TH_TD_ATTR, TH_TD_DEPR, [],],
527
+ ["textarea", false, false, false, false, false, :any, true,
528
+ "multi-line text field ",
529
+ HTML_PCDATA, nil, TEXTAREA_ATTRS, [], ROWS_COLS_ATTR,],
530
+ ["tfoot", false, true, false, false, false, :any, false,
531
+ "table footer ",
532
+ TR_ELT, "tr", TALIGN_ATTRS, [], [],],
533
+ ["th", false, true, false, false, false, :any, false,
534
+ "table header cell",
535
+ HTML_FLOW, nil, TH_TD_ATTR, TH_TD_DEPR, [],],
536
+ ["thead", false, true, false, false, false, :any, false,
537
+ "table header ",
538
+ TR_ELT, "tr", TALIGN_ATTRS, [], [],],
539
+ ["title", false, false, false, false, false, :any, false,
540
+ "document title ",
541
+ HTML_PCDATA, nil, I18N_ATTRS, [], [],],
542
+ ["tr", false, false, false, false, false, :any, false,
543
+ "table row ",
544
+ TR_CONTENTS, "td", TALIGN_ATTRS, BGCOLOR_ATTR, [],],
545
+ ["tt", false, true, false, false, false, :any, true,
546
+ "teletype or monospaced text style",
547
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
548
+ ["u", false, true, false, false, true, :loose, true,
549
+ "underlined text style",
550
+ HTML_INLINE, nil, [], HTML_ATTRS, [],],
551
+ ["ul", false, false, false, false, false, :any, false,
552
+ "unordered list ",
553
+ LI_ELT, "li", HTML_ATTRS, UL_DEPR, [],],
554
+ ["var", false, false, false, false, false, :any, true,
555
+ "instance of a variable or program argument",
556
+ HTML_INLINE, nil, HTML_ATTRS, [], [],],
557
+ ].each do |descriptor|
558
+ name = descriptor[0]
559
+
560
+ begin
561
+ d = Desc.new(*descriptor)
562
+
563
+ # flatten all the attribute lists (Ruby1.9, *[a,b,c] can be
564
+ # used to flatten a literal list, but not in Ruby1.8).
565
+ d[:subelts] = d[:subelts].flatten
566
+ d[:attrs_opt] = d[:attrs_opt].flatten
567
+ d[:attrs_depr] = d[:attrs_depr].flatten
568
+ d[:attrs_req] = d[:attrs_req].flatten
569
+ rescue => e
570
+ p(name)
571
+ raise e
572
+ end
573
+
574
+ DefaultDescriptions[name] = d
575
+ end
576
+ end
577
+ end
578
+ end
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
- module HTML
4
+ module HTML4
3
5
  class EntityDescription < Struct.new(:value, :name, :description); end
4
6
 
5
7
  class EntityLookup
6
8
  ###
7
9
  # Look up entity with +name+
8
- def [] name
10
+ def [](name)
9
11
  (val = get(name)) && val.value
10
12
  end
11
13
  end
@@ -1,17 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
- module HTML
4
+ module HTML4
3
5
  ###
4
- # Nokogiri lets you write a SAX parser to process HTML but get HTML
5
- # correction features.
6
+ # Nokogiri lets you write a SAX parser to process HTML but get HTML correction features.
6
7
  #
7
- # See Nokogiri::HTML::SAX::Parser for a basic example of using a
8
- # SAX parser with HTML.
8
+ # See Nokogiri::HTML4::SAX::Parser for a basic example of using a SAX parser with HTML.
9
9
  #
10
10
  # For more information on SAX parsers, see Nokogiri::XML::SAX
11
11
  module SAX
12
12
  ###
13
- # This class lets you perform SAX style parsing on HTML with HTML
14
- # error correction.
13
+ # This class lets you perform SAX style parsing on HTML with HTML error correction.
15
14
  #
16
15
  # Here is a basic usage example:
17
16
  #
@@ -21,40 +20,42 @@ module Nokogiri
21
20
  # end
22
21
  # end
23
22
  #
24
- # parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
23
+ # parser = Nokogiri::HTML4::SAX::Parser.new(MyDoc.new)
25
24
  # parser.parse(File.read(ARGV[0], mode: 'rb'))
26
25
  #
27
26
  # For more information on SAX parsers, see Nokogiri::XML::SAX
28
27
  class Parser < Nokogiri::XML::SAX::Parser
29
28
  ###
30
29
  # Parse html stored in +data+ using +encoding+
31
- def parse_memory data, encoding = 'UTF-8'
32
- raise ArgumentError unless data
33
- return unless data.length > 0
30
+ def parse_memory(data, encoding = "UTF-8")
31
+ raise TypeError unless String === data
32
+ return if data.empty?
33
+
34
34
  ctx = ParserContext.memory(data, encoding)
35
35
  yield ctx if block_given?
36
- ctx.parse_with self
36
+ ctx.parse_with(self)
37
37
  end
38
38
 
39
39
  ###
40
40
  # Parse given +io+
41
- def parse_io io, encoding = 'UTF-8'
41
+ def parse_io(io, encoding = "UTF-8")
42
42
  check_encoding(encoding)
43
43
  @encoding = encoding
44
44
  ctx = ParserContext.io(io, ENCODINGS[encoding])
45
45
  yield ctx if block_given?
46
- ctx.parse_with self
46
+ ctx.parse_with(self)
47
47
  end
48
48
 
49
49
  ###
50
50
  # Parse a file with +filename+
51
- def parse_file filename, encoding = 'UTF-8'
51
+ def parse_file(filename, encoding = "UTF-8")
52
52
  raise ArgumentError unless filename
53
53
  raise Errno::ENOENT unless File.exist?(filename)
54
54
  raise Errno::EISDIR if File.directory?(filename)
55
+
55
56
  ctx = ParserContext.file(filename, encoding)
56
57
  yield ctx if block_given?
57
- ctx.parse_with self
58
+ ctx.parse_with(self)
58
59
  end
59
60
  end
60
61
  end