nokogiri 1.5.10 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (328) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +278 -0
  6. data/bin/nokogiri +50 -10
  7. data/dependencies.yml +74 -0
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +944 -100
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +232 -87
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +305 -201
  25. data/ext/nokogiri/xml_document_fragment.c +13 -15
  26. data/ext/nokogiri/xml_dtd.c +54 -48
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +30 -19
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +808 -503
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +198 -186
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +162 -98
  45. data/ext/nokogiri/xslt_stylesheet.c +162 -168
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4886 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/css/node.rb +1 -50
  92. data/lib/nokogiri/css/parser.rb +317 -286
  93. data/lib/nokogiri/css/parser.y +57 -43
  94. data/lib/nokogiri/css/parser_extras.rb +39 -36
  95. data/lib/nokogiri/css/syntax_error.rb +2 -1
  96. data/lib/nokogiri/css/tokenizer.rb +105 -103
  97. data/lib/nokogiri/css/tokenizer.rex +5 -5
  98. data/lib/nokogiri/css/xpath_visitor.rb +137 -48
  99. data/lib/nokogiri/css.rb +15 -14
  100. data/lib/nokogiri/decorators/slop.rb +13 -5
  101. data/lib/nokogiri/extension.rb +31 -0
  102. data/lib/nokogiri/gumbo.rb +14 -0
  103. data/lib/nokogiri/html.rb +32 -27
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +118 -50
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +20 -11
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +22 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  113. data/lib/nokogiri/html4.rb +40 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/html5.rb +473 -0
  118. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  119. data/lib/nokogiri/syntax_error.rb +1 -0
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/version.rb +3 -91
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +75 -33
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +157 -54
  129. data/lib/nokogiri/xml/document_fragment.rb +55 -8
  130. data/lib/nokogiri/xml/dtd.rb +15 -4
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  137. data/lib/nokogiri/xml/node.rb +712 -431
  138. data/lib/nokogiri/xml/node_set.rb +140 -123
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +31 -0
  141. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  142. data/lib/nokogiri/xml/pp/node.rb +1 -0
  143. data/lib/nokogiri/xml/pp.rb +3 -2
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax/document.rb +25 -30
  148. data/lib/nokogiri/xml/sax/parser.rb +8 -8
  149. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  150. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  151. data/lib/nokogiri/xml/sax.rb +5 -4
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +239 -0
  154. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  157. data/lib/nokogiri/xml/xpath.rb +4 -5
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xml.rb +37 -35
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/nokogiri/xslt.rb +17 -16
  162. data/lib/nokogiri.rb +55 -58
  163. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  164. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  165. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  166. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  167. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  171. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  172. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  173. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  174. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  175. metadata +307 -459
  176. data/.autotest +0 -26
  177. data/.gemtest +0 -0
  178. data/CHANGELOG.ja.rdoc +0 -785
  179. data/CHANGELOG.rdoc +0 -783
  180. data/C_CODING_STYLE.rdoc +0 -33
  181. data/Manifest.txt +0 -303
  182. data/README.ja.rdoc +0 -106
  183. data/README.rdoc +0 -175
  184. data/ROADMAP.md +0 -90
  185. data/Rakefile +0 -228
  186. data/STANDARD_RESPONSES.md +0 -47
  187. data/Y_U_NO_GEMSPEC.md +0 -155
  188. data/build_all +0 -105
  189. data/ext/nokogiri/html_document.c +0 -170
  190. data/ext/nokogiri/html_document.h +0 -10
  191. data/ext/nokogiri/html_element_description.c +0 -279
  192. data/ext/nokogiri/html_element_description.h +0 -10
  193. data/ext/nokogiri/html_entity_lookup.c +0 -32
  194. data/ext/nokogiri/html_entity_lookup.h +0 -8
  195. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  196. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  197. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  198. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  199. data/ext/nokogiri/xml_attr.h +0 -9
  200. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  201. data/ext/nokogiri/xml_cdata.h +0 -9
  202. data/ext/nokogiri/xml_comment.h +0 -9
  203. data/ext/nokogiri/xml_document.h +0 -23
  204. data/ext/nokogiri/xml_document_fragment.h +0 -10
  205. data/ext/nokogiri/xml_dtd.h +0 -10
  206. data/ext/nokogiri/xml_element_content.h +0 -10
  207. data/ext/nokogiri/xml_element_decl.h +0 -9
  208. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  209. data/ext/nokogiri/xml_entity_decl.h +0 -10
  210. data/ext/nokogiri/xml_entity_reference.h +0 -9
  211. data/ext/nokogiri/xml_io.c +0 -56
  212. data/ext/nokogiri/xml_io.h +0 -11
  213. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  214. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  215. data/ext/nokogiri/xml_namespace.h +0 -13
  216. data/ext/nokogiri/xml_node.h +0 -13
  217. data/ext/nokogiri/xml_node_set.h +0 -14
  218. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  219. data/ext/nokogiri/xml_reader.h +0 -10
  220. data/ext/nokogiri/xml_relax_ng.h +0 -9
  221. data/ext/nokogiri/xml_sax_parser.h +0 -39
  222. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  223. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  224. data/ext/nokogiri/xml_schema.h +0 -9
  225. data/ext/nokogiri/xml_syntax_error.h +0 -13
  226. data/ext/nokogiri/xml_text.h +0 -9
  227. data/ext/nokogiri/xml_xpath_context.h +0 -10
  228. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  229. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  230. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  231. data/tasks/cross_compile.rb +0 -150
  232. data/tasks/nokogiri.org.rb +0 -24
  233. data/tasks/test.rb +0 -95
  234. data/test/css/test_nthiness.rb +0 -159
  235. data/test/css/test_parser.rb +0 -341
  236. data/test/css/test_tokenizer.rb +0 -198
  237. data/test/css/test_xpath_visitor.rb +0 -91
  238. data/test/decorators/test_slop.rb +0 -16
  239. data/test/files/2ch.html +0 -108
  240. data/test/files/address_book.rlx +0 -12
  241. data/test/files/address_book.xml +0 -10
  242. data/test/files/bar/bar.xsd +0 -4
  243. data/test/files/dont_hurt_em_why.xml +0 -422
  244. data/test/files/encoding.html +0 -82
  245. data/test/files/encoding.xhtml +0 -84
  246. data/test/files/exslt.xml +0 -8
  247. data/test/files/exslt.xslt +0 -35
  248. data/test/files/foo/foo.xsd +0 -4
  249. data/test/files/metacharset.html +0 -10
  250. data/test/files/noencoding.html +0 -47
  251. data/test/files/po.xml +0 -32
  252. data/test/files/po.xsd +0 -66
  253. data/test/files/shift_jis.html +0 -10
  254. data/test/files/shift_jis.xml +0 -5
  255. data/test/files/snuggles.xml +0 -3
  256. data/test/files/staff.dtd +0 -10
  257. data/test/files/staff.xml +0 -59
  258. data/test/files/staff.xslt +0 -32
  259. data/test/files/test_document_url/bar.xml +0 -2
  260. data/test/files/test_document_url/document.dtd +0 -4
  261. data/test/files/test_document_url/document.xml +0 -6
  262. data/test/files/tlm.html +0 -850
  263. data/test/files/to_be_xincluded.xml +0 -2
  264. data/test/files/valid_bar.xml +0 -2
  265. data/test/files/xinclude.xml +0 -4
  266. data/test/helper.rb +0 -154
  267. data/test/html/sax/test_parser.rb +0 -141
  268. data/test/html/sax/test_parser_context.rb +0 -46
  269. data/test/html/test_builder.rb +0 -164
  270. data/test/html/test_document.rb +0 -552
  271. data/test/html/test_document_encoding.rb +0 -138
  272. data/test/html/test_document_fragment.rb +0 -261
  273. data/test/html/test_element_description.rb +0 -105
  274. data/test/html/test_named_characters.rb +0 -14
  275. data/test/html/test_node.rb +0 -196
  276. data/test/html/test_node_encoding.rb +0 -27
  277. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  278. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  279. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  280. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  281. data/test/test_convert_xpath.rb +0 -135
  282. data/test/test_css_cache.rb +0 -45
  283. data/test/test_encoding_handler.rb +0 -46
  284. data/test/test_memory_leak.rb +0 -156
  285. data/test/test_nokogiri.rb +0 -132
  286. data/test/test_reader.rb +0 -555
  287. data/test/test_soap4r_sax.rb +0 -52
  288. data/test/test_xslt_transforms.rb +0 -254
  289. data/test/xml/node/test_save_options.rb +0 -28
  290. data/test/xml/node/test_subclass.rb +0 -44
  291. data/test/xml/sax/test_parser.rb +0 -366
  292. data/test/xml/sax/test_parser_context.rb +0 -106
  293. data/test/xml/sax/test_push_parser.rb +0 -157
  294. data/test/xml/test_attr.rb +0 -64
  295. data/test/xml/test_attribute_decl.rb +0 -86
  296. data/test/xml/test_builder.rb +0 -306
  297. data/test/xml/test_c14n.rb +0 -151
  298. data/test/xml/test_cdata.rb +0 -48
  299. data/test/xml/test_comment.rb +0 -29
  300. data/test/xml/test_document.rb +0 -828
  301. data/test/xml/test_document_encoding.rb +0 -28
  302. data/test/xml/test_document_fragment.rb +0 -223
  303. data/test/xml/test_dtd.rb +0 -103
  304. data/test/xml/test_dtd_encoding.rb +0 -33
  305. data/test/xml/test_element_content.rb +0 -56
  306. data/test/xml/test_element_decl.rb +0 -73
  307. data/test/xml/test_entity_decl.rb +0 -122
  308. data/test/xml/test_entity_reference.rb +0 -245
  309. data/test/xml/test_namespace.rb +0 -95
  310. data/test/xml/test_node.rb +0 -1137
  311. data/test/xml/test_node_attributes.rb +0 -96
  312. data/test/xml/test_node_encoding.rb +0 -107
  313. data/test/xml/test_node_inheritance.rb +0 -32
  314. data/test/xml/test_node_reparenting.rb +0 -374
  315. data/test/xml/test_node_set.rb +0 -755
  316. data/test/xml/test_parse_options.rb +0 -64
  317. data/test/xml/test_processing_instruction.rb +0 -30
  318. data/test/xml/test_reader_encoding.rb +0 -142
  319. data/test/xml/test_relax_ng.rb +0 -60
  320. data/test/xml/test_schema.rb +0 -103
  321. data/test/xml/test_syntax_error.rb +0 -12
  322. data/test/xml/test_text.rb +0 -45
  323. data/test/xml/test_unparented_node.rb +0 -422
  324. data/test/xml/test_xinclude.rb +0 -83
  325. data/test/xml/test_xpath.rb +0 -295
  326. data/test/xslt/test_custom_functions.rb +0 -133
  327. data/test/xslt/test_exception_handling.rb +0 -37
  328. data/test_all +0 -81
@@ -1,552 +0,0 @@
1
- require "helper"
2
-
3
- module Nokogiri
4
- module HTML
5
- class TestDocument < Nokogiri::TestCase
6
- def setup
7
- super
8
- @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
9
- end
10
-
11
- def test_nil_css
12
- # Behavior is undefined but shouldn't break
13
- assert @html.css(nil)
14
- assert @html.xpath(nil)
15
- end
16
-
17
- def test_exceptions_remove_newlines
18
- errors = @html.errors
19
- assert errors.length > 0, 'has errors'
20
- errors.each do |error|
21
- assert_equal(error.to_s.chomp, error.to_s)
22
- end
23
- end
24
-
25
- def test_fragment
26
- fragment = @html.fragment
27
- assert_equal 0, fragment.children.length
28
- end
29
-
30
- def test_document_takes_config_block
31
- options = nil
32
- Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
33
- options = cfg
34
- options.nonet.nowarning.dtdattr
35
- end
36
- assert options.nonet?
37
- assert options.nowarning?
38
- assert options.dtdattr?
39
- end
40
-
41
- def test_parse_takes_config_block
42
- options = nil
43
- Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
44
- options = cfg
45
- options.nonet.nowarning.dtdattr
46
- end
47
- assert options.nonet?
48
- assert options.nowarning?
49
- assert options.dtdattr?
50
- end
51
-
52
- def test_subclass
53
- klass = Class.new(Nokogiri::HTML::Document)
54
- doc = klass.new
55
- assert_instance_of klass, doc
56
- end
57
-
58
- def test_subclass_initialize
59
- klass = Class.new(Nokogiri::HTML::Document) do
60
- attr_accessor :initialized_with
61
-
62
- def initialize(*args)
63
- @initialized_with = args
64
- end
65
- end
66
- doc = klass.new("uri", "external_id", 1)
67
- assert_equal ["uri", "external_id", 1], doc.initialized_with
68
- end
69
-
70
- def test_subclass_dup
71
- klass = Class.new(Nokogiri::HTML::Document)
72
- doc = klass.new.dup
73
- assert_instance_of klass, doc
74
- end
75
-
76
- def test_subclass_parse
77
- klass = Class.new(Nokogiri::HTML::Document)
78
- doc = klass.parse(File.read(HTML_FILE))
79
- assert_equal @html.to_s, doc.to_s
80
- assert_instance_of klass, doc
81
- end
82
-
83
- def test_document_parse_method
84
- html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
85
- assert_equal @html.to_s, html.to_s
86
- end
87
-
88
- def test_document_parse_method_with_url
89
- require 'open-uri'
90
- begin
91
- html = open('http://google.com').read
92
- rescue
93
- skip("This test needs the internet. Skips if no internet available.")
94
- end
95
- doc = Nokogiri::HTML html ,"http:/foobar.foobar/"
96
- refute_empty doc.to_s, "Document should not be empty"
97
- end
98
-
99
- ###
100
- # Nokogiri::HTML returns an empty Document when given a blank string GH#11
101
- def test_empty_string_returns_empty_doc
102
- doc = Nokogiri::HTML('')
103
- assert_instance_of Nokogiri::HTML::Document, doc
104
- assert_nil doc.root
105
- end
106
-
107
- unless Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
108
- # FIXME: this is a hack around broken libxml versions
109
- def test_to_xhtml_with_indent
110
- doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
111
- doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
112
- assert_indent 2, doc
113
- end
114
-
115
- def test_write_to_xhtml_with_indent
116
- io = StringIO.new
117
- doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
118
- doc.write_xhtml_to io, :indent => 5
119
- io.rewind
120
- doc = Nokogiri::HTML(io.read)
121
- assert_indent 5, doc
122
- end
123
- end
124
-
125
- def test_swap_should_not_exist
126
- assert_raises(NoMethodError) {
127
- @html.swap
128
- }
129
- end
130
-
131
- def test_namespace_should_not_exist
132
- assert_raises(NoMethodError) {
133
- @html.namespace
134
- }
135
- end
136
-
137
- def test_meta_encoding
138
- assert_equal 'UTF-8', @html.meta_encoding
139
- end
140
-
141
- def test_meta_encoding_is_strict_about_http_equiv
142
- doc = Nokogiri::HTML(<<-eohtml)
143
- <html>
144
- <head>
145
- <meta http-equiv="X-Content-Type" content="text/html; charset=Shift_JIS">
146
- </head>
147
- <body>
148
- foo
149
- </body>
150
- </html>
151
- eohtml
152
- assert_nil doc.meta_encoding
153
- end
154
-
155
- def test_meta_encoding_handles_malformed_content_charset
156
- doc = Nokogiri::HTML(<<EOHTML)
157
- <html>
158
- <head>
159
- <meta http-equiv="Content-type" content="text/html; utf-8" />
160
- </head>
161
- <body>
162
- foo
163
- </body>
164
- </html>
165
- EOHTML
166
- assert_nil doc.meta_encoding
167
- end
168
-
169
- def test_meta_encoding=
170
- @html.meta_encoding = 'EUC-JP'
171
- assert_equal 'EUC-JP', @html.meta_encoding
172
- end
173
-
174
- def test_title
175
- assert_equal 'Tender Lovemaking ', @html.title
176
- doc = Nokogiri::HTML('<html><body>foo</body></html>')
177
- assert_nil doc.title
178
- end
179
-
180
- def test_title=()
181
- doc = Nokogiri::HTML(<<eohtml)
182
- <html>
183
- <head>
184
- <title>old</title>
185
- </head>
186
- <body>
187
- foo
188
- </body>
189
- </html>
190
- eohtml
191
- doc.title = 'new'
192
- assert_equal 'new', doc.title
193
-
194
- doc = Nokogiri::HTML(<<eohtml)
195
- <html>
196
- <head>
197
- </head>
198
- <body>
199
- foo
200
- </body>
201
- </html>
202
- eohtml
203
- doc.title = 'new'
204
- assert_equal 'new', doc.title
205
-
206
- doc = Nokogiri::HTML(<<eohtml)
207
- <html>
208
- <body>
209
- foo
210
- </body>
211
- </html>
212
- eohtml
213
- doc.title = 'new'
214
- if Nokogiri.uses_libxml?
215
- assert_nil doc.title
216
- else
217
- assert_equal 'new', doc.title
218
- end
219
- end
220
-
221
- def test_meta_encoding_without_head
222
- html = Nokogiri::HTML('<html><body>foo</body></html>')
223
- assert_nil html.meta_encoding
224
-
225
- html.meta_encoding = 'EUC-JP'
226
- assert_nil html.meta_encoding
227
- end
228
-
229
- def test_meta_encoding_with_empty_content_type
230
- html = Nokogiri::HTML(<<-eohtml)
231
- <html>
232
- <head>
233
- <meta http-equiv="Content-Type" content="">
234
- </head>
235
- <body>
236
- foo
237
- </body>
238
- </html>
239
- eohtml
240
- assert_nil html.meta_encoding
241
-
242
- html = Nokogiri::HTML(<<-eohtml)
243
- <html>
244
- <head>
245
- <meta http-equiv="Content-Type">
246
- </head>
247
- <body>
248
- foo
249
- </body>
250
- </html>
251
- eohtml
252
- assert_nil html.meta_encoding
253
- end
254
-
255
- def test_root_node_parent_is_document
256
- parent = @html.root.parent
257
- assert_equal @html, parent
258
- assert_instance_of Nokogiri::HTML::Document, parent
259
- end
260
-
261
- def test_parse_handles_nil_gracefully
262
- @doc = Nokogiri::HTML::Document.parse(nil)
263
- assert_instance_of Nokogiri::HTML::Document, @doc
264
- end
265
-
266
- def test_parse_empty_document
267
- doc = Nokogiri::HTML("\n")
268
- assert_equal 0, doc.css('a').length
269
- assert_equal 0, doc.xpath('//a').length
270
- assert_equal 0, doc.search('//a').length
271
- end
272
-
273
- def test_HTML_function
274
- html = Nokogiri::HTML(File.read(HTML_FILE))
275
- assert html.html?
276
- end
277
-
278
- def test_parse_io
279
- assert File.open(HTML_FILE, 'rb') { |f|
280
- Document.read_io(f, nil, 'UTF-8',
281
- XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
282
- )
283
- }
284
- end
285
-
286
- def test_parse_temp_file
287
- temp_html_file = Tempfile.new("TEMP_HTML_FILE")
288
- File.open(HTML_FILE, 'rb') { |f| temp_html_file.write f.read }
289
- temp_html_file.close
290
- temp_html_file.open
291
- assert_equal Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath('//div/a').length,
292
- Nokogiri::HTML.parse(temp_html_file).xpath('//div/a').length
293
- end
294
-
295
- def test_to_xhtml
296
- assert_match 'XHTML', @html.to_xhtml
297
- assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
298
- assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
299
- end
300
-
301
- def test_no_xml_header
302
- html = Nokogiri::HTML(<<-eohtml)
303
- <html>
304
- </html>
305
- eohtml
306
- assert html.to_html.length > 0, 'html length is too short'
307
- assert_no_match(/^<\?xml/, html.to_html)
308
- end
309
-
310
- def test_document_has_error
311
- html = Nokogiri::HTML(<<-eohtml)
312
- <html>
313
- <body>
314
- <div awesome="asdf>
315
- <p>inside div tag</p>
316
- </div>
317
- <p>outside div tag</p>
318
- </body>
319
- </html>
320
- eohtml
321
- assert html.errors.length > 0
322
- end
323
-
324
- def test_relative_css
325
- html = Nokogiri::HTML(<<-eohtml)
326
- <html>
327
- <body>
328
- <div>
329
- <p>inside div tag</p>
330
- </div>
331
- <p>outside div tag</p>
332
- </body>
333
- </html>
334
- eohtml
335
- set = html.search('div').search('p')
336
- assert_equal(1, set.length)
337
- assert_equal('inside div tag', set.first.inner_text)
338
- end
339
-
340
- def test_multi_css
341
- html = Nokogiri::HTML(<<-eohtml)
342
- <html>
343
- <body>
344
- <div>
345
- <p>p tag</p>
346
- <a>a tag</a>
347
- </div>
348
- </body>
349
- </html>
350
- eohtml
351
- set = html.css('p, a')
352
- assert_equal(2, set.length)
353
- assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
354
- end
355
-
356
- def test_inner_text
357
- html = Nokogiri::HTML(<<-eohtml)
358
- <html>
359
- <body>
360
- <div>
361
- <p>
362
- Hello world!
363
- </p>
364
- </div>
365
- </body>
366
- </html>
367
- eohtml
368
- node = html.xpath('//div').first
369
- assert_equal('Hello world!', node.inner_text.strip)
370
- end
371
-
372
- def test_doc_type
373
- html = Nokogiri::HTML(<<-eohtml)
374
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
375
- <html xmlns="http://www.w3.org/1999/xhtml">
376
- <body>
377
- <p>Rainbow Dash</p>
378
- </body>
379
- </html>
380
- eohtml
381
- assert_equal "html", html.internal_subset.name
382
- assert_equal "-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id
383
- assert_equal "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id
384
- assert_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">", html.to_s[0,97]
385
- end
386
-
387
- def test_content_size
388
- html = Nokogiri::HTML('<div>
389
- </div>')
390
- assert_equal 1, html.content.size
391
- assert_equal 1, html.content.split("").size
392
- assert_equal "\n", html.content
393
- end
394
-
395
- def test_find_by_xpath
396
- found = @html.xpath('//div/a')
397
- assert_equal 3, found.length
398
- end
399
-
400
- def test_find_by_css
401
- found = @html.css('div > a')
402
- assert_equal 3, found.length
403
- end
404
-
405
- def test_find_by_css_with_square_brackets
406
- found = @html.css("div[@id='header'] > h1")
407
- found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
408
- assert_equal 1, found.length
409
- end
410
-
411
- def test_find_with_function
412
- assert @html.css("div:awesome() h1", Class.new {
413
- def awesome divs
414
- [divs.first]
415
- end
416
- }.new)
417
- end
418
-
419
- def test_dup_shallow
420
- found = @html.search('//div/a').first
421
- dup = found.dup(0)
422
- assert dup
423
- assert_equal '', dup.content
424
- end
425
-
426
- def test_search_can_handle_xpath_and_css
427
- found = @html.search('//div/a', 'div > p')
428
- length = @html.xpath('//div/a').length +
429
- @html.css('div > p').length
430
- assert_equal length, found.length
431
- end
432
-
433
- def test_dup_document
434
- assert dup = @html.dup
435
- assert_not_equal dup, @html
436
- assert @html.html?
437
- assert_instance_of Nokogiri::HTML::Document, dup
438
- assert dup.html?, 'duplicate should be html'
439
- assert_equal @html.to_s, dup.to_s
440
- end
441
-
442
- def test_dup_document_shallow
443
- assert dup = @html.dup(0)
444
- assert_not_equal dup, @html
445
- end
446
-
447
- def test_dup
448
- found = @html.search('//div/a').first
449
- dup = found.dup
450
- assert dup
451
- assert_equal found.content, dup.content
452
- assert_equal found.document, dup.document
453
- end
454
-
455
- def test_inner_html
456
- html = Nokogiri::HTML(<<-eohtml)
457
- <html>
458
- <body>
459
- <div>
460
- <p>
461
- Hello world!
462
- </p>
463
- </div>
464
- </body>
465
- </html>
466
- eohtml
467
- node = html.xpath('//div').first
468
- assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
469
- end
470
-
471
- def test_round_trip
472
- doc = Nokogiri::HTML(@html.inner_html)
473
- assert_equal @html.root.to_html, doc.root.to_html
474
- end
475
-
476
- def test_fragment_contains_text_node
477
- fragment = Nokogiri::HTML.fragment('fooo')
478
- assert_equal 1, fragment.children.length
479
- assert_equal 'fooo', fragment.inner_text
480
- end
481
-
482
- def test_fragment_includes_two_tags
483
- assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
484
- end
485
-
486
- def test_relative_css_finder
487
- doc = Nokogiri::HTML(<<-eohtml)
488
- <html>
489
- <body>
490
- <div class="red">
491
- <p>
492
- inside red
493
- </p>
494
- </div>
495
- <div class="green">
496
- <p>
497
- inside green
498
- </p>
499
- </div>
500
- </body>
501
- </html>
502
- eohtml
503
- red_divs = doc.css('div.red')
504
- assert_equal 1, red_divs.length
505
- p_tags = red_divs.first.css('p')
506
- assert_equal 1, p_tags.length
507
- assert_equal 'inside red', p_tags.first.text.strip
508
- end
509
-
510
- def test_find_classes
511
- doc = Nokogiri::HTML(<<-eohtml)
512
- <html>
513
- <body>
514
- <p class="red">RED</p>
515
- <p class="awesome red">RED</p>
516
- <p class="notred">GREEN</p>
517
- <p class="green notred">GREEN</p>
518
- </body>
519
- </html>
520
- eohtml
521
- list = doc.css('.red')
522
- assert_equal 2, list.length
523
- assert_equal %w{ RED RED }, list.map { |x| x.text }
524
- end
525
-
526
- def test_parse_can_take_io
527
- html = nil
528
- File.open(HTML_FILE, 'rb') { |f|
529
- html = Nokogiri::HTML(f)
530
- }
531
- assert html.html?
532
- end
533
-
534
- def test_html?
535
- assert !@html.xml?
536
- assert @html.html?
537
- end
538
-
539
- def test_serialize
540
- assert @html.serialize
541
- assert @html.to_html
542
- end
543
-
544
- def test_empty_document
545
- # empty document should return "" #699
546
- assert_equal "", Nokogiri::HTML.parse(nil).text
547
- assert_equal "", Nokogiri::HTML.parse("").text
548
- end
549
- end
550
- end
551
- end
552
-
@@ -1,138 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require "helper"
3
-
4
- module Nokogiri
5
- module HTML
6
- if RUBY_VERSION =~ /^1\.9/
7
- class TestDocumentEncoding < Nokogiri::TestCase
8
- def test_encoding
9
- doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
10
-
11
- hello = "こんにちは"
12
-
13
- assert_match doc.encoding, doc.to_html
14
- assert_match hello.encode('Shift_JIS'), doc.to_html
15
- assert_equal 'Shift_JIS', doc.to_html.encoding.name
16
-
17
- assert_match hello, doc.to_html(:encoding => 'UTF-8')
18
- assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
19
- assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
20
- end
21
-
22
- def test_default_to_encoding_from_string
23
- bad_charset = <<-eohtml
24
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
25
- <html>
26
- <head>
27
- <meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
28
- </head>
29
- <body>
30
- <a href="http://tenderlovemaking.com/">blah!</a>
31
- </body>
32
- </html>
33
- eohtml
34
- doc = Nokogiri::HTML(bad_charset)
35
- assert_equal bad_charset.encoding.name, doc.encoding
36
-
37
- doc = Nokogiri.parse(bad_charset)
38
- assert_equal bad_charset.encoding.name, doc.encoding
39
- end
40
-
41
- def test_encoding_non_utf8
42
- orig = '日本語が上手です'
43
- bin = Encoding::ASCII_8BIT
44
- [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
45
- html = <<-eohtml.encode(enc)
46
- <html>
47
- <meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
48
- <title xml:lang="ja">#{orig}</title></html>
49
- eohtml
50
- text = Nokogiri::HTML.parse(html).at('title').inner_text
51
- assert_equal(
52
- orig.encode(enc).force_encoding(bin),
53
- text.encode(enc).force_encoding(bin)
54
- )
55
- end
56
- end
57
-
58
- def test_encoding_with_a_bad_name
59
- bad_charset = <<-eohtml
60
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
61
- <html>
62
- <head>
63
- <meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
64
- </head>
65
- <body>
66
- <a href="http://tenderlovemaking.com/">blah!</a>
67
- </body>
68
- </html>
69
- eohtml
70
- doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
71
- assert_equal ['http://tenderlovemaking.com/'],
72
- doc.css('a').map { |a| a['href'] }
73
- end
74
- end
75
- end
76
-
77
- class TestDocumentEncodingDetection < Nokogiri::TestCase
78
- if IO.respond_to?(:binread)
79
- def binread(file)
80
- IO.binread(file)
81
- end
82
- else
83
- def binread(file)
84
- IO.read(file)
85
- end
86
- end
87
-
88
- def binopen(file)
89
- File.open(file, 'rb')
90
- end
91
-
92
- def test_document_html_noencoding
93
- from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
94
- from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
95
-
96
- assert_equal from_string.to_s.size, from_stream.to_s.size
97
- end
98
-
99
- def test_document_html_charset
100
- html = Nokogiri::HTML(binopen(METACHARSET_FILE))
101
- assert_equal 'iso-2022-jp', html.encoding
102
- assert_equal 'たこ焼き仮面', html.title
103
- end
104
-
105
- def test_document_xhtml_enc
106
- [ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
107
- doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
108
- ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map { |text| text.text }
109
-
110
- doc_from_string = Nokogiri::HTML(binread(file))
111
- ary_from_string = doc_from_string.xpath('//p/text()').map { |text| text.text }
112
-
113
- doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
114
- ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map { |text| text.text }
115
-
116
- doc_from_file = Nokogiri::HTML(binopen(file))
117
- ary_from_file = doc_from_file.xpath('//p/text()').map { |text| text.text }
118
-
119
- title = 'たこ焼き仮面'
120
-
121
- assert_equal(title, doc_from_string_enc.at('//title/text()').text)
122
- assert_equal(title, doc_from_string.at('//title/text()').text)
123
- assert_equal(title, doc_from_file_enc.at('//title/text()').text)
124
- unless Nokogiri.jruby? && file == ENCODING_HTML_FILE
125
- assert_equal(title, doc_from_file.at('//title/text()').text)
126
- end
127
-
128
- evil = (0..72).map { |i| '超' * i + '悪い事を構想中。' }
129
-
130
- assert_equal(evil, ary_from_string_enc)
131
- assert_equal(evil, ary_from_string)
132
- assert_equal(evil, ary_from_file_enc)
133
- assert_equal(evil, ary_from_file)
134
- }
135
- end
136
- end
137
- end
138
- end