nokogiri 1.6.2.rc1-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.editorconfig +17 -0
  4. data/.gemtest +0 -0
  5. data/.travis.yml +25 -0
  6. data/CHANGELOG.ja.rdoc +857 -0
  7. data/CHANGELOG.rdoc +880 -0
  8. data/C_CODING_STYLE.rdoc +33 -0
  9. data/Gemfile +21 -0
  10. data/Manifest.txt +371 -0
  11. data/README.ja.rdoc +112 -0
  12. data/README.rdoc +180 -0
  13. data/ROADMAP.md +89 -0
  14. data/Rakefile +351 -0
  15. data/STANDARD_RESPONSES.md +47 -0
  16. data/Y_U_NO_GEMSPEC.md +155 -0
  17. data/bin/nokogiri +78 -0
  18. data/build_all +130 -0
  19. data/dependencies.yml +4 -0
  20. data/ext/nokogiri/depend +358 -0
  21. data/ext/nokogiri/extconf.rb +453 -0
  22. data/ext/nokogiri/html_document.c +170 -0
  23. data/ext/nokogiri/html_document.h +10 -0
  24. data/ext/nokogiri/html_element_description.c +279 -0
  25. data/ext/nokogiri/html_element_description.h +10 -0
  26. data/ext/nokogiri/html_entity_lookup.c +32 -0
  27. data/ext/nokogiri/html_entity_lookup.h +8 -0
  28. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  29. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  30. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  31. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  32. data/ext/nokogiri/nokogiri.c +148 -0
  33. data/ext/nokogiri/nokogiri.h +164 -0
  34. data/ext/nokogiri/xml_attr.c +94 -0
  35. data/ext/nokogiri/xml_attr.h +9 -0
  36. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  37. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  38. data/ext/nokogiri/xml_cdata.c +56 -0
  39. data/ext/nokogiri/xml_cdata.h +9 -0
  40. data/ext/nokogiri/xml_comment.c +54 -0
  41. data/ext/nokogiri/xml_comment.h +9 -0
  42. data/ext/nokogiri/xml_document.c +577 -0
  43. data/ext/nokogiri/xml_document.h +23 -0
  44. data/ext/nokogiri/xml_document_fragment.c +48 -0
  45. data/ext/nokogiri/xml_document_fragment.h +10 -0
  46. data/ext/nokogiri/xml_dtd.c +202 -0
  47. data/ext/nokogiri/xml_dtd.h +10 -0
  48. data/ext/nokogiri/xml_element_content.c +123 -0
  49. data/ext/nokogiri/xml_element_content.h +10 -0
  50. data/ext/nokogiri/xml_element_decl.c +69 -0
  51. data/ext/nokogiri/xml_element_decl.h +9 -0
  52. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  53. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  54. data/ext/nokogiri/xml_entity_decl.c +110 -0
  55. data/ext/nokogiri/xml_entity_decl.h +10 -0
  56. data/ext/nokogiri/xml_entity_reference.c +52 -0
  57. data/ext/nokogiri/xml_entity_reference.h +9 -0
  58. data/ext/nokogiri/xml_io.c +56 -0
  59. data/ext/nokogiri/xml_io.h +11 -0
  60. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  61. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  62. data/ext/nokogiri/xml_namespace.c +78 -0
  63. data/ext/nokogiri/xml_namespace.h +13 -0
  64. data/ext/nokogiri/xml_node.c +1541 -0
  65. data/ext/nokogiri/xml_node.h +13 -0
  66. data/ext/nokogiri/xml_node_set.c +467 -0
  67. data/ext/nokogiri/xml_node_set.h +14 -0
  68. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  69. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  70. data/ext/nokogiri/xml_reader.c +681 -0
  71. data/ext/nokogiri/xml_reader.h +10 -0
  72. data/ext/nokogiri/xml_relax_ng.c +161 -0
  73. data/ext/nokogiri/xml_relax_ng.h +9 -0
  74. data/ext/nokogiri/xml_sax_parser.c +312 -0
  75. data/ext/nokogiri/xml_sax_parser.h +39 -0
  76. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  77. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  78. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  79. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  80. data/ext/nokogiri/xml_schema.c +205 -0
  81. data/ext/nokogiri/xml_schema.h +9 -0
  82. data/ext/nokogiri/xml_syntax_error.c +63 -0
  83. data/ext/nokogiri/xml_syntax_error.h +13 -0
  84. data/ext/nokogiri/xml_text.c +52 -0
  85. data/ext/nokogiri/xml_text.h +9 -0
  86. data/ext/nokogiri/xml_xpath_context.c +307 -0
  87. data/ext/nokogiri/xml_xpath_context.h +10 -0
  88. data/ext/nokogiri/xslt_stylesheet.c +270 -0
  89. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  90. data/lib/nokogiri.rb +137 -0
  91. data/lib/nokogiri/2.0/nokogiri.so +0 -0
  92. data/lib/nokogiri/2.1/nokogiri.so +0 -0
  93. data/lib/nokogiri/css.rb +27 -0
  94. data/lib/nokogiri/css/node.rb +52 -0
  95. data/lib/nokogiri/css/parser.rb +715 -0
  96. data/lib/nokogiri/css/parser.y +249 -0
  97. data/lib/nokogiri/css/parser_extras.rb +91 -0
  98. data/lib/nokogiri/css/syntax_error.rb +7 -0
  99. data/lib/nokogiri/css/tokenizer.rb +152 -0
  100. data/lib/nokogiri/css/tokenizer.rex +55 -0
  101. data/lib/nokogiri/css/xpath_visitor.rb +219 -0
  102. data/lib/nokogiri/decorators/slop.rb +35 -0
  103. data/lib/nokogiri/html.rb +37 -0
  104. data/lib/nokogiri/html/builder.rb +35 -0
  105. data/lib/nokogiri/html/document.rb +333 -0
  106. data/lib/nokogiri/html/document_fragment.rb +41 -0
  107. data/lib/nokogiri/html/element_description.rb +23 -0
  108. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  109. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  110. data/lib/nokogiri/html/sax/parser.rb +52 -0
  111. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  112. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  113. data/lib/nokogiri/syntax_error.rb +4 -0
  114. data/lib/nokogiri/version.rb +106 -0
  115. data/lib/nokogiri/xml.rb +73 -0
  116. data/lib/nokogiri/xml/attr.rb +14 -0
  117. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  118. data/lib/nokogiri/xml/builder.rb +443 -0
  119. data/lib/nokogiri/xml/cdata.rb +11 -0
  120. data/lib/nokogiri/xml/character_data.rb +7 -0
  121. data/lib/nokogiri/xml/document.rb +279 -0
  122. data/lib/nokogiri/xml/document_fragment.rb +112 -0
  123. data/lib/nokogiri/xml/dtd.rb +32 -0
  124. data/lib/nokogiri/xml/element_content.rb +36 -0
  125. data/lib/nokogiri/xml/element_decl.rb +13 -0
  126. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  127. data/lib/nokogiri/xml/namespace.rb +13 -0
  128. data/lib/nokogiri/xml/node.rb +982 -0
  129. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  130. data/lib/nokogiri/xml/node_set.rb +355 -0
  131. data/lib/nokogiri/xml/notation.rb +6 -0
  132. data/lib/nokogiri/xml/parse_options.rb +98 -0
  133. data/lib/nokogiri/xml/pp.rb +2 -0
  134. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  135. data/lib/nokogiri/xml/pp/node.rb +56 -0
  136. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  137. data/lib/nokogiri/xml/reader.rb +112 -0
  138. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  139. data/lib/nokogiri/xml/sax.rb +4 -0
  140. data/lib/nokogiri/xml/sax/document.rb +171 -0
  141. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  142. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  143. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  144. data/lib/nokogiri/xml/schema.rb +63 -0
  145. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  146. data/lib/nokogiri/xml/text.rb +9 -0
  147. data/lib/nokogiri/xml/xpath.rb +10 -0
  148. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  149. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  150. data/lib/nokogiri/xslt.rb +56 -0
  151. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/suppressions/README.txt +1 -0
  154. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  155. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  156. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  157. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  158. data/tasks/nokogiri.org.rb +24 -0
  159. data/tasks/test.rb +95 -0
  160. data/test/css/test_nthiness.rb +222 -0
  161. data/test/css/test_parser.rb +358 -0
  162. data/test/css/test_tokenizer.rb +198 -0
  163. data/test/css/test_xpath_visitor.rb +96 -0
  164. data/test/decorators/test_slop.rb +16 -0
  165. data/test/files/2ch.html +108 -0
  166. data/test/files/address_book.rlx +12 -0
  167. data/test/files/address_book.xml +10 -0
  168. data/test/files/atom.xml +344 -0
  169. data/test/files/bar/bar.xsd +4 -0
  170. data/test/files/bogus.xml +0 -0
  171. data/test/files/dont_hurt_em_why.xml +422 -0
  172. data/test/files/encoding.html +82 -0
  173. data/test/files/encoding.xhtml +84 -0
  174. data/test/files/exslt.xml +8 -0
  175. data/test/files/exslt.xslt +35 -0
  176. data/test/files/foo/foo.xsd +4 -0
  177. data/test/files/metacharset.html +10 -0
  178. data/test/files/noencoding.html +47 -0
  179. data/test/files/po.xml +32 -0
  180. data/test/files/po.xsd +66 -0
  181. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  182. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  183. data/test/files/saml/xenc_schema.xsd +146 -0
  184. data/test/files/saml/xmldsig_schema.xsd +318 -0
  185. data/test/files/shift_jis.html +10 -0
  186. data/test/files/shift_jis.xml +5 -0
  187. data/test/files/shift_jis_no_charset.html +9 -0
  188. data/test/files/snuggles.xml +3 -0
  189. data/test/files/staff.dtd +10 -0
  190. data/test/files/staff.xml +59 -0
  191. data/test/files/staff.xslt +32 -0
  192. data/test/files/test_document_url/bar.xml +2 -0
  193. data/test/files/test_document_url/document.dtd +4 -0
  194. data/test/files/test_document_url/document.xml +6 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/files/to_be_xincluded.xml +2 -0
  197. data/test/files/valid_bar.xml +2 -0
  198. data/test/files/xinclude.xml +4 -0
  199. data/test/helper.rb +164 -0
  200. data/test/html/sax/test_parser.rb +141 -0
  201. data/test/html/sax/test_parser_context.rb +46 -0
  202. data/test/html/test_builder.rb +164 -0
  203. data/test/html/test_document.rb +619 -0
  204. data/test/html/test_document_encoding.rb +148 -0
  205. data/test/html/test_document_fragment.rb +261 -0
  206. data/test/html/test_element_description.rb +105 -0
  207. data/test/html/test_named_characters.rb +14 -0
  208. data/test/html/test_node.rb +196 -0
  209. data/test/html/test_node_encoding.rb +27 -0
  210. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  211. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  212. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  213. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  214. data/test/namespaces/test_namespaces_in_parsed_doc.rb +66 -0
  215. data/test/test_convert_xpath.rb +135 -0
  216. data/test/test_css_cache.rb +45 -0
  217. data/test/test_encoding_handler.rb +46 -0
  218. data/test/test_memory_leak.rb +156 -0
  219. data/test/test_nokogiri.rb +138 -0
  220. data/test/test_reader.rb +558 -0
  221. data/test/test_soap4r_sax.rb +52 -0
  222. data/test/test_xslt_transforms.rb +279 -0
  223. data/test/xml/node/test_save_options.rb +28 -0
  224. data/test/xml/node/test_subclass.rb +44 -0
  225. data/test/xml/sax/test_parser.rb +382 -0
  226. data/test/xml/sax/test_parser_context.rb +115 -0
  227. data/test/xml/sax/test_push_parser.rb +157 -0
  228. data/test/xml/test_attr.rb +64 -0
  229. data/test/xml/test_attribute_decl.rb +86 -0
  230. data/test/xml/test_builder.rb +315 -0
  231. data/test/xml/test_c14n.rb +161 -0
  232. data/test/xml/test_cdata.rb +48 -0
  233. data/test/xml/test_comment.rb +29 -0
  234. data/test/xml/test_document.rb +934 -0
  235. data/test/xml/test_document_encoding.rb +28 -0
  236. data/test/xml/test_document_fragment.rb +228 -0
  237. data/test/xml/test_dtd.rb +187 -0
  238. data/test/xml/test_dtd_encoding.rb +33 -0
  239. data/test/xml/test_element_content.rb +56 -0
  240. data/test/xml/test_element_decl.rb +73 -0
  241. data/test/xml/test_entity_decl.rb +122 -0
  242. data/test/xml/test_entity_reference.rb +245 -0
  243. data/test/xml/test_namespace.rb +95 -0
  244. data/test/xml/test_node.rb +1155 -0
  245. data/test/xml/test_node_attributes.rb +113 -0
  246. data/test/xml/test_node_encoding.rb +107 -0
  247. data/test/xml/test_node_inheritance.rb +32 -0
  248. data/test/xml/test_node_reparenting.rb +374 -0
  249. data/test/xml/test_node_set.rb +755 -0
  250. data/test/xml/test_parse_options.rb +64 -0
  251. data/test/xml/test_processing_instruction.rb +30 -0
  252. data/test/xml/test_reader_encoding.rb +142 -0
  253. data/test/xml/test_relax_ng.rb +60 -0
  254. data/test/xml/test_schema.rb +129 -0
  255. data/test/xml/test_syntax_error.rb +12 -0
  256. data/test/xml/test_text.rb +45 -0
  257. data/test/xml/test_unparented_node.rb +422 -0
  258. data/test/xml/test_xinclude.rb +83 -0
  259. data/test/xml/test_xpath.rb +376 -0
  260. data/test/xslt/test_custom_functions.rb +133 -0
  261. data/test/xslt/test_exception_handling.rb +37 -0
  262. data/test_all +81 -0
  263. metadata +601 -0
@@ -0,0 +1,619 @@
1
+ require "helper"
2
+
3
module Nokogiri
  module HTML
    # Tests for Nokogiri::HTML::Document: parsing entry points, subclassing,
    # the <meta> encoding and <title> accessors, serialization, searching and
    # duplication. Many tests reuse the fixture document built in #setup.
    class TestDocument < Nokogiri::TestCase
      # Parses the HTML_FILE fixture into @html.
      def setup
        super
        @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
      end

      # Passing nil as a selector must not raise.
      def test_nil_css
        # Behavior is undefined but shouldn't break
        assert @html.css(nil)
        assert @html.xpath(nil)
      end

      # Every recorded parse error stringifies without a trailing newline.
      def test_exceptions_remove_newlines
        errors = @html.errors
        assert errors.length > 0, 'has errors'
        errors.each do |error|
          message = error.to_s
          assert_equal(message.chomp, message)
        end
      end

      # Document#fragment with no arguments yields a fragment without children.
      def test_fragment
        fragment = @html.fragment
        assert_equal 0, fragment.children.length
      end

      # Nokogiri::HTML() yields a parse-options object that can be configured.
      def test_document_takes_config_block
        options = nil
        Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
          options = cfg
          options.nonet.nowarning.dtdattr
        end
        assert options.nonet?
        assert options.nowarning?
        assert options.dtdattr?
      end

      # Nokogiri::HTML.parse yields a parse-options object that can be configured.
      def test_parse_takes_config_block
        options = nil
        Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
          options = cfg
          options.nonet.nowarning.dtdattr
        end
        assert options.nonet?
        assert options.nowarning?
        assert options.dtdattr?
      end

      # Instantiating a subclass returns an instance of that subclass.
      def test_subclass
        klass = Class.new(Nokogiri::HTML::Document)
        doc = klass.new
        assert_instance_of klass, doc
      end

      # A subclass's #initialize receives the arguments given to .new.
      def test_subclass_initialize
        klass = Class.new(Nokogiri::HTML::Document) do
          attr_accessor :initialized_with

          def initialize(*args)
            @initialized_with = args
          end
        end
        doc = klass.new("uri", "external_id", 1)
        assert_equal ["uri", "external_id", 1], doc.initialized_with
      end

      # Duplicating a subclass instance keeps the subclass.
      def test_subclass_dup
        klass = Class.new(Nokogiri::HTML::Document)
        doc = klass.new.dup
        assert_instance_of klass, doc
      end

      # Subclass.parse returns a subclass instance with the same serialization
      # as the fixture document.
      def test_subclass_parse
        klass = Class.new(Nokogiri::HTML::Document)
        doc = klass.parse(File.read(HTML_FILE))
        assert_equal @html.to_s, doc.to_s
        assert_instance_of klass, doc
      end

      # Document.parse and Nokogiri::HTML.parse serialize identically.
      def test_document_parse_method
        html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
        assert_equal @html.to_s, html.to_s
      end

      # Parses markup downloaded from the network together with a document URL
      # and checks the result serializes to something non-empty. The test is
      # skipped when the download raises.
      def test_document_parse_method_with_url
        require 'open-uri'
        begin
          # URI#read (mixed in by open-uri) is used instead of Kernel#open,
          # which no longer accepts URLs on Ruby 3.0 and later.
          html = URI.parse('http://google.com').read
        rescue
          skip("This test needs the internet. Skips if no internet available.")
        end
        doc = Nokogiri::HTML(html, "http:/foobar.foobar/")
        refute_empty doc.to_s, "Document should not be empty"
      end

      ###
      # Nokogiri::HTML returns an empty Document when given a blank string GH#11
      def test_empty_string_returns_empty_doc
        doc = Nokogiri::HTML('')
        assert_instance_of Nokogiri::HTML::Document, doc
        assert_nil doc.root
      end

      # The indent tests are defined unless Nokogiri reports libxml and the
      # first two components of LIBXML_VERSION are "2" and "6".
      if !Nokogiri.uses_libxml? || LIBXML_VERSION.split('.')[0..1] != %w[2 6]
        # FIXME: this is a hack around broken libxml versions
        def test_to_xhtml_with_indent
          doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
          doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
          assert_indent 2, doc
        end

        # Same check as above, but serializing through an IO with indent 5.
        def test_write_to_xhtml_with_indent
          io = StringIO.new
          doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
          doc.write_xhtml_to io, :indent => 5
          io.rewind
          doc = Nokogiri::HTML(io.read)
          assert_indent 5, doc
        end
      end

      # Documents do not respond to #swap.
      def test_swap_should_not_exist
        assert_raises(NoMethodError) do
          @html.swap
        end
      end

      # Documents do not respond to #namespace.
      def test_namespace_should_not_exist
        assert_raises(NoMethodError) do
          @html.namespace
        end
      end

      # The fixture document reports UTF-8 as its meta encoding.
      def test_meta_encoding
        assert_equal 'UTF-8', @html.meta_encoding
      end

      # A <meta> whose http-equiv is not Content-Type is ignored.
      def test_meta_encoding_is_strict_about_http_equiv
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<head>
<meta http-equiv="X-Content-Type" content="text/html; charset=Shift_JIS">
</head>
<body>
foo
</body>
</html>
        eohtml
        assert_nil doc.meta_encoding
      end

      # A Content-type <meta> whose content lacks "charset=" yields nil.
      def test_meta_encoding_handles_malformed_content_charset
        doc = Nokogiri::HTML(<<-EOHTML)
<html>
<head>
<meta http-equiv="Content-type" content="text/html; utf-8" />
</head>
<body>
foo
</body>
</html>
        EOHTML
        assert_nil doc.meta_encoding
      end

      # The <meta charset="..."> form is recognized.
      def test_meta_encoding_checks_charset
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<head>
<meta charset="UTF-8">
</head>
<body>
foo
</body>
</html>
        eohtml
        assert_equal 'UTF-8', doc.meta_encoding
      end

      # Assigning meta_encoding is reflected by the reader.
      def test_meta_encoding=
        @html.meta_encoding = 'EUC-JP'
        assert_equal 'EUC-JP', @html.meta_encoding
      end

      # #title returns the title text, or nil when the document has no <title>.
      def test_title
        assert_equal 'Tender Lovemaking ', @html.title
        doc = Nokogiri::HTML('<html><body>foo</body></html>')
        assert_nil doc.title
      end

      # Exercises the title writer against several document shapes and checks
      # where the <title> element ends up relative to its neighbours.
      def test_title=()
        # An existing <title> is replaced rather than duplicated.
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<head>
<title>old</title>
</head>
<body>
foo
</body>
</html>
        eohtml
        doc.title = 'new'
        assert_equal 1, doc.css('title').size
        assert_equal 'new', doc.title

        # <head> with only a <meta>: the title lands in <head>, after the meta.
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
foo
</body>
</html>
        eohtml
        doc.title = 'new'
        assert_equal 'new', doc.title
        title = doc.at('/html/head/title')
        assert_not_nil title
        assert_equal 'new', title.text
        assert_equal(-1, doc.at('meta[@http-equiv]') <=> title)

        # No <head> at all: the title must still precede <body>.
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<body>
foo
</body>
</html>
        eohtml
        doc.title = 'new'
        assert_equal 'new', doc.title
        # <head> may or may not be added
        title = doc.at('/html//title')
        assert_not_nil title
        assert_equal 'new', title.text
        assert_equal(-1, title <=> doc.at('body'))

        # <meta charset> directly under <html>: title sits between meta and body.
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<meta charset="UTF-8">
<body>
foo
</body>
</html>
        eohtml
        doc.title = 'new'
        assert_equal 'new', doc.title
        assert_equal(-1, doc.at('meta[@charset]') <=> doc.at('title'))
        assert_equal(-1, doc.at('title') <=> doc.at('body'))

        # Doctype plus a bare <p>: the DTD stays first and the title precedes <p>.
        doc = Nokogiri::HTML('<!DOCTYPE html><p>hello')
        doc.title = 'new'
        assert_equal 'new', doc.title
        assert_instance_of Nokogiri::XML::DTD, doc.children.first
        assert_equal(-1, doc.at('title') <=> doc.at('p'))

        # Empty input: /html/head/title is created.
        doc = Nokogiri::HTML('')
        doc.title = 'new'
        assert_equal 'new', doc.title
        assert_equal 'new', doc.at('/html/head/title/text()').to_s
      end

      # Setting meta_encoding on a document without <head> creates an
      # http-equiv <meta> inside a <head> that precedes <body>.
      def test_meta_encoding_without_head
        encoding = 'EUC-JP'
        html = Nokogiri::HTML('<html><body>foo</body></html>', nil, encoding)

        assert_nil html.meta_encoding

        html.meta_encoding = encoding
        assert_equal encoding, html.meta_encoding

        meta = html.at('/html/head/meta[@http-equiv and boolean(@content)]')
        assert meta, 'meta is in head'

        assert meta.at('./parent::head/following-sibling::body'), 'meta is before body'
      end

      # Same as above for a document with <!DOCTYPE html>: the created <meta>
      # carries a charset attribute.
      def test_html5_meta_encoding_without_head
        encoding = 'EUC-JP'
        html = Nokogiri::HTML('<!DOCTYPE html><html><body>foo</body></html>', nil, encoding)

        assert_nil html.meta_encoding

        html.meta_encoding = encoding
        assert_equal encoding, html.meta_encoding

        meta = html.at('/html/head/meta[@charset]')
        assert meta, 'meta is in head'

        assert meta.at('./parent::head/following-sibling::body'), 'meta is before body'
      end

      # An empty or missing content attribute on the Content-Type <meta>
      # yields a nil meta encoding.
      def test_meta_encoding_with_empty_content_type
        html = Nokogiri::HTML(<<-eohtml)
<html>
<head>
<meta http-equiv="Content-Type" content="">
</head>
<body>
foo
</body>
</html>
        eohtml
        assert_nil html.meta_encoding

        html = Nokogiri::HTML(<<-eohtml)
<html>
<head>
<meta http-equiv="Content-Type">
</head>
<body>
foo
</body>
</html>
        eohtml
        assert_nil html.meta_encoding
      end

      # The root element's parent is the document itself.
      def test_root_node_parent_is_document
        parent = @html.root.parent
        assert_equal @html, parent
        assert_instance_of Nokogiri::HTML::Document, parent
      end

      # Document.parse(nil) still returns a Document.
      def test_parse_handles_nil_gracefully
        @doc = Nokogiri::HTML::Document.parse(nil)
        assert_instance_of Nokogiri::HTML::Document, @doc
      end

      # Searching a document parsed from "\n" finds nothing and does not raise.
      def test_parse_empty_document
        doc = Nokogiri::HTML("\n")
        assert_equal 0, doc.css('a').length
        assert_equal 0, doc.xpath('//a').length
        assert_equal 0, doc.search('//a').length
      end

      # Nokogiri::HTML() produces a document for which #html? is true.
      def test_HTML_function
        html = Nokogiri::HTML(File.read(HTML_FILE))
        assert html.html?
      end

      # Document.read_io accepts an IO, a nil URL, an encoding and option flags.
      def test_parse_io
        flags = XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
        doc = File.open(HTML_FILE, 'rb') { |f| Document.read_io(f, nil, 'UTF-8', flags) }
        assert doc
      end

      # Parsing a Tempfile copy of the fixture finds as many //div/a nodes as
      # parsing the fixture's contents as a string.
      def test_parse_temp_file
        temp_html_file = Tempfile.new("TEMP_HTML_FILE")
        File.open(HTML_FILE, 'rb') { |f| temp_html_file.write f.read }
        # Close and reopen so the written data is flushed and reading starts
        # from the beginning of the file.
        temp_html_file.close
        temp_html_file.open
        assert_equal Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath('//div/a').length,
                     Nokogiri::HTML.parse(temp_html_file).xpath('//div/a').length
      ensure
        # Close and unlink the temp file even when an assertion fails.
        temp_html_file.close! if temp_html_file
      end

      # #to_xhtml output mentions XHTML and, when requested, the encoding.
      def test_to_xhtml
        assert_match 'XHTML', @html.to_xhtml
        assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
        assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
      end

      # #to_html output is non-empty and has no line starting with "<?xml".
      def test_no_xml_header
        html = Nokogiri::HTML(<<-eohtml)
<html>
</html>
        eohtml
        assert html.to_html.length > 0, 'html length is too short'
        assert_no_match(/^<\?xml/, html.to_html)
      end

      # Markup with an unterminated attribute value records parse errors.
      def test_document_has_error
        html = Nokogiri::HTML(<<-eohtml)
<html>
<body>
<div awesome="asdf>
<p>inside div tag</p>
</div>
<p>outside div tag</p>
</body>
</html>
        eohtml
        assert html.errors.length > 0
      end

      # A search chained on a node set only looks inside those nodes.
      def test_relative_css
        html = Nokogiri::HTML(<<-eohtml)
<html>
<body>
<div>
<p>inside div tag</p>
</div>
<p>outside div tag</p>
</body>
</html>
        eohtml
        set = html.search('div').search('p')
        assert_equal(1, set.length)
        assert_equal('inside div tag', set.first.inner_text)
      end

      # A comma-separated selector returns matches for every alternative.
      def test_multi_css
        html = Nokogiri::HTML(<<-eohtml)
<html>
<body>
<div>
<p>p tag</p>
<a>a tag</a>
</div>
</body>
</html>
        eohtml
        set = html.css('p, a')
        assert_equal(2, set.length)
        assert_equal ['a tag', 'p tag'], set.map(&:content).sort
      end

      # #inner_text of an element includes the text of nested elements.
      def test_inner_text
        html = Nokogiri::HTML(<<-eohtml)
<html>
<body>
<div>
<p>
Hello world!
</p>
</div>
</body>
</html>
        eohtml
        node = html.xpath('//div').first
        assert_equal('Hello world!', node.inner_text.strip)
      end

      # The internal subset exposes the doctype's name and identifiers, and
      # the serialized document starts with the same doctype declaration.
      def test_doc_type
        html = Nokogiri::HTML(<<-eohtml)
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<body>
<p>Rainbow Dash</p>
</body>
</html>
        eohtml
        doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        assert_equal "html", html.internal_subset.name
        assert_equal "-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id
        assert_equal "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id
        # Compare only the leading doctype-sized prefix (97 characters).
        assert_equal doctype, html.to_s[0, doctype.length]
      end

      # The content of a <div> holding only a line break is exactly "\n".
      def test_content_size
        html = Nokogiri::HTML("<div>\n</div>")
        assert_equal 1, html.content.size
        assert_equal 1, html.content.split("").size
        assert_equal "\n", html.content
      end

      # The fixture has three <a> elements that are children of a <div> (XPath).
      def test_find_by_xpath
        found = @html.xpath('//div/a')
        assert_equal 3, found.length
      end

      # The fixture has three <a> elements that are children of a <div> (CSS).
      def test_find_by_css
        found = @html.css('div > a')
        assert_equal 3, found.length
      end

      # XPath-style [@attr='value'] predicates are accepted in CSS selectors.
      def test_find_by_css_with_square_brackets
        # The child-combinator form only has to run without raising.
        @html.css("div[@id='header'] > h1")
        found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
        assert_equal 1, found.length
      end

      # A custom pseudo-class is resolved by calling the same-named method on
      # the handler object passed to #css.
      def test_find_with_function
        handler = Class.new do
          def awesome(divs)
            [divs.first]
          end
        end.new
        assert @html.css("div:awesome() h1", handler)
      end

      # dup(0) on a node produces a copy with empty content.
      def test_dup_shallow
        found = @html.search('//div/a').first
        copy = found.dup(0)
        assert copy
        assert_equal '', copy.content
      end

      # #search with an XPath and a CSS query returns the combined matches.
      def test_search_can_handle_xpath_and_css
        found = @html.search('//div/a', 'div > p')
        length = @html.xpath('//div/a').length +
                 @html.css('div > p').length
        assert_equal length, found.length
      end

      # A duplicated document is a distinct HTML document with the same
      # serialization.
      def test_dup_document
        copy = @html.dup
        assert copy
        assert_not_equal copy, @html
        assert @html.html?
        assert_instance_of Nokogiri::HTML::Document, copy
        assert copy.html?, 'duplicate should be html'
        assert_equal @html.to_s, copy.to_s
      end

      # dup(0) on a document also returns a distinct object.
      def test_dup_document_shallow
        copy = @html.dup(0)
        assert copy
        assert_not_equal copy, @html
      end

      # A duplicated node keeps its content and its owning document.
      def test_dup
        found = @html.search('//div/a').first
        copy = found.dup
        assert copy
        assert_equal found.content, copy.content
        assert_equal found.document, copy.document
      end

      # #inner_html serializes an element's children (whitespace ignored here).
      def test_inner_html
        html = Nokogiri::HTML(<<-eohtml)
<html>
<body>
<div>
<p>
Hello world!
</p>
</div>
</body>
</html>
        eohtml
        node = html.xpath('//div').first
        assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
      end

      # Re-parsing the document's inner_html reproduces the same root markup.
      def test_round_trip
        doc = Nokogiri::HTML(@html.inner_html)
        assert_equal @html.root.to_html, doc.root.to_html
      end

      # A fragment built from plain text has a single child carrying that text.
      def test_fragment_contains_text_node
        fragment = Nokogiri::HTML.fragment('fooo')
        assert_equal 1, fragment.children.length
        assert_equal 'fooo', fragment.inner_text
      end

      # A fragment built from two tags has two children.
      def test_fragment_includes_two_tags
        assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
      end

      # #css called on a node searches only beneath that node.
      def test_relative_css_finder
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<body>
<div class="red">
<p>
inside red
</p>
</div>
<div class="green">
<p>
inside green
</p>
</div>
</body>
</html>
        eohtml
        red_divs = doc.css('div.red')
        assert_equal 1, red_divs.length
        p_tags = red_divs.first.css('p')
        assert_equal 1, p_tags.length
        assert_equal 'inside red', p_tags.first.text.strip
      end

      # A class selector matches whole class tokens: "red" does not match
      # "notred".
      def test_find_classes
        doc = Nokogiri::HTML(<<-eohtml)
<html>
<body>
<p class="red">RED</p>
<p class="awesome red">RED</p>
<p class="notred">GREEN</p>
<p class="green notred">GREEN</p>
</body>
</html>
        eohtml
        list = doc.css('.red')
        assert_equal 2, list.length
        assert_equal %w[RED RED], list.map(&:text)
      end

      # Nokogiri::HTML() accepts an open IO as its input.
      def test_parse_can_take_io
        html = File.open(HTML_FILE, 'rb') { |f| Nokogiri::HTML(f) }
        assert html.html?
      end

      # An HTML document answers html? with true and xml? with false.
      def test_html?
        assert !@html.xml?
        assert @html.html?
      end

      # #serialize and #to_html both return a truthy value.
      def test_serialize
        assert @html.serialize
        assert @html.to_html
      end

      # Documents parsed from nil or "" have empty text.
      def test_empty_document
        # empty document should return "" #699
        assert_equal "", Nokogiri::HTML.parse(nil).text
        assert_equal "", Nokogiri::HTML.parse("").text
      end
    end
  end
end
619
+