nokogiri 1.3.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (256) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +233 -0
  3. data/CHANGELOG.rdoc +222 -0
  4. data/Manifest.txt +247 -0
  5. data/README.ja.rdoc +103 -0
  6. data/README.rdoc +117 -0
  7. data/Rakefile +205 -0
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +89 -0
  10. data/ext/nokogiri/html_document.c +183 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +30 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser.c +57 -0
  17. data/ext/nokogiri/html_sax_parser.h +11 -0
  18. data/ext/nokogiri/iconv.dll +0 -0
  19. data/ext/nokogiri/libexslt.dll +0 -0
  20. data/ext/nokogiri/libxml2.dll +0 -0
  21. data/ext/nokogiri/libxslt.dll +0 -0
  22. data/ext/nokogiri/nokogiri.c +81 -0
  23. data/ext/nokogiri/nokogiri.h +149 -0
  24. data/ext/nokogiri/xml_attr.c +92 -0
  25. data/ext/nokogiri/xml_attr.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +53 -0
  27. data/ext/nokogiri/xml_cdata.h +9 -0
  28. data/ext/nokogiri/xml_comment.c +51 -0
  29. data/ext/nokogiri/xml_comment.h +9 -0
  30. data/ext/nokogiri/xml_document.c +308 -0
  31. data/ext/nokogiri/xml_document.h +21 -0
  32. data/ext/nokogiri/xml_document_fragment.c +48 -0
  33. data/ext/nokogiri/xml_document_fragment.h +10 -0
  34. data/ext/nokogiri/xml_dtd.c +102 -0
  35. data/ext/nokogiri/xml_dtd.h +8 -0
  36. data/ext/nokogiri/xml_entity_reference.c +50 -0
  37. data/ext/nokogiri/xml_entity_reference.h +9 -0
  38. data/ext/nokogiri/xml_io.c +24 -0
  39. data/ext/nokogiri/xml_io.h +10 -0
  40. data/ext/nokogiri/xml_namespace.c +69 -0
  41. data/ext/nokogiri/xml_namespace.h +12 -0
  42. data/ext/nokogiri/xml_node.c +928 -0
  43. data/ext/nokogiri/xml_node.h +14 -0
  44. data/ext/nokogiri/xml_node_set.c +386 -0
  45. data/ext/nokogiri/xml_node_set.h +9 -0
  46. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  47. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  48. data/ext/nokogiri/xml_reader.c +572 -0
  49. data/ext/nokogiri/xml_reader.h +10 -0
  50. data/ext/nokogiri/xml_relax_ng.c +106 -0
  51. data/ext/nokogiri/xml_relax_ng.h +9 -0
  52. data/ext/nokogiri/xml_sax_parser.c +336 -0
  53. data/ext/nokogiri/xml_sax_parser.h +10 -0
  54. data/ext/nokogiri/xml_sax_push_parser.c +86 -0
  55. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  56. data/ext/nokogiri/xml_schema.c +107 -0
  57. data/ext/nokogiri/xml_schema.h +9 -0
  58. data/ext/nokogiri/xml_syntax_error.c +203 -0
  59. data/ext/nokogiri/xml_syntax_error.h +12 -0
  60. data/ext/nokogiri/xml_text.c +47 -0
  61. data/ext/nokogiri/xml_text.h +9 -0
  62. data/ext/nokogiri/xml_xpath.c +53 -0
  63. data/ext/nokogiri/xml_xpath.h +11 -0
  64. data/ext/nokogiri/xml_xpath_context.c +252 -0
  65. data/ext/nokogiri/xml_xpath_context.h +9 -0
  66. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  67. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  68. data/ext/nokogiri/zlib1.dll +0 -0
  69. data/lib/action-nokogiri.rb +36 -0
  70. data/lib/nokogiri.rb +110 -0
  71. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  72. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  73. data/lib/nokogiri/css.rb +25 -0
  74. data/lib/nokogiri/css/generated_parser.rb +748 -0
  75. data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
  76. data/lib/nokogiri/css/node.rb +107 -0
  77. data/lib/nokogiri/css/parser.rb +82 -0
  78. data/lib/nokogiri/css/parser.y +227 -0
  79. data/lib/nokogiri/css/syntax_error.rb +7 -0
  80. data/lib/nokogiri/css/tokenizer.rb +11 -0
  81. data/lib/nokogiri/css/tokenizer.rex +54 -0
  82. data/lib/nokogiri/css/xpath_visitor.rb +172 -0
  83. data/lib/nokogiri/decorators.rb +2 -0
  84. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  85. data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
  86. data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
  87. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
  88. data/lib/nokogiri/decorators/slop.rb +33 -0
  89. data/lib/nokogiri/ffi/html/document.rb +37 -0
  90. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  91. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  92. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  93. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  94. data/lib/nokogiri/ffi/libxml.rb +314 -0
  95. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  96. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  97. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  98. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  102. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  103. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  105. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  106. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  107. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  108. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  109. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  110. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  111. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  112. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  113. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  114. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  115. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  117. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  118. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  119. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  120. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  121. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  122. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  123. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  124. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  125. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  126. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  127. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  128. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  129. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  130. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  131. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  132. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  133. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  134. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  135. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  136. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  137. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  138. data/lib/nokogiri/hpricot.rb +62 -0
  139. data/lib/nokogiri/html.rb +34 -0
  140. data/lib/nokogiri/html/builder.rb +35 -0
  141. data/lib/nokogiri/html/document.rb +71 -0
  142. data/lib/nokogiri/html/document_fragment.rb +15 -0
  143. data/lib/nokogiri/html/element_description.rb +23 -0
  144. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  145. data/lib/nokogiri/html/sax/parser.rb +47 -0
  146. data/lib/nokogiri/nokogiri.rb +1 -0
  147. data/lib/nokogiri/syntax_error.rb +4 -0
  148. data/lib/nokogiri/version.rb +29 -0
  149. data/lib/nokogiri/version_warning.rb +11 -0
  150. data/lib/nokogiri/xml.rb +62 -0
  151. data/lib/nokogiri/xml/attr.rb +9 -0
  152. data/lib/nokogiri/xml/builder.rb +254 -0
  153. data/lib/nokogiri/xml/cdata.rb +11 -0
  154. data/lib/nokogiri/xml/document.rb +100 -0
  155. data/lib/nokogiri/xml/document_fragment.rb +49 -0
  156. data/lib/nokogiri/xml/dtd.rb +11 -0
  157. data/lib/nokogiri/xml/entity_declaration.rb +11 -0
  158. data/lib/nokogiri/xml/fragment_handler.rb +55 -0
  159. data/lib/nokogiri/xml/namespace.rb +7 -0
  160. data/lib/nokogiri/xml/node.rb +745 -0
  161. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  162. data/lib/nokogiri/xml/node_set.rb +238 -0
  163. data/lib/nokogiri/xml/notation.rb +6 -0
  164. data/lib/nokogiri/xml/parse_options.rb +80 -0
  165. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  166. data/lib/nokogiri/xml/reader.rb +66 -0
  167. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  168. data/lib/nokogiri/xml/sax.rb +3 -0
  169. data/lib/nokogiri/xml/sax/document.rb +143 -0
  170. data/lib/nokogiri/xml/sax/parser.rb +101 -0
  171. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  172. data/lib/nokogiri/xml/schema.rb +65 -0
  173. data/lib/nokogiri/xml/syntax_error.rb +34 -0
  174. data/lib/nokogiri/xml/xpath.rb +10 -0
  175. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  176. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  177. data/lib/nokogiri/xslt.rb +48 -0
  178. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  179. data/lib/xsd/xmlparser/nokogiri.rb +64 -0
  180. data/tasks/test.rb +161 -0
  181. data/test/css/test_nthiness.rb +160 -0
  182. data/test/css/test_parser.rb +277 -0
  183. data/test/css/test_tokenizer.rb +176 -0
  184. data/test/css/test_xpath_visitor.rb +76 -0
  185. data/test/ffi/test_document.rb +35 -0
  186. data/test/files/address_book.rlx +12 -0
  187. data/test/files/address_book.xml +10 -0
  188. data/test/files/dont_hurt_em_why.xml +422 -0
  189. data/test/files/exslt.xml +8 -0
  190. data/test/files/exslt.xslt +35 -0
  191. data/test/files/po.xml +32 -0
  192. data/test/files/po.xsd +66 -0
  193. data/test/files/staff.xml +59 -0
  194. data/test/files/staff.xslt +32 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/helper.rb +123 -0
  197. data/test/hpricot/files/basic.xhtml +17 -0
  198. data/test/hpricot/files/boingboing.html +2266 -0
  199. data/test/hpricot/files/cy0.html +3653 -0
  200. data/test/hpricot/files/immob.html +400 -0
  201. data/test/hpricot/files/pace_application.html +1320 -0
  202. data/test/hpricot/files/tenderlove.html +16 -0
  203. data/test/hpricot/files/uswebgen.html +220 -0
  204. data/test/hpricot/files/utf8.html +1054 -0
  205. data/test/hpricot/files/week9.html +1723 -0
  206. data/test/hpricot/files/why.xml +19 -0
  207. data/test/hpricot/load_files.rb +11 -0
  208. data/test/hpricot/test_alter.rb +68 -0
  209. data/test/hpricot/test_builder.rb +20 -0
  210. data/test/hpricot/test_parser.rb +426 -0
  211. data/test/hpricot/test_paths.rb +15 -0
  212. data/test/hpricot/test_preserved.rb +77 -0
  213. data/test/hpricot/test_xml.rb +30 -0
  214. data/test/html/sax/test_parser.rb +52 -0
  215. data/test/html/test_builder.rb +156 -0
  216. data/test/html/test_document.rb +361 -0
  217. data/test/html/test_document_encoding.rb +46 -0
  218. data/test/html/test_document_fragment.rb +97 -0
  219. data/test/html/test_element_description.rb +95 -0
  220. data/test/html/test_named_characters.rb +14 -0
  221. data/test/html/test_node.rb +165 -0
  222. data/test/test_convert_xpath.rb +186 -0
  223. data/test/test_css_cache.rb +56 -0
  224. data/test/test_gc.rb +15 -0
  225. data/test/test_memory_leak.rb +77 -0
  226. data/test/test_nokogiri.rb +127 -0
  227. data/test/test_reader.rb +316 -0
  228. data/test/test_xslt_transforms.rb +131 -0
  229. data/test/xml/node/test_save_options.rb +20 -0
  230. data/test/xml/node/test_subclass.rb +44 -0
  231. data/test/xml/sax/test_parser.rb +169 -0
  232. data/test/xml/sax/test_push_parser.rb +92 -0
  233. data/test/xml/test_attr.rb +38 -0
  234. data/test/xml/test_builder.rb +73 -0
  235. data/test/xml/test_cdata.rb +38 -0
  236. data/test/xml/test_comment.rb +23 -0
  237. data/test/xml/test_document.rb +397 -0
  238. data/test/xml/test_document_encoding.rb +26 -0
  239. data/test/xml/test_document_fragment.rb +76 -0
  240. data/test/xml/test_dtd.rb +42 -0
  241. data/test/xml/test_dtd_encoding.rb +31 -0
  242. data/test/xml/test_entity_reference.rb +21 -0
  243. data/test/xml/test_namespace.rb +43 -0
  244. data/test/xml/test_node.rb +808 -0
  245. data/test/xml/test_node_attributes.rb +34 -0
  246. data/test/xml/test_node_encoding.rb +84 -0
  247. data/test/xml/test_node_set.rb +368 -0
  248. data/test/xml/test_parse_options.rb +52 -0
  249. data/test/xml/test_processing_instruction.rb +30 -0
  250. data/test/xml/test_reader_encoding.rb +126 -0
  251. data/test/xml/test_relax_ng.rb +60 -0
  252. data/test/xml/test_schema.rb +65 -0
  253. data/test/xml/test_text.rb +18 -0
  254. data/test/xml/test_unparented_node.rb +381 -0
  255. data/test/xml/test_xpath.rb +106 -0
  256. metadata +409 -0
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
3
+
4
module Nokogiri
  module HTML
    # These tests call String#encoding, so they are only defined on
    # interpreters whose strings are encoding-aware. Feature detection is used
    # rather than matching RUBY_VERSION against /^1\.9/, which would silently
    # drop the tests on every Ruby newer than 1.9.
    if String.method_defined?(:encoding)
      # Exercises encoding selection for an HTML document whose meta tag
      # declares a malformed charset value.
      class TestDocumentEncoding < Nokogiri::TestCase
        # With no explicit encoding argument, the parsed document is expected
        # to report the encoding name of the source string, both through
        # Nokogiri::HTML and through Nokogiri.parse.
        def test_default_to_encoding_from_string
          bad_charset = bad_charset_html
          doc = Nokogiri::HTML(bad_charset)
          assert_equal bad_charset.encoding.name, doc.encoding

          doc = Nokogiri.parse(bad_charset)
          assert_equal bad_charset.encoding.name, doc.encoding
        end

        # Passing an unknown encoding name as the third argument is expected
        # to still produce a document that can be queried with CSS.
        def test_encoding_with_a_bad_name
          doc = Nokogiri::HTML(bad_charset_html, nil, 'askldjfhalsdfjhlkasdfjh')
          assert_equal ['http://tenderlovemaking.com/'],
            doc.css('a').map { |a| a['href'] }
        end

        private

        # Returns the markup shared by both tests: its Content-Type meta tag
        # carries the invalid charset value "charset=UTF-8".
        def bad_charset_html
          <<-eohtml
          <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
          <html>
          <head>
          <meta http-equiv="Content-Type" content="text/html; charset=charset=UTF-8">
          </head>
          <body>
          <a href="http://tenderlovemaking.com/">blah!</a>
          </body>
          </html>
          eohtml
        end
      end
    end
  end
end
@@ -0,0 +1,97 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
module Nokogiri
  module HTML
    # Tests construction and serialization of Nokogiri::HTML::DocumentFragment.
    class TestDocumentFragment < Nokogiri::TestCase
      # Parses the shared HTML fixture file into @html before every test.
      def setup
        super
        @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
      end

      # A fragment can be built from a document alone.
      def test_new
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
      end

      def test_fragment_should_have_document
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_equal @html, fragment.document
      end

      def test_name
        fragment = Nokogiri::HTML::DocumentFragment.new(@html)
        assert_equal '#document-fragment', fragment.name
      end

      def test_static_method
        fragment = Nokogiri::HTML::DocumentFragment.parse("<div>a</div>")
        assert_instance_of Nokogiri::HTML::DocumentFragment, fragment
      end

      # Smoke test: creating many fragments against one document must not raise.
      def test_many_fragments
        100.times { Nokogiri::HTML::DocumentFragment.new(@html) }
      end

      # Instantiating a subclass yields an instance of that subclass.
      def test_subclass
        klass = Class.new(Nokogiri::HTML::DocumentFragment)
        fragment = klass.new(@html, "<div>a</div>")
        assert_instance_of klass, fragment
      end

      def test_html_fragment
        fragment = Nokogiri::HTML.fragment("<div>a</div>")
        assert_equal "<div>a</div>", fragment.to_s
      end

      # The expected serialization of trailing text differs for libxml2
      # 2.6.16 and older, which wrap it in a <p> element.
      def test_html_fragment_has_outer_text
        doc = "a<div>b</div>c"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        if compare_loaded_libxml_to('2.6.16') <= 0
          assert_equal "a<div>b</div><p>c</p>", fragment.to_s
        else
          assert_equal "a<div>b</div>c", fragment.to_s
        end
      end

      # Tag names are expected to be lower-cased on output.
      def test_html_fragment_case_insensitivity
        doc = "<crazyDiv>b</crazyDiv>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<crazydiv>b</crazydiv>", fragment.to_s
      end

      # Whitespace surrounding the markup is expected to be dropped.
      def test_html_fragment_with_leading_whitespace
        doc = " <div>b</div> "
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<div>b</div>", fragment.to_s
      end

      def test_to_s
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_s
      end

      def test_to_html
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_html
      end

      # libxml2 2.7.0 and newer are expected to emit "<br />" for XHTML;
      # older versions keep "<br>".
      def test_to_xhtml
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        if compare_loaded_libxml_to('2.7.0') >= 0
          assert_equal "<span>foo<br /></span><span>bar</span>", fragment.to_xhtml
        else
          assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_xhtml
        end
      end

      def test_to_xml
        doc = "<span>foo<br></span><span>bar</span>"
        fragment = Nokogiri::HTML::Document.new.fragment(doc)
        assert_equal "<span>foo<br/></span><span>bar</span>", fragment.to_xml
      end

      private

      # Compares the loaded libxml2 version with +version+ segment by segment
      # as integers and returns -1, 0 or 1. Plain String comparison is
      # lexicographic and misorders versions such as "2.6.9" vs "2.6.16" or
      # "2.10.0" vs "2.7.0".
      def compare_loaded_libxml_to(version)
        loaded = Nokogiri::VERSION_INFO['libxml']['loaded']
        version_segments(loaded) <=> version_segments(version)
      end

      # Splits a dotted version string into an array of integers.
      def version_segments(version)
        version.split('.').map { |segment| segment.to_i }
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
module Nokogiri
  module HTML
    # Covers lookup of ElementDescription entries by tag name and the readers
    # exposed on the returned description objects.
    class TestElementDescription < Nokogiri::TestCase
      # Looking up an unknown tag name yields nil.
      def test_fetch_nonexistent
        assert_nil(ElementDescription['foo'])
      end

      # Looking up a known tag name yields an ElementDescription.
      def test_fetch_element_description
        anchor = ElementDescription['a']
        assert(anchor)
        assert_instance_of(ElementDescription, anchor)
      end

      def test_name
        assert_equal('a', ElementDescription['a'].name)
      end

      def test_implied_start_tag?
        assert(!ElementDescription['a'].implied_start_tag?)
      end

      def test_implied_end_tag?
        assert(!ElementDescription['a'].implied_end_tag?)
        assert(ElementDescription['p'].implied_end_tag?)
      end

      def test_save_end_tag?
        assert(!ElementDescription['a'].save_end_tag?)
        assert(ElementDescription['br'].save_end_tag?)
      end

      def test_empty?
        assert(ElementDescription['br'].empty?)
        assert(!ElementDescription['a'].empty?)
      end

      def test_deprecated?
        assert(ElementDescription['applet'].deprecated?)
        assert(!ElementDescription['br'].deprecated?)
      end

      def test_inline?
        assert(ElementDescription['a'].inline?)
        assert(!ElementDescription['div'].inline?)
      end

      # block? is expected to be the negation of inline?.
      def test_block?
        anchor = ElementDescription['a']
        assert_equal(!anchor.inline?, anchor.block?)
      end

      def test_description
        assert(ElementDescription['a'].description)
      end

      def test_subelements
        body_children = ElementDescription['body'].sub_elements
        assert_equal(61, body_children.length)
      end

      def test_default_sub_element
        assert_equal('div', ElementDescription['body'].default_sub_element)
      end

      def test_optional_attributes
        optional = ElementDescription['table'].optional_attributes
        assert(optional)
        assert_equal(22, optional.length)
      end

      def test_deprecated_attributes
        deprecated = ElementDescription['table'].deprecated_attributes
        assert(deprecated)
        assert_equal(2, deprecated.length)
      end

      def test_required_attributes
        required = ElementDescription['table'].required_attributes
        assert(required)
        assert_equal(0, required.length)
      end

      # inspect output is expected to mention the element name.
      def test_inspect
        input = ElementDescription['input']
        assert_match(input.name, input.inspect)
      end

      # to_s output is expected to mention the element name.
      def test_to_s
        input = ElementDescription['input']
        assert_match(input.name, input.to_s)
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
module Nokogiri
  module HTML
    # Checks lookup of a named character entity through NamedCharacters.
    class TestNamedCharacters < Nokogiri::TestCase
      # 'copy' is expected to resolve to 169, to agree with the value of the
      # object returned by NamedCharacters.get, and to carry a description.
      def test_named_character
        entity = NamedCharacters.get('copy')
        assert_equal(169, NamedCharacters['copy'])
        assert_equal(entity.value, NamedCharacters['copy'])
        assert(entity.description)
      end
    end
  end
end
@@ -0,0 +1,165 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ require 'nkf'
4
+
5
module Nokogiri
  module HTML
    # Tests node-level behaviour (traversal, mutation, serialization) on
    # documents parsed with Nokogiri::HTML.
    class TestNode < Nokogiri::TestCase
      # Builds a small document with one div wrapping one anchor into @html.
      def setup
        super
        @html = Nokogiri::HTML(<<-eohtml)
        <html>
        <head></head>
        <body>
        <div class='baz'><a href="foo" class="bar">first</a></div>
        </body>
        </html>
        eohtml
      end

      def test_description
        description = @html.at('a.bar').description
        assert(description)
        assert_equal('a', description.name)
      end

      # A node set (here an empty one) is rejected as a sibling.
      def test_add_next_sibling_with_empty_nodeset
        assert_raises(ArgumentError) do
          @html.at('a').add_next_sibling(@html.at('head').children)
        end
      end

      # A non-empty node set is rejected as a sibling too.
      def test_add_next_sibling_with_non_empty_nodeset
        assert_raises(ArgumentError) do
          @html.at('head').add_next_sibling(@html.at('div').children)
        end
      end

      def test_ancestors_with_selector
        text_node = @html.at('a.bar').child
        assert(text_node)
        matching = text_node.ancestors('.baz')
        assert(matching)
        assert_equal(1, matching.length)
        assert_equal('div', matching.first.name)
      end

      def test_css_matches?
        anchor = @html.at('a.bar')
        assert(anchor)
        assert(anchor.matches?('a.bar'))
      end

      def test_xpath_matches?
        anchor = @html.at('//a')
        assert(anchor)
        assert(anchor.matches?('//a'))
      end

      # Swapping the div for an anchor leaves the anchor directly under body.
      def test_swap
        @html.at('div').swap('<a href="foo">bar</a>')
        anchor = @html.css('a').first
        assert_equal('body', anchor.parent.name)
        assert_equal(0, @html.css('div').length)
      end

      def test_swap_with_regex_characters
        @html.at('div').swap('<a href="foo">ba)r</a>')
        anchor = @html.css('a').first
        assert_equal('ba)r', anchor.text)
      end

      def test_attribute_decodes_entities
        div = @html.at('div')
        div['href'] = 'foo&bar'
        assert_equal('foo&bar', div['href'])
        div['href'] += '&baz'
        assert_equal('foo&bar&baz', div['href'])
      end

      def test_before_will_prepend_text_nodes
        first_child = @html.at('//body').children.first
        assert(first_child)
        first_child.before("some text")
        assert_equal('some text', @html.at('//body').children[0].content.strip)
      end

      # Markup with a value-less attribute must not raise when inserted.
      def test_fragment_handler_does_not_regurge_on_invalid_attributes
        iframe = %Q{<iframe style="width: 0%; height: 0px" src="http://someurl" allowtransparency></iframe>}
        assert_nothing_raised { @html.at('div').before(iframe) }
        assert_nothing_raised { @html.at('div').after(iframe) }
        assert_nothing_raised { @html.at('div').inner_html = iframe }
      end

      def test_inner_html=
        div = @html.at('//div')
        assert(div)
        div.inner_html = '<span>testing</span>'
        assert_equal('span', div.children.first.name)

        div.inner_html = 'testing'
        assert_equal('testing', div.content)
      end

      # Text, element, text: the fragment is expected to hold three children.
      def test_fragment
        fragment = @html.fragment(<<-eohtml)
        hello
        <div class="foo">
        <p>bar</p>
        </div>
        world
        eohtml
        assert_match(/^hello/, fragment.inner_html.strip)
        assert_equal(3, fragment.children.length)
        paragraph = fragment.css('p').first
        assert(paragraph)
        assert_equal('div', paragraph.parent.name)
        assert_equal('foo', paragraph.parent['class'])
      end

      def test_fragment_serialization
        fragment = Nokogiri::HTML.fragment("<div>foo</div>")
        assert_equal("<div>foo</div>", fragment.serialize.chomp)
        assert_equal("<div>foo</div>", fragment.to_xml.chomp)
        assert_equal("<div>foo</div>", fragment.inner_html)
        assert_equal("<div>foo</div>", fragment.to_html)
        assert_equal("<div>foo</div>", fragment.to_s)
      end

      def test_after_will_append_text_nodes
        div = @html.at('//body/div')
        assert(div)
        div.after("some text")
        assert_equal('some text', div.next.text.strip)
      end

      # Replacing a node with a node set raises ArgumentError.
      def test_replace
        doc = Nokogiri::HTML(<<-eohtml)
        <html>
        <head></head>
        <body>
        <center><img src='logo.gif' /></center>
        </body>
        </html>
        eohtml
        center = doc.at("//center")
        images = center.search("//img")
        assert_raises(ArgumentError) { center.replace(images) }
      end

      def test_to_html_does_not_contain_entities
        html = NKF.nkf("-e --msdos", <<-EOH)
        <html><body>
        <p> test paragraph
        foo bar </p>
        </body></html>
        EOH
        nokogiri = Nokogiri::HTML.parse(html)

        # NKF linebreak modes are not supported as of jruby 1.2
        # see http://jira.codehaus.org/browse/JRUBY-3602 for status
        expected =
          if RUBY_PLATFORM =~ /java/
            "<p>testparagraph\nfoobar</p>"
          else
            "<p>testparagraph\r\nfoobar</p>"
          end
        assert_equal(expected, nokogiri.at("p").to_html.gsub(/ /, ''))
      end
    end
  end
end
@@ -0,0 +1,186 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
+
3
# Hpricot is an optional dependency: HAS_HPRICOT records whether it could be
# loaded so callers can decide whether to use it.
HAS_HPRICOT =
  begin
    require 'rubygems'
    require 'hpricot'
    true
  rescue LoadError
    false
  end
10
+
11
# Verifies that Hpricot-style selectors are converted to the expected XPath
# expressions, and that the XPath finds the expected content in the fixture.
# When the hpricot gem is available (HAS_HPRICOT), the same selector is also
# run through Hpricot itself for comparison.
class TestConvertXPath < Nokogiri::TestCase

  # Parses the HTML fixture once per parser. The file is read a single time
  # and the same markup is handed to each parser.
  def setup
    super
    html = File.read(HTML_FILE)
    @N = Nokogiri(html)
    @NH = Nokogiri.Hpricot(html) # decorated document
    @H = Hpricot(html) if HAS_HPRICOT
  end

  # Asserts that +xpath+ finds +match+ in the Nokogiri document, that +hpath+
  # finds the same thing in Hpricot (when available), and that +hpath+
  # converts to exactly +xpath+.
  #
  # The optional block extracts the value to compare from a search result;
  # it defaults to taking the first element.
  def assert_syntactical_equivalence(hpath, xpath, match, &blk)
    blk ||= lambda {|j| j.first}
    assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
    if HAS_HPRICOT
      assert_equal match, blk.call(@H.search(hpath)).chomp, "hpath result did not match"
    end
    assert_equal [xpath], @NH.convert_to_xpath(hpath), "converted hpath did not match xpath"
  end

  # Bare tag names, symbols and absolute paths all convert to ".//p".
  def test_ordinary_xpath_conversions
    assert_equal(".//p", @NH.convert_to_xpath("p").first)
    assert_equal(".//p", @NH.convert_to_xpath(:p).first)
    assert_equal(".//p", @NH.convert_to_xpath("//p").first)
    assert_equal(".//p", @NH.convert_to_xpath(".//p").first)
  end

  def test_child_tag
    assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  def test_child_tag_equals
    assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
      j.inner_text
    end
  end

  def test_filter_contains
    assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]",
                                   "Tender Lovemaking ") do |j|
      j.inner_text
    end
  end

  def test_filter_comment
    assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "<!-- end of header -->") do |j|
      j.first.to_s
    end
  end

  def test_filter_text
    assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
      j.first.to_s
    end
    assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
      j.first.inner_text
    end
  end

  def test_filter_by_attr
    assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']",
                                   ".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']",
                                   "http://blog.geminigeek.com/wordpress-theme") do |j|
      j.first["href"]
    end
  end

  def test_css_id
    assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
    assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
      j.first["id"]
    end
  end

  def test_css_class
    assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
                                   "cat-item cat-item-15") do |j|
      j.first["class"]
    end
    assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
                                   "cat-item cat-item-15") do |j|
      j.first["class"]
    end
  end

  # Descendant, child and general-sibling combinators.
  def test_css_tags
    assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
      j.first.inner_text
    end
    assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
      j.first.inner_text
    end
  end

  def test_positional
    ##
    # we are intentionally NOT staying compatible with nth-and-friends, as Hpricot has an off-by-one (OB1) bug.
    #
    # assert_syntactical_equivalence("div > div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:nth(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # assert_syntactical_equivalence("div/div:nth-of-type(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
    #   j.first.inner_text
    # end
    # Line breaks are stripped from both sides before comparing.
    assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
      j.first.inner_text.gsub(/[\r\n]/, '')
    end
    assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
    assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
      j.last.inner_text
    end
  end

  def test_multiple_filters
    assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
      j.first.inner_text
    end
  end

  # Prefixed tag names are matched through name() rather than as XPath
  # namespaces.
  def test_compat_mode_namespaces
    assert_equal(".//*[name()='t:sam']", @NH.convert_to_xpath("//t:sam").first)
    assert_equal(".//*[name()='t:sam'][@rel='bookmark'][1]", @NH.convert_to_xpath("//t:sam[@rel='bookmark'][1]").first)
  end

  ##
  # 'and' is not supported by hpricot
  # def test_and
  #   assert_syntactical_equivalence("div[h1 and small]", ".//div[h1 and small]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
  #     j.inner_text
  #   end
  # end



  # TODO:
  # doc/'title ~ link' -> links that are siblings of title
  # doc/'p[@class~="final"]' -> class includes string (whitespacy)
  # doc/'p[text()*="final"]' -> text includes string (index) (broken: always returns true?)
  # doc/'p[text()$="final"]' -> /final$/
  # doc/'p[text()|="final"]' -> /^final$/
  # doc/'p[text()^="final"]' -> string starts with 'final'
  # nth_first
  # nth_last
  # even
  # odd
  # first-child, nth-child, last-child, nth-last-child, nth-last-of-type
  # only-of-type, only-child
  # parent
  # empty
  # root
end