superfeedr-nokogiri 1.4.0.20091116183308

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +314 -0
  4. data/Manifest.txt +269 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +118 -0
  7. data/Rakefile +244 -0
  8. data/bin/nokogiri +49 -0
  9. data/ext/nokogiri/extconf.rb +145 -0
  10. data/ext/nokogiri/html_document.c +145 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +32 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  17. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  18. data/ext/nokogiri/nokogiri.c +89 -0
  19. data/ext/nokogiri/nokogiri.h +145 -0
  20. data/ext/nokogiri/xml_attr.c +92 -0
  21. data/ext/nokogiri/xml_attr.h +9 -0
  22. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  23. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  24. data/ext/nokogiri/xml_cdata.c +54 -0
  25. data/ext/nokogiri/xml_cdata.h +9 -0
  26. data/ext/nokogiri/xml_comment.c +52 -0
  27. data/ext/nokogiri/xml_comment.h +9 -0
  28. data/ext/nokogiri/xml_document.c +388 -0
  29. data/ext/nokogiri/xml_document.h +24 -0
  30. data/ext/nokogiri/xml_document_fragment.c +46 -0
  31. data/ext/nokogiri/xml_document_fragment.h +10 -0
  32. data/ext/nokogiri/xml_dtd.c +192 -0
  33. data/ext/nokogiri/xml_dtd.h +10 -0
  34. data/ext/nokogiri/xml_element_content.c +123 -0
  35. data/ext/nokogiri/xml_element_content.h +10 -0
  36. data/ext/nokogiri/xml_element_decl.c +69 -0
  37. data/ext/nokogiri/xml_element_decl.h +9 -0
  38. data/ext/nokogiri/xml_entity_decl.c +97 -0
  39. data/ext/nokogiri/xml_entity_decl.h +10 -0
  40. data/ext/nokogiri/xml_entity_reference.c +50 -0
  41. data/ext/nokogiri/xml_entity_reference.h +9 -0
  42. data/ext/nokogiri/xml_io.c +31 -0
  43. data/ext/nokogiri/xml_io.h +11 -0
  44. data/ext/nokogiri/xml_namespace.c +74 -0
  45. data/ext/nokogiri/xml_namespace.h +12 -0
  46. data/ext/nokogiri/xml_node.c +1060 -0
  47. data/ext/nokogiri/xml_node.h +13 -0
  48. data/ext/nokogiri/xml_node_set.c +397 -0
  49. data/ext/nokogiri/xml_node_set.h +9 -0
  50. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  51. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  52. data/ext/nokogiri/xml_reader.c +593 -0
  53. data/ext/nokogiri/xml_reader.h +10 -0
  54. data/ext/nokogiri/xml_relax_ng.c +159 -0
  55. data/ext/nokogiri/xml_relax_ng.h +9 -0
  56. data/ext/nokogiri/xml_sax_parser.c +286 -0
  57. data/ext/nokogiri/xml_sax_parser.h +43 -0
  58. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  59. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  60. data/ext/nokogiri/xml_sax_push_parser.c +114 -0
  61. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  62. data/ext/nokogiri/xml_schema.c +156 -0
  63. data/ext/nokogiri/xml_schema.h +9 -0
  64. data/ext/nokogiri/xml_syntax_error.c +261 -0
  65. data/ext/nokogiri/xml_syntax_error.h +13 -0
  66. data/ext/nokogiri/xml_text.c +48 -0
  67. data/ext/nokogiri/xml_text.h +9 -0
  68. data/ext/nokogiri/xml_xpath.c +53 -0
  69. data/ext/nokogiri/xml_xpath.h +11 -0
  70. data/ext/nokogiri/xml_xpath_context.c +239 -0
  71. data/ext/nokogiri/xml_xpath_context.h +9 -0
  72. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  73. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  74. data/lib/nokogiri.rb +116 -0
  75. data/lib/nokogiri/css.rb +25 -0
  76. data/lib/nokogiri/css/generated_parser.rb +646 -0
  77. data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
  78. data/lib/nokogiri/css/node.rb +99 -0
  79. data/lib/nokogiri/css/parser.rb +82 -0
  80. data/lib/nokogiri/css/parser.y +227 -0
  81. data/lib/nokogiri/css/syntax_error.rb +7 -0
  82. data/lib/nokogiri/css/tokenizer.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rex +54 -0
  84. data/lib/nokogiri/css/xpath_visitor.rb +162 -0
  85. data/lib/nokogiri/decorators/slop.rb +33 -0
  86. data/lib/nokogiri/ffi/html/document.rb +28 -0
  87. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  88. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  89. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  90. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  91. data/lib/nokogiri/ffi/libxml.rb +356 -0
  92. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  93. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  94. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  95. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  96. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  97. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  98. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  100. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  101. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  102. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  103. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  104. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  105. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  106. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  107. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  109. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  110. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  111. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  112. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
  113. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  114. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  115. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  116. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  117. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  118. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  119. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  120. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  121. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  122. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  123. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  124. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  125. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  126. data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
  127. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  128. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  129. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  132. data/lib/nokogiri/ffi/xml/node.rb +444 -0
  133. data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
  134. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  135. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  136. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  137. data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
  138. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  139. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
  140. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  141. data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
  142. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  143. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  144. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  145. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  146. data/lib/nokogiri/html.rb +35 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +88 -0
  149. data/lib/nokogiri/html/document_fragment.rb +15 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  152. data/lib/nokogiri/html/sax/parser.rb +48 -0
  153. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  154. data/lib/nokogiri/syntax_error.rb +4 -0
  155. data/lib/nokogiri/version.rb +33 -0
  156. data/lib/nokogiri/version_warning.rb +11 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +405 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +131 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +69 -0
  165. data/lib/nokogiri/xml/dtd.rb +11 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  169. data/lib/nokogiri/xml/fragment_handler.rb +71 -0
  170. data/lib/nokogiri/xml/namespace.rb +13 -0
  171. data/lib/nokogiri/xml/node.rb +665 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  173. data/lib/nokogiri/xml/node_set.rb +307 -0
  174. data/lib/nokogiri/xml/notation.rb +6 -0
  175. data/lib/nokogiri/xml/parse_options.rb +85 -0
  176. data/lib/nokogiri/xml/pp.rb +2 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  178. data/lib/nokogiri/xml/pp/node.rb +56 -0
  179. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  180. data/lib/nokogiri/xml/reader.rb +74 -0
  181. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  182. data/lib/nokogiri/xml/sax.rb +4 -0
  183. data/lib/nokogiri/xml/sax/document.rb +160 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  187. data/lib/nokogiri/xml/schema.rb +61 -0
  188. data/lib/nokogiri/xml/syntax_error.rb +38 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +48 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xsd/xmlparser/nokogiri.rb +71 -0
  195. data/tasks/test.rb +100 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +277 -0
  198. data/test/css/test_tokenizer.rb +183 -0
  199. data/test/css/test_xpath_visitor.rb +76 -0
  200. data/test/ffi/test_document.rb +35 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/exslt.xml +8 -0
  207. data/test/files/exslt.xslt +35 -0
  208. data/test/files/foo/foo.xsd +4 -0
  209. data/test/files/po.xml +32 -0
  210. data/test/files/po.xsd +66 -0
  211. data/test/files/shift_jis.html +10 -0
  212. data/test/files/shift_jis.xml +5 -0
  213. data/test/files/snuggles.xml +3 -0
  214. data/test/files/staff.dtd +10 -0
  215. data/test/files/staff.xml +59 -0
  216. data/test/files/staff.xslt +32 -0
  217. data/test/files/tlm.html +850 -0
  218. data/test/files/valid_bar.xml +2 -0
  219. data/test/helper.rb +136 -0
  220. data/test/html/sax/test_parser.rb +64 -0
  221. data/test/html/sax/test_parser_context.rb +48 -0
  222. data/test/html/test_builder.rb +164 -0
  223. data/test/html/test_document.rb +390 -0
  224. data/test/html/test_document_encoding.rb +77 -0
  225. data/test/html/test_document_fragment.rb +132 -0
  226. data/test/html/test_element_description.rb +94 -0
  227. data/test/html/test_named_characters.rb +14 -0
  228. data/test/html/test_node.rb +228 -0
  229. data/test/html/test_node_encoding.rb +27 -0
  230. data/test/test_convert_xpath.rb +135 -0
  231. data/test/test_css_cache.rb +45 -0
  232. data/test/test_gc.rb +15 -0
  233. data/test/test_memory_leak.rb +77 -0
  234. data/test/test_nokogiri.rb +134 -0
  235. data/test/test_reader.rb +358 -0
  236. data/test/test_xslt_transforms.rb +131 -0
  237. data/test/xml/node/test_save_options.rb +20 -0
  238. data/test/xml/node/test_subclass.rb +44 -0
  239. data/test/xml/sax/test_parser.rb +307 -0
  240. data/test/xml/sax/test_parser_context.rb +56 -0
  241. data/test/xml/sax/test_push_parser.rb +131 -0
  242. data/test/xml/test_attr.rb +38 -0
  243. data/test/xml/test_attribute_decl.rb +82 -0
  244. data/test/xml/test_builder.rb +167 -0
  245. data/test/xml/test_cdata.rb +38 -0
  246. data/test/xml/test_comment.rb +29 -0
  247. data/test/xml/test_document.rb +607 -0
  248. data/test/xml/test_document_encoding.rb +26 -0
  249. data/test/xml/test_document_fragment.rb +138 -0
  250. data/test/xml/test_dtd.rb +82 -0
  251. data/test/xml/test_dtd_encoding.rb +33 -0
  252. data/test/xml/test_element_content.rb +56 -0
  253. data/test/xml/test_element_decl.rb +73 -0
  254. data/test/xml/test_entity_decl.rb +83 -0
  255. data/test/xml/test_entity_reference.rb +21 -0
  256. data/test/xml/test_namespace.rb +68 -0
  257. data/test/xml/test_node.rb +889 -0
  258. data/test/xml/test_node_attributes.rb +34 -0
  259. data/test/xml/test_node_encoding.rb +107 -0
  260. data/test/xml/test_node_set.rb +531 -0
  261. data/test/xml/test_parse_options.rb +52 -0
  262. data/test/xml/test_processing_instruction.rb +30 -0
  263. data/test/xml/test_reader_encoding.rb +126 -0
  264. data/test/xml/test_relax_ng.rb +60 -0
  265. data/test/xml/test_schema.rb +89 -0
  266. data/test/xml/test_syntax_error.rb +27 -0
  267. data/test/xml/test_text.rb +30 -0
  268. data/test/xml/test_unparented_node.rb +381 -0
  269. data/test/xml/test_xpath.rb +106 -0
  270. metadata +430 -0
@@ -0,0 +1,2 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <bar />
@@ -0,0 +1,136 @@
1
+ #Process.setrlimit(Process::RLIMIT_CORE, Process::RLIM_INFINITY) unless RUBY_PLATFORM =~ /(java|mswin|mingw)/i
2
+ $VERBOSE = true
3
+ require 'rubygems'
4
+ require 'test/unit'
5
+ require 'fileutils'
6
+ require 'tempfile'
7
+ require 'pp'
8
+
9
+ %w(../lib ../ext).each do |path|
10
+ $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), path)))
11
+ end
12
+
13
+ require 'nokogiri'
14
+
15
+ warn "#{__FILE__}:#{__LINE__}: libxml version info: #{Nokogiri::VERSION_INFO.inspect}"
16
+
17
+ module Nokogiri
18
+ class TestCase < Test::Unit::TestCase
19
+ ASSETS_DIR = File.expand_path File.join(File.dirname(__FILE__), 'files')
20
+ XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
21
+ XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
22
+ EXSLT_FILE = File.join(ASSETS_DIR, 'exslt.xslt')
23
+ EXML_FILE = File.join(ASSETS_DIR, 'exslt.xml')
24
+ HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
25
+ NICH_FILE = File.join(ASSETS_DIR, '2ch.html')
26
+ SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
27
+ SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
28
+ PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
29
+ PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
30
+ ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
31
+ ADDRESS_XML_FILE = File.join(ASSETS_DIR, 'address_book.xml')
32
+ SNUGGLES_FILE = File.join(ASSETS_DIR, 'snuggles.xml')
33
+
34
+ unless RUBY_VERSION >= '1.9'
35
+ undef :default_test
36
+ end
37
+
38
# Per-test hook: echoes the current test's name to stderr, but only when the
# TESTOPTS environment variable is exactly '-v'.
def setup
  return unless ENV['TESTOPTS'] == '-v'

  warn "#{name}"
end
41
+
42
# Per-test hook: when the NOKOGIRI_GC environment variable is set, prints a
# '!' marker to STDOUT and forces a garbage-collection pass after each test.
def teardown
  return unless ENV['NOKOGIRI_GC']

  STDOUT.putc '!'
  GC.start
end
48
+
49
# Asserts that every blank text node found while traversing +doc+ has a
# length (ignoring CR and LF characters) that is a multiple of +amount+.
# Also asserts that at least one such node exists. +message+ is passed
# through to each assert_equal call.
def assert_indent amount, doc, message = nil
  blank_text_nodes = []
  doc.traverse { |candidate|
    blank_text_nodes << candidate if candidate.text? && candidate.blank?
  }
  assert blank_text_nodes.length > 0
  blank_text_nodes.each { |blank_node|
    # Strip line terminators so only the indentation characters are counted.
    width = node_width = blank_node.content.delete("\r\n").length
    assert_equal(0, width % amount, message)
  }
end
60
+ end
61
+
62
+ module SAX
63
+ class TestCase < Nokogiri::TestCase
64
+ class Doc < XML::SAX::Document
65
+ attr_reader :start_elements, :start_document_called
66
+ attr_reader :end_elements, :end_document_called
67
+ attr_reader :data, :comments, :cdata_blocks, :start_elements_namespace
68
+ attr_reader :errors, :warnings, :end_elements_namespace
69
+ attr_reader :xmldecls
70
+
71
# Stores the non-nil values among version, encoding and standalone (in that
# order) in @xmldecls, then forwards the same arguments to the parent
# implementation.
def xmldecl(version, encoding, standalone)
  declared = [version, encoding, standalone]
  @xmldecls = declared.reject { |field| field.nil? }
  super
end
75
+
76
# Flags that the start-of-document callback fired, then hands off to the
# parent implementation.
def start_document
  @start_document_called = true
  super()
end
80
+
81
# Flags that the end-of-document callback fired, then hands off to the
# parent implementation.
def end_document
  @end_document_called = true
  super()
end
85
+
86
# Appends the reported error to @errors (created on first use), then
# forwards it to the parent implementation.
def error(error)
  @errors ||= []
  @errors.push(error)
  super
end
90
+
91
# Appends the reported warning to @warnings (created on first use), then
# forwards it to the parent implementation.
#
# The list must live in @warnings, not @warning: the class exposes it through
# `attr_reader :warnings`, which reads @warnings. Writing to @warning left
# that reader permanently nil.
def warning warning
  (@warnings ||= []) << warning
  super
end
95
+
96
# Records the full argument list of each start_element callback as one entry
# in @start_elements (created on first use), then forwards the arguments to
# the parent implementation.
def start_element(*args)
  @start_elements ||= []
  @start_elements.push(args)
  super
end
100
+
101
# Records the full argument list of each start_element_namespace callback as
# one entry in @start_elements_namespace (created on first use), then
# forwards the arguments to the parent implementation.
def start_element_namespace(*args)
  @start_elements_namespace ||= []
  @start_elements_namespace.push(args)
  super
end
105
+
106
# Records the full argument list of each end_element callback as one entry
# in @end_elements (created on first use), then forwards the arguments to
# the parent implementation.
def end_element(*args)
  @end_elements ||= []
  @end_elements.push(args)
  super
end
110
+
111
# Records the full argument list of each end_element_namespace callback as
# one entry in @end_elements_namespace (created on first use), then forwards
# the arguments to the parent implementation.
def end_element_namespace(*args)
  @end_elements_namespace ||= []
  @end_elements_namespace.push(args)
  super
end
115
+
116
# Appends each chunk of character data to @data (created on first use), then
# forwards the chunk to the parent implementation.
#
# The chunk is appended in place with <<, matching the other recording
# callbacks in this class. The previous `@data += [string]` allocated a new
# array on every callback, which is quadratic on large documents.
def characters string
  (@data ||= []) << string
  super
end
121
+
122
# Appends each comment string to @comments (created on first use), then
# forwards it to the parent implementation.
#
# The string is appended in place with <<, matching the other recording
# callbacks in this class. The previous `@comments += [string]` allocated a
# new array on every callback.
def comment string
  (@comments ||= []) << string
  super
end
127
+
128
# Appends each CDATA block string to @cdata_blocks (created on first use),
# then forwards it to the parent implementation.
#
# The string is appended in place with <<, matching the other recording
# callbacks in this class. The previous `@cdata_blocks += [string]` allocated
# a new array on every callback.
def cdata_block string
  (@cdata_blocks ||= []) << string
  super
end
133
+ end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,64 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ module SAX
6
+ class TestParser < Nokogiri::SAX::TestCase
7
+ def setup
8
+ super
9
+ @parser = HTML::SAX::Parser.new(Doc.new)
10
+ end
11
+
12
+ def test_parse_empty_document
13
+ # This caused a segfault in libxml 2.6.x
14
+ assert_nothing_raised { @parser.parse '' }
15
+ end
16
+
17
# Make sure empty files don't break stuff.
#
# A Tempfile is used instead of touching a fixed 'bogus.xml' in the shared
# temp directory. The file is then guaranteed to be empty and unique to this
# run, and it is removed afterwards rather than left behind.
# Tempfile is loaded by the test helper (require 'tempfile').
def test_parse_empty_file
  empty_file = Tempfile.new('bogus.xml')
  empty_file.close
  assert_nothing_raised { @parser.parse_file empty_file.path }
ensure
  # close! closes (a no-op if already closed) and unlinks the file.
  empty_file.close! if empty_file
end
23
+
24
+ def test_parse_file
25
+ @parser.parse_file(HTML_FILE)
26
+ assert_equal 1110, @parser.document.end_elements.length
27
+ end
28
+
29
+ def test_parse_file_nil_argument
30
+ assert_raises(ArgumentError) {
31
+ @parser.parse_file(nil)
32
+ }
33
+ end
34
+
35
+ def test_parse_file_non_existant
36
+ assert_raise Errno::ENOENT do
37
+ @parser.parse_file('foo')
38
+ end
39
+ end
40
+
41
+ def test_parse_file_with_dir
42
+ assert_raise Errno::EISDIR do
43
+ @parser.parse_file(File.dirname(__FILE__))
44
+ end
45
+ end
46
+
47
+ def test_parse_memory_nil
48
+ assert_raise ArgumentError do
49
+ @parser.parse_memory(nil)
50
+ end
51
+ end
52
+
53
+ def test_parse_document
54
+ @parser.parse_memory(<<-eoxml)
55
+ <p>Paragraph 1</p>
56
+ <p>Paragraph 2</p>
57
+ eoxml
58
+ assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
59
+ @parser.document.start_elements)
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,48 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ module SAX
8
+ class TestParserContext < Nokogiri::SAX::TestCase
9
+ def test_from_io
10
+ assert_nothing_raised do
11
+ ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
12
+ end
13
+ end
14
+
15
+ def test_from_string
16
+ assert_nothing_raised do
17
+ ctx = ParserContext.new 'blah blah'
18
+ end
19
+ end
20
+
21
+ def test_parse_with
22
+ ctx = ParserContext.new 'blah'
23
+ assert_raises ArgumentError do
24
+ ctx.parse_with nil
25
+ end
26
+ end
27
+
28
+ def test_parse_with_sax_parser
29
+ assert_nothing_raised do
30
+ xml = "<root />"
31
+ ctx = ParserContext.new xml
32
+ parser = Parser.new Doc.new
33
+ ctx.parse_with parser
34
+ end
35
+ end
36
+
37
+ def test_from_file
38
+ assert_nothing_raised do
39
+ ctx = ParserContext.file HTML_FILE, 'UTF-8'
40
+ parser = Parser.new Doc.new
41
+ ctx.parse_with parser
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+
@@ -0,0 +1,164 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestBuilder < Nokogiri::TestCase
6
+ def test_top_level_function_builds
7
+ foo = nil
8
+ Nokogiri() { |xml| foo = xml }
9
+ assert_instance_of Nokogiri::HTML::Builder, foo
10
+ end
11
+
12
+ def test_builder_with_explicit_tags
13
+ html_doc = Nokogiri::HTML::Builder.new {
14
+ div.slide(:class => 'another_class') {
15
+ node = Nokogiri::XML::Node.new("id", doc)
16
+ node.content = "hello"
17
+ insert(node)
18
+ }
19
+ }.doc
20
+ assert_equal 1, html_doc.css('div.slide > id').length
21
+ assert_equal 'hello', html_doc.at('div.slide > id').content
22
+ end
23
+
24
+ def test_hash_as_attributes_for_attribute_method
25
+ html = Nokogiri::HTML::Builder.new { ||
26
+ div.slide(:class => 'another_class') {
27
+ span 'Slide 1'
28
+ }
29
+ }.to_html
30
+ assert_match 'class="slide another_class"', html
31
+ end
32
+
33
+ def test_hash_as_attributes
34
+ builder = Nokogiri::HTML::Builder.new do
35
+ div(:id => 'awesome') {
36
+ h1 "america"
37
+ }
38
+ end
39
+ assert_equal('<div id="awesome"><h1>america</h1></div>',
40
+ builder.doc.root.to_html.gsub(/\n/, '').gsub(/>\s*</, '><'))
41
+ end
42
+
43
+ def test_href_with_attributes
44
+ uri = 'http://tenderlovemaking.com/'
45
+ built = Nokogiri::XML::Builder.new {
46
+ div {
47
+ a('King Khan & The Shrines', :href => uri)
48
+ }
49
+ }
50
+ assert_equal 'http://tenderlovemaking.com/',
51
+ built.doc.at('a')[:href]
52
+ end
53
+
54
+ def test_tag_nesting
55
+ builder = Nokogiri::HTML::Builder.new do
56
+ body {
57
+ span.left ''
58
+ span.middle {
59
+ div.icon ''
60
+ }
61
+ span.right ''
62
+ }
63
+ end
64
+ assert node = builder.doc.css('span.right').first
65
+ assert_equal 'middle', node.previous_sibling['class']
66
+ end
67
+
68
+ def test_has_ampersand
69
+ builder = Nokogiri::HTML::Builder.new do
70
+ div.rad.thing! {
71
+ text "<awe&some>"
72
+ b "hello & world"
73
+ }
74
+ end
75
+ assert_equal(
76
+ '<div class="rad" id="thing">&lt;awe&amp;some&gt;<b>hello &amp; world</b></div>',
77
+ builder.doc.root.to_html.gsub(/\n/, ''))
78
+ end
79
+
80
+ def test_multi_tags
81
+ builder = Nokogiri::HTML::Builder.new do
82
+ div.rad.thing! {
83
+ text "<awesome>"
84
+ b "hello"
85
+ }
86
+ end
87
+ assert_equal(
88
+ '<div class="rad" id="thing">&lt;awesome&gt;<b>hello</b></div>',
89
+ builder.doc.root.to_html.gsub(/\n/, ''))
90
+ end
91
+
92
+ def test_attributes_plus_block
93
+ builder = Nokogiri::HTML::Builder.new do
94
+ div.rad.thing! {
95
+ text "<awesome>"
96
+ }
97
+ end
98
+ assert_equal('<div class="rad" id="thing">&lt;awesome&gt;</div>',
99
+ builder.doc.root.to_html.chomp)
100
+ end
101
+
102
+ def test_builder_adds_attributes
103
+ builder = Nokogiri::HTML::Builder.new do
104
+ div.rad.thing! "tender div"
105
+ end
106
+ assert_equal('<div class="rad" id="thing">tender div</div>',
107
+ builder.doc.root.to_html.chomp)
108
+ end
109
+
110
+ def test_bold_tag
111
+ builder = Nokogiri::HTML::Builder.new do
112
+ b "bold tag"
113
+ end
114
+ assert_equal('<b>bold tag</b>', builder.doc.root.to_html.chomp)
115
+ end
116
+
117
+ def test_html_then_body_tag
118
+ builder = Nokogiri::HTML::Builder.new do
119
+ html {
120
+ body {
121
+ b "bold tag"
122
+ }
123
+ }
124
+ end
125
+ assert_equal('<html><body><b>bold tag</b></body></html>',
126
+ builder.doc.root.to_html.chomp.gsub(/>\s*</, '><'))
127
+ end
128
+
129
+ def test_instance_eval_with_delegation_to_block_context
130
+ class << self
131
+ def foo
132
+ "foo!"
133
+ end
134
+ end
135
+
136
+ builder = Nokogiri::HTML::Builder.new { text foo }
137
+ assert builder.to_html.include?("foo!")
138
+ end
139
+
140
+ def test_builder_with_param
141
+ doc = Nokogiri::HTML::Builder.new { |html|
142
+ html.body {
143
+ html.p "hello world"
144
+ }
145
+ }.doc
146
+
147
+ assert node = doc.xpath('//body/p').first
148
+ assert_equal 'hello world', node.content
149
+ end
150
+
151
+ def test_builder_with_id
152
+ text = "hello world"
153
+ doc = Nokogiri::HTML::Builder.new { |html|
154
+ html.body {
155
+ html.id_ text
156
+ }
157
+ }.doc
158
+
159
+ assert node = doc.xpath('//body/id').first
160
+ assert_equal text, node.content
161
+ end
162
+ end
163
+ end
164
+ end
@@ -0,0 +1,390 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestDocument < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
9
+ end
10
+
11
+ def test_fragment
12
+ fragment = @html.fragment
13
+ assert_equal 0, fragment.children.length
14
+ end
15
+
16
+ def test_document_takes_config_block
17
+ options = nil
18
+ Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
19
+ options = cfg
20
+ options.nonet.nowarning.dtdattr
21
+ end
22
+ assert options.nonet?
23
+ assert options.nowarning?
24
+ assert options.dtdattr?
25
+ end
26
+
27
+ def test_parse_takes_config_block
28
+ options = nil
29
+ Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
30
+ options = cfg
31
+ options.nonet.nowarning.dtdattr
32
+ end
33
+ assert options.nonet?
34
+ assert options.nowarning?
35
+ assert options.dtdattr?
36
+ end
37
+
38
+ def test_subclass
39
+ klass = Class.new(Nokogiri::HTML::Document)
40
+ doc = klass.new
41
+ assert_instance_of klass, doc
42
+ end
43
+
44
+ def test_subclass_initialize
45
+ klass = Class.new(Nokogiri::HTML::Document) do
46
+ attr_accessor :initialized_with
47
+
48
+ def initialize(*args)
49
+ @initialized_with = args
50
+ end
51
+ end
52
+ doc = klass.new("uri", "external_id", 1)
53
+ assert_equal ["uri", "external_id", 1], doc.initialized_with
54
+ end
55
+
56
+ def test_subclass_dup
57
+ klass = Class.new(Nokogiri::HTML::Document)
58
+ doc = klass.new.dup
59
+ assert_instance_of klass, doc
60
+ end
61
+
62
+ def test_subclass_parse
63
+ klass = Class.new(Nokogiri::HTML::Document)
64
+ doc = klass.parse(File.read(HTML_FILE))
65
+ assert_equal @html.to_s, doc.to_s
66
+ assert_instance_of klass, doc
67
+ end
68
+
69
+ def test_document_parse_method
70
+ html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
71
+ assert_equal @html.to_s, html.to_s
72
+ end
73
+
74
+ ###
75
+ # Nokogiri::HTML returns an empty Document when given a blank string GH#11
76
+ def test_empty_string_returns_empty_doc
77
+ doc = Nokogiri::HTML('')
78
+ assert_instance_of Nokogiri::HTML::Document, doc
79
+ assert_nil doc.root
80
+ end
81
+
82
+ unless %w[2 6] === LIBXML_VERSION.split('.')[0..1]
83
+ # FIXME: this is a hack around broken libxml versions
84
+ def test_to_xhtml_with_indent
85
+ doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
86
+ doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
87
+ assert_indent 2, doc
88
+ end
89
+
90
+ def test_write_to_xhtml_with_indent
91
+ io = StringIO.new
92
+ doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
93
+ doc.write_xhtml_to io, :indent => 5
94
+ io.rewind
95
+ doc = Nokogiri::HTML(io.read)
96
+ assert_indent 5, doc
97
+ end
98
+ end
99
+
100
+ def test_swap_should_not_exist
101
+ assert_raises(NoMethodError) {
102
+ @html.swap
103
+ }
104
+ end
105
+
106
+ def test_namespace_should_not_exist
107
+ assert_raises(NoMethodError) {
108
+ @html.namespace
109
+ }
110
+ end
111
+
112
+ def test_meta_encoding
113
+ assert_equal 'UTF-8', @html.meta_encoding
114
+ end
115
+
116
+ def test_meta_encoding=
117
+ @html.meta_encoding = 'EUC-JP'
118
+ assert_equal 'EUC-JP', @html.meta_encoding
119
+ end
120
+
121
+ def test_meta_encoding_without_head
122
+ html = Nokogiri::HTML('<html><body>foo</body></html>')
123
+ assert_nil html.meta_encoding
124
+
125
+ html.meta_encoding = 'EUC-JP'
126
+ assert_nil html.meta_encoding
127
+ end
128
+
129
+ def test_root_node_parent_is_document
130
+ parent = @html.root.parent
131
+ assert_equal @html, parent
132
+ assert_instance_of Nokogiri::HTML::Document, parent
133
+ end
134
+
135
+ def test_parse_handles_nil_gracefully
136
+ assert_nothing_raised do
137
+ @doc = Nokogiri::HTML::Document.parse(nil)
138
+ end
139
+ assert_instance_of Nokogiri::HTML::Document, @doc
140
+ end
141
+
142
+ def test_parse_empty_document
143
+ doc = Nokogiri::HTML("\n")
144
+ assert_equal 0, doc.css('a').length
145
+ assert_equal 0, doc.xpath('//a').length
146
+ assert_equal 0, doc.search('//a').length
147
+ end
148
+
149
+ def test_HTML_function
150
+ html = Nokogiri::HTML(File.read(HTML_FILE))
151
+ assert html.html?
152
+ end
153
+
154
+ def test_parse_io
155
+ assert doc = File.open(HTML_FILE, 'rb') { |f|
156
+ Document.read_io(f, nil, 'UTF-8',
157
+ XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
158
+ )
159
+ }
160
+ end
161
+
162
+ def test_to_xhtml
163
+ assert_match 'XHTML', @html.to_xhtml
164
+ assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
165
+ assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
166
+ end
167
+
168
+ def test_no_xml_header
169
+ html = Nokogiri::HTML(<<-eohtml)
170
+ <html>
171
+ </html>
172
+ eohtml
173
+ assert html.to_html.length > 0, 'html length is too short'
174
+ assert_no_match(/^<\?xml/, html.to_html)
175
+ end
176
+
177
+ def test_document_has_error
178
+ html = Nokogiri::HTML(<<-eohtml)
179
+ <html>
180
+ <body>
181
+ <div awesome="asdf>
182
+ <p>inside div tag</p>
183
+ </div>
184
+ <p>outside div tag</p>
185
+ </body>
186
+ </html>
187
+ eohtml
188
+ assert html.errors.length > 0
189
+ end
190
+
191
+ def test_relative_css
192
+ html = Nokogiri::HTML(<<-eohtml)
193
+ <html>
194
+ <body>
195
+ <div>
196
+ <p>inside div tag</p>
197
+ </div>
198
+ <p>outside div tag</p>
199
+ </body>
200
+ </html>
201
+ eohtml
202
+ set = html.search('div').search('p')
203
+ assert_equal(1, set.length)
204
+ assert_equal('inside div tag', set.first.inner_text)
205
+ end
206
+
207
+ def test_multi_css
208
+ html = Nokogiri::HTML(<<-eohtml)
209
+ <html>
210
+ <body>
211
+ <div>
212
+ <p>p tag</p>
213
+ <a>a tag</a>
214
+ </div>
215
+ </body>
216
+ </html>
217
+ eohtml
218
+ set = html.css('p, a')
219
+ assert_equal(2, set.length)
220
+ assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
221
+ end
222
+
223
+ def test_inner_text
224
+ html = Nokogiri::HTML(<<-eohtml)
225
+ <html>
226
+ <body>
227
+ <div>
228
+ <p>
229
+ Hello world!
230
+ </p>
231
+ </div>
232
+ </body>
233
+ </html>
234
+ eohtml
235
+ node = html.xpath('//div').first
236
+ assert_equal('Hello world!', node.inner_text.strip)
237
+ end
238
+
239
+ def test_find_by_xpath
240
+ found = @html.xpath('//div/a')
241
+ assert_equal 3, found.length
242
+ end
243
+
244
+ def test_find_by_css
245
+ found = @html.css('div > a')
246
+ assert_equal 3, found.length
247
+ end
248
+
249
+ def test_find_by_css_with_square_brackets
250
+ found = @html.css("div[@id='header'] > h1")
251
+ found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
252
+ assert_equal 1, found.length
253
+ end
254
+
255
+ def test_find_with_function
256
+ found = @html.css("div:awesome() h1", Class.new {
257
+ def awesome divs
258
+ [divs.first]
259
+ end
260
+ }.new)
261
+ end
262
+
263
+ def test_dup_shallow
264
+ found = @html.search('//div/a').first
265
+ dup = found.dup(0)
266
+ assert dup
267
+ assert_equal '', dup.content
268
+ end
269
+
270
+ def test_search_can_handle_xpath_and_css
271
+ found = @html.search('//div/a', 'div > p')
272
+ length = @html.xpath('//div/a').length +
273
+ @html.css('div > p').length
274
+ assert_equal length, found.length
275
+ end
276
+
277
+ def test_dup_document
278
+ assert dup = @html.dup
279
+ assert_not_equal dup, @html
280
+ assert @html.html?
281
+ assert_instance_of Nokogiri::HTML::Document, dup
282
+ assert dup.html?, 'duplicate should be html'
283
+ assert_equal @html.to_s, dup.to_s
284
+ end
285
+
286
+ def test_dup_document_shallow
287
+ assert dup = @html.dup(0)
288
+ assert_not_equal dup, @html
289
+ end
290
+
291
+ def test_dup
292
+ found = @html.search('//div/a').first
293
+ dup = found.dup
294
+ assert dup
295
+ assert_equal found.content, dup.content
296
+ assert_equal found.document, dup.document
297
+ end
298
+
299
+ def test_inner_html
300
+ html = Nokogiri::HTML(<<-eohtml)
301
+ <html>
302
+ <body>
303
+ <div>
304
+ <p>
305
+ Hello world!
306
+ </p>
307
+ </div>
308
+ </body>
309
+ </html>
310
+ eohtml
311
+ node = html.xpath('//div').first
312
+ assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
313
+ end
314
+
315
+ def test_round_trip
316
+ doc = Nokogiri::HTML(@html.inner_html)
317
+ assert_equal @html.root.to_html, doc.root.to_html
318
+ end
319
+
320
+ def test_fragment_contains_text_node
321
+ fragment = Nokogiri::HTML.fragment('fooo')
322
+ assert_equal 1, fragment.children.length
323
+ assert_equal 'fooo', fragment.inner_text
324
+ end
325
+
326
+ def test_fragment_includes_two_tags
327
+ assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
328
+ end
329
+
330
+ def test_relative_css_finder
331
+ doc = Nokogiri::HTML(<<-eohtml)
332
+ <html>
333
+ <body>
334
+ <div class="red">
335
+ <p>
336
+ inside red
337
+ </p>
338
+ </div>
339
+ <div class="green">
340
+ <p>
341
+ inside green
342
+ </p>
343
+ </div>
344
+ </body>
345
+ </html>
346
+ eohtml
347
+ red_divs = doc.css('div.red')
348
+ assert_equal 1, red_divs.length
349
+ p_tags = red_divs.first.css('p')
350
+ assert_equal 1, p_tags.length
351
+ assert_equal 'inside red', p_tags.first.text.strip
352
+ end
353
+
354
+ def test_find_classes
355
+ doc = Nokogiri::HTML(<<-eohtml)
356
+ <html>
357
+ <body>
358
+ <p class="red">RED</p>
359
+ <p class="awesome red">RED</p>
360
+ <p class="notred">GREEN</p>
361
+ <p class="green notred">GREEN</p>
362
+ </body>
363
+ </html>
364
+ eohtml
365
+ list = doc.css('.red')
366
+ assert_equal 2, list.length
367
+ assert_equal %w{ RED RED }, list.map { |x| x.text }
368
+ end
369
+
370
+ def test_parse_can_take_io
371
+ html = nil
372
+ File.open(HTML_FILE, 'rb') { |f|
373
+ html = Nokogiri::HTML(f)
374
+ }
375
+ assert html.html?
376
+ end
377
+
378
+ def test_html?
379
+ assert !@html.xml?
380
+ assert @html.html?
381
+ end
382
+
383
+ def test_serialize
384
+ assert @html.serialize
385
+ assert @html.to_html
386
+ end
387
+ end
388
+ end
389
+ end
390
+