nokogiri 1.3.3-x86-mswin32 → 1.4.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (202):
  1. data/CHANGELOG.ja.rdoc +48 -3
  2. data/CHANGELOG.rdoc +42 -0
  3. data/Manifest.txt +44 -29
  4. data/README.ja.rdoc +0 -2
  5. data/README.rdoc +4 -7
  6. data/Rakefile +14 -22
  7. data/bin/nokogiri +7 -5
  8. data/ext/nokogiri/extconf.rb +5 -21
  9. data/ext/nokogiri/html_document.c +14 -50
  10. data/ext/nokogiri/html_element_description.c +7 -7
  11. data/ext/nokogiri/html_entity_lookup.c +6 -4
  12. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  13. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  14. data/ext/nokogiri/nokogiri.c +9 -3
  15. data/ext/nokogiri/nokogiri.h +16 -20
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  18. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  19. data/ext/nokogiri/xml_cdata.c +6 -5
  20. data/ext/nokogiri/xml_comment.c +3 -2
  21. data/ext/nokogiri/xml_document.c +93 -23
  22. data/ext/nokogiri/xml_document_fragment.c +1 -3
  23. data/ext/nokogiri/xml_dtd.c +63 -6
  24. data/ext/nokogiri/xml_element_content.c +123 -0
  25. data/ext/nokogiri/xml_element_content.h +10 -0
  26. data/ext/nokogiri/xml_element_decl.c +69 -0
  27. data/ext/nokogiri/xml_element_decl.h +9 -0
  28. data/ext/nokogiri/xml_entity_decl.c +97 -0
  29. data/ext/nokogiri/xml_entity_decl.h +10 -0
  30. data/ext/nokogiri/xml_entity_reference.c +1 -1
  31. data/ext/nokogiri/xml_io.c +10 -3
  32. data/ext/nokogiri/xml_io.h +1 -0
  33. data/ext/nokogiri/xml_namespace.c +2 -2
  34. data/ext/nokogiri/xml_node.c +139 -34
  35. data/ext/nokogiri/xml_node.h +0 -1
  36. data/ext/nokogiri/xml_node_set.c +23 -16
  37. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  38. data/ext/nokogiri/xml_reader.c +78 -50
  39. data/ext/nokogiri/xml_sax_parser.c +109 -168
  40. data/ext/nokogiri/xml_sax_parser.h +33 -0
  41. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  42. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  43. data/ext/nokogiri/xml_sax_push_parser.c +11 -6
  44. data/ext/nokogiri/xml_syntax_error.c +63 -12
  45. data/ext/nokogiri/xml_text.c +4 -3
  46. data/ext/nokogiri/xml_xpath.c +1 -1
  47. data/ext/nokogiri/xml_xpath_context.c +12 -25
  48. data/ext/nokogiri/xslt_stylesheet.c +3 -3
  49. data/lib/nokogiri.rb +4 -4
  50. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  51. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  52. data/lib/nokogiri/css/node.rb +1 -9
  53. data/lib/nokogiri/css/xpath_visitor.rb +11 -21
  54. data/lib/nokogiri/ffi/html/document.rb +0 -9
  55. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  56. data/lib/nokogiri/ffi/io_callbacks.rb +4 -2
  57. data/lib/nokogiri/ffi/libxml.rb +44 -10
  58. data/lib/nokogiri/ffi/structs/common_node.rb +1 -1
  59. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  60. data/lib/nokogiri/ffi/structs/xml_dtd.rb +3 -1
  61. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  62. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  63. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  64. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  65. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  66. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +4 -3
  67. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +1 -1
  68. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  69. data/lib/nokogiri/ffi/xml/comment.rb +2 -2
  70. data/lib/nokogiri/ffi/xml/document.rb +29 -12
  71. data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -5
  72. data/lib/nokogiri/ffi/xml/dtd.rb +14 -3
  73. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  74. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  75. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  76. data/lib/nokogiri/ffi/xml/node.rb +45 -5
  77. data/lib/nokogiri/ffi/xml/node_set.rb +1 -1
  78. data/lib/nokogiri/ffi/xml/reader.rb +45 -24
  79. data/lib/nokogiri/ffi/xml/sax/parser.rb +27 -34
  80. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  81. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +5 -4
  82. data/lib/nokogiri/ffi/xml/syntax_error.rb +31 -16
  83. data/lib/nokogiri/ffi/xml/text.rb +2 -2
  84. data/lib/nokogiri/html.rb +1 -0
  85. data/lib/nokogiri/html/document.rb +39 -24
  86. data/lib/nokogiri/html/sax/parser.rb +2 -2
  87. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  88. data/lib/nokogiri/version.rb +1 -1
  89. data/lib/nokogiri/xml.rb +6 -1
  90. data/lib/nokogiri/xml/attr.rb +5 -0
  91. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  92. data/lib/nokogiri/xml/builder.rb +121 -13
  93. data/lib/nokogiri/xml/character_data.rb +7 -0
  94. data/lib/nokogiri/xml/document.rb +43 -29
  95. data/lib/nokogiri/xml/document_fragment.rb +26 -6
  96. data/lib/nokogiri/xml/dtd.rb +5 -5
  97. data/lib/nokogiri/xml/element_content.rb +36 -0
  98. data/lib/nokogiri/xml/element_decl.rb +13 -0
  99. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  100. data/lib/nokogiri/xml/fragment_handler.rb +22 -11
  101. data/lib/nokogiri/xml/namespace.rb +6 -0
  102. data/lib/nokogiri/xml/node.rb +33 -15
  103. data/lib/nokogiri/xml/node_set.rb +66 -44
  104. data/lib/nokogiri/xml/pp.rb +2 -0
  105. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  106. data/lib/nokogiri/xml/pp/node.rb +56 -0
  107. data/lib/nokogiri/xml/reader.rb +8 -0
  108. data/lib/nokogiri/xml/sax.rb +1 -1
  109. data/lib/nokogiri/xml/sax/document.rb +18 -1
  110. data/lib/nokogiri/xml/sax/parser.rb +15 -8
  111. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  112. data/lib/nokogiri/xml/sax/push_parser.rb +0 -3
  113. data/lib/nokogiri/xml/syntax_error.rb +4 -0
  114. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  115. data/test/css/test_nthiness.rb +1 -1
  116. data/test/css/test_parser.rb +1 -1
  117. data/test/css/test_tokenizer.rb +1 -1
  118. data/test/css/test_xpath_visitor.rb +1 -1
  119. data/test/ffi/test_document.rb +1 -1
  120. data/test/files/shift_jis.html +10 -0
  121. data/test/files/staff.dtd +10 -0
  122. data/test/helper.rb +12 -3
  123. data/test/html/sax/test_parser.rb +1 -1
  124. data/test/html/sax/test_parser_context.rb +48 -0
  125. data/test/html/test_builder.rb +8 -2
  126. data/test/html/test_document.rb +23 -1
  127. data/test/html/test_document_encoding.rb +15 -1
  128. data/test/html/test_document_fragment.rb +10 -1
  129. data/test/html/test_element_description.rb +1 -2
  130. data/test/html/test_named_characters.rb +1 -1
  131. data/test/html/test_node.rb +61 -1
  132. data/test/html/test_node_encoding.rb +27 -0
  133. data/test/test_convert_xpath.rb +1 -3
  134. data/test/test_css_cache.rb +1 -1
  135. data/test/test_gc.rb +1 -1
  136. data/test/test_memory_leak.rb +1 -1
  137. data/test/test_nokogiri.rb +3 -3
  138. data/test/test_reader.rb +29 -1
  139. data/test/test_xslt_transforms.rb +1 -1
  140. data/test/xml/node/test_save_options.rb +1 -1
  141. data/test/xml/node/test_subclass.rb +1 -1
  142. data/test/xml/sax/test_parser.rb +64 -3
  143. data/test/xml/sax/test_parser_context.rb +56 -0
  144. data/test/xml/sax/test_push_parser.rb +11 -1
  145. data/test/xml/test_attr.rb +1 -1
  146. data/test/xml/test_attribute_decl.rb +82 -0
  147. data/test/xml/test_builder.rb +95 -1
  148. data/test/xml/test_cdata.rb +1 -1
  149. data/test/xml/test_comment.rb +7 -1
  150. data/test/xml/test_document.rb +147 -6
  151. data/test/xml/test_document_encoding.rb +1 -1
  152. data/test/xml/test_document_fragment.rb +55 -5
  153. data/test/xml/test_dtd.rb +40 -5
  154. data/test/xml/test_dtd_encoding.rb +3 -1
  155. data/test/xml/test_element_content.rb +56 -0
  156. data/test/xml/test_element_decl.rb +73 -0
  157. data/test/xml/test_entity_decl.rb +83 -0
  158. data/test/xml/test_entity_reference.rb +1 -1
  159. data/test/xml/test_namespace.rb +21 -1
  160. data/test/xml/test_node.rb +70 -4
  161. data/test/xml/test_node_attributes.rb +1 -1
  162. data/test/xml/test_node_encoding.rb +1 -1
  163. data/test/xml/test_node_set.rb +136 -2
  164. data/test/xml/test_parse_options.rb +1 -1
  165. data/test/xml/test_processing_instruction.rb +1 -1
  166. data/test/xml/test_reader_encoding.rb +1 -1
  167. data/test/xml/test_relax_ng.rb +1 -1
  168. data/test/xml/test_schema.rb +1 -1
  169. data/test/xml/test_syntax_error.rb +27 -0
  170. data/test/xml/test_text.rb +13 -1
  171. data/test/xml/test_unparented_node.rb +1 -1
  172. data/test/xml/test_xpath.rb +1 -1
  173. metadata +55 -38
  174. data/ext/nokogiri/html_sax_parser.c +0 -57
  175. data/ext/nokogiri/html_sax_parser.h +0 -11
  176. data/lib/action-nokogiri.rb +0 -38
  177. data/lib/nokogiri/decorators.rb +0 -2
  178. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  179. data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
  180. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
  181. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -30
  182. data/lib/nokogiri/ffi/html/sax/parser.rb +0 -21
  183. data/lib/nokogiri/hpricot.rb +0 -92
  184. data/lib/nokogiri/xml/entity_declaration.rb +0 -11
  185. data/lib/nokogiri/xml/sax/legacy_handlers.rb +0 -65
  186. data/test/hpricot/files/basic.xhtml +0 -17
  187. data/test/hpricot/files/boingboing.html +0 -2266
  188. data/test/hpricot/files/cy0.html +0 -3653
  189. data/test/hpricot/files/immob.html +0 -400
  190. data/test/hpricot/files/pace_application.html +0 -1320
  191. data/test/hpricot/files/tenderlove.html +0 -16
  192. data/test/hpricot/files/uswebgen.html +0 -220
  193. data/test/hpricot/files/utf8.html +0 -1054
  194. data/test/hpricot/files/week9.html +0 -1723
  195. data/test/hpricot/files/why.xml +0 -19
  196. data/test/hpricot/load_files.rb +0 -11
  197. data/test/hpricot/test_alter.rb +0 -68
  198. data/test/hpricot/test_builder.rb +0 -20
  199. data/test/hpricot/test_parser.rb +0 -350
  200. data/test/hpricot/test_paths.rb +0 -15
  201. data/test/hpricot/test_preserved.rb +0 -77
  202. data/test/hpricot/test_xml.rb +0 -30
@@ -1,15 +0,0 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
- require File.join(File.dirname(__FILE__),"load_files")
3
-
4
- class TestParser < Nokogiri::TestCase
5
- include Nokogiri
6
-
7
- def test_roundtrip
8
- @basic = Nokogiri.parse(TestFiles::BASIC)
9
- %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
10
- ele = @basic.at(css_sel)
11
- assert_equal ele, @basic.at(ele.css_path), ele.css_path
12
- assert_equal ele, @basic.at(ele.path), ele.xpath
13
- end
14
- end
15
- end
@@ -1,77 +0,0 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
- require File.join(File.dirname(__FILE__),"load_files")
3
-
4
- class TestPreserved < Nokogiri::TestCase
5
- def assert_roundtrip str
6
- doc = Nokogiri.Hpricot(str)
7
- yield doc if block_given?
8
- str2 = doc.to_original_html
9
- [*str].zip([*str2]).each do |s1, s2|
10
- assert_equal s1, s2
11
- end
12
- end
13
-
14
- def assert_html str1, str2
15
- doc = Nokogiri.Hpricot(str2)
16
- yield doc if block_given?
17
- assert_equal str1, doc.to_original_html
18
- end
19
-
20
- ####
21
- # Not supporting to_original_html
22
- #def test_simple
23
- # str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
24
- # assert_html str, str
25
- # assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
26
- # (doc/:p).set('class', 'new')
27
- # end
28
- #end
29
-
30
- ####
31
- # Not supporting to_original_html
32
- #def test_parent
33
- # str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
34
- # assert_html str, str
35
- # assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
36
- # (doc/:head).remove
37
- # (doc/:div).set('id', 'all')
38
- # (doc/:p).wrap('<div></div>')
39
- # end
40
- #end
41
-
42
- # Not really a valid test. If libxml can figure out the encoding of the file,
43
- # it will use that encoding, otherwise it uses the &#xwhatever so that no data
44
- # is lost.
45
- #
46
- # libxml on OSX can't figure out the encoding, so this tests passes. linux
47
- # can figure out the encoding, so it fails.
48
- #def test_escaping_of_contents
49
- # doc = Nokogiri.Hpricot(TestFiles::BOINGBOING)
50
- # assert_equal "Fukuda&#x2019;s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
51
- #end
52
-
53
- ####
54
- # Modified. No.
55
- #def test_files
56
- # assert_roundtrip TestFiles::BASIC
57
- # assert_roundtrip TestFiles::BOINGBOING
58
- # assert_roundtrip TestFiles::CY0
59
- #end
60
-
61
- ####
62
- # Modified.. When calling "to_html" on the document, proper html/doc tags
63
- # are produced too.
64
- def test_escaping_of_attrs
65
- # ampersands in URLs
66
- str = %{<a href="http://google.com/search?q=nokogiri&amp;l=en">Google</a>}
67
- link = (doc = Nokogiri(str)).at(:a)
68
- assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
69
- assert_equal "http://google.com/search?q=nokogiri&l=en", link.get_attribute('href')
70
- assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
71
- assert_equal str, link.to_html
72
-
73
- # alter the url
74
- link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
75
- assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, link.to_html.gsub(/%22/, '&quot;')
76
- end
77
- end
@@ -1,30 +0,0 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
- require File.join(File.dirname(__FILE__),"load_files")
3
-
4
- class TestParser < Nokogiri::TestCase
5
- include Nokogiri
6
- # normally, the link tags are empty HTML tags.
7
- # contributed by laudney.
8
- def test_normally_empty
9
- doc = Nokogiri::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
10
- assert_equal "this is title", (doc/:rss/:channel/:title).text
11
- assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
12
- end
13
-
14
- # make sure XML doesn't get downcased
15
- def test_casing
16
- doc = Nokogiri::XML(TestFiles::WHY)
17
-
18
- ### Modified.
19
- # I don't want to differentiate pseudo classes from namespaces. If
20
- # you're parsing xml, use XPath. That's what its for. :-P
21
- assert_equal "hourly", (doc.at "//sy:updatePeriod").content
22
- assert_equal 1, (doc/"guid[@isPermaLink]").length
23
- end
24
-
25
- # be sure tags named "text" are ok
26
- def test_text_tags
27
- doc = Nokogiri::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
28
- assert_equal "City Poisoned", (doc/"title").text
29
- end
30
- end