superfeedr-nokogiri 1.4.0.20091116183308

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +314 -0
  4. data/Manifest.txt +269 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +118 -0
  7. data/Rakefile +244 -0
  8. data/bin/nokogiri +49 -0
  9. data/ext/nokogiri/extconf.rb +145 -0
  10. data/ext/nokogiri/html_document.c +145 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +32 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  17. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  18. data/ext/nokogiri/nokogiri.c +89 -0
  19. data/ext/nokogiri/nokogiri.h +145 -0
  20. data/ext/nokogiri/xml_attr.c +92 -0
  21. data/ext/nokogiri/xml_attr.h +9 -0
  22. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  23. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  24. data/ext/nokogiri/xml_cdata.c +54 -0
  25. data/ext/nokogiri/xml_cdata.h +9 -0
  26. data/ext/nokogiri/xml_comment.c +52 -0
  27. data/ext/nokogiri/xml_comment.h +9 -0
  28. data/ext/nokogiri/xml_document.c +388 -0
  29. data/ext/nokogiri/xml_document.h +24 -0
  30. data/ext/nokogiri/xml_document_fragment.c +46 -0
  31. data/ext/nokogiri/xml_document_fragment.h +10 -0
  32. data/ext/nokogiri/xml_dtd.c +192 -0
  33. data/ext/nokogiri/xml_dtd.h +10 -0
  34. data/ext/nokogiri/xml_element_content.c +123 -0
  35. data/ext/nokogiri/xml_element_content.h +10 -0
  36. data/ext/nokogiri/xml_element_decl.c +69 -0
  37. data/ext/nokogiri/xml_element_decl.h +9 -0
  38. data/ext/nokogiri/xml_entity_decl.c +97 -0
  39. data/ext/nokogiri/xml_entity_decl.h +10 -0
  40. data/ext/nokogiri/xml_entity_reference.c +50 -0
  41. data/ext/nokogiri/xml_entity_reference.h +9 -0
  42. data/ext/nokogiri/xml_io.c +31 -0
  43. data/ext/nokogiri/xml_io.h +11 -0
  44. data/ext/nokogiri/xml_namespace.c +74 -0
  45. data/ext/nokogiri/xml_namespace.h +12 -0
  46. data/ext/nokogiri/xml_node.c +1060 -0
  47. data/ext/nokogiri/xml_node.h +13 -0
  48. data/ext/nokogiri/xml_node_set.c +397 -0
  49. data/ext/nokogiri/xml_node_set.h +9 -0
  50. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  51. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  52. data/ext/nokogiri/xml_reader.c +593 -0
  53. data/ext/nokogiri/xml_reader.h +10 -0
  54. data/ext/nokogiri/xml_relax_ng.c +159 -0
  55. data/ext/nokogiri/xml_relax_ng.h +9 -0
  56. data/ext/nokogiri/xml_sax_parser.c +286 -0
  57. data/ext/nokogiri/xml_sax_parser.h +43 -0
  58. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  59. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  60. data/ext/nokogiri/xml_sax_push_parser.c +114 -0
  61. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  62. data/ext/nokogiri/xml_schema.c +156 -0
  63. data/ext/nokogiri/xml_schema.h +9 -0
  64. data/ext/nokogiri/xml_syntax_error.c +261 -0
  65. data/ext/nokogiri/xml_syntax_error.h +13 -0
  66. data/ext/nokogiri/xml_text.c +48 -0
  67. data/ext/nokogiri/xml_text.h +9 -0
  68. data/ext/nokogiri/xml_xpath.c +53 -0
  69. data/ext/nokogiri/xml_xpath.h +11 -0
  70. data/ext/nokogiri/xml_xpath_context.c +239 -0
  71. data/ext/nokogiri/xml_xpath_context.h +9 -0
  72. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  73. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  74. data/lib/nokogiri.rb +116 -0
  75. data/lib/nokogiri/css.rb +25 -0
  76. data/lib/nokogiri/css/generated_parser.rb +646 -0
  77. data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
  78. data/lib/nokogiri/css/node.rb +99 -0
  79. data/lib/nokogiri/css/parser.rb +82 -0
  80. data/lib/nokogiri/css/parser.y +227 -0
  81. data/lib/nokogiri/css/syntax_error.rb +7 -0
  82. data/lib/nokogiri/css/tokenizer.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rex +54 -0
  84. data/lib/nokogiri/css/xpath_visitor.rb +162 -0
  85. data/lib/nokogiri/decorators/slop.rb +33 -0
  86. data/lib/nokogiri/ffi/html/document.rb +28 -0
  87. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  88. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  89. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  90. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  91. data/lib/nokogiri/ffi/libxml.rb +356 -0
  92. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  93. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  94. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  95. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  96. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  97. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  98. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  100. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  101. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  102. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  103. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  104. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  105. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  106. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  107. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  109. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  110. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  111. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  112. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
  113. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  114. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  115. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  116. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  117. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  118. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  119. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  120. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  121. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  122. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  123. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  124. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  125. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  126. data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
  127. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  128. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  129. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  132. data/lib/nokogiri/ffi/xml/node.rb +444 -0
  133. data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
  134. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  135. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  136. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  137. data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
  138. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  139. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
  140. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  141. data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
  142. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  143. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  144. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  145. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  146. data/lib/nokogiri/html.rb +35 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +88 -0
  149. data/lib/nokogiri/html/document_fragment.rb +15 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  152. data/lib/nokogiri/html/sax/parser.rb +48 -0
  153. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  154. data/lib/nokogiri/syntax_error.rb +4 -0
  155. data/lib/nokogiri/version.rb +33 -0
  156. data/lib/nokogiri/version_warning.rb +11 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +405 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +131 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +69 -0
  165. data/lib/nokogiri/xml/dtd.rb +11 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  169. data/lib/nokogiri/xml/fragment_handler.rb +71 -0
  170. data/lib/nokogiri/xml/namespace.rb +13 -0
  171. data/lib/nokogiri/xml/node.rb +665 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  173. data/lib/nokogiri/xml/node_set.rb +307 -0
  174. data/lib/nokogiri/xml/notation.rb +6 -0
  175. data/lib/nokogiri/xml/parse_options.rb +85 -0
  176. data/lib/nokogiri/xml/pp.rb +2 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  178. data/lib/nokogiri/xml/pp/node.rb +56 -0
  179. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  180. data/lib/nokogiri/xml/reader.rb +74 -0
  181. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  182. data/lib/nokogiri/xml/sax.rb +4 -0
  183. data/lib/nokogiri/xml/sax/document.rb +160 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  187. data/lib/nokogiri/xml/schema.rb +61 -0
  188. data/lib/nokogiri/xml/syntax_error.rb +38 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +48 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xsd/xmlparser/nokogiri.rb +71 -0
  195. data/tasks/test.rb +100 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +277 -0
  198. data/test/css/test_tokenizer.rb +183 -0
  199. data/test/css/test_xpath_visitor.rb +76 -0
  200. data/test/ffi/test_document.rb +35 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/exslt.xml +8 -0
  207. data/test/files/exslt.xslt +35 -0
  208. data/test/files/foo/foo.xsd +4 -0
  209. data/test/files/po.xml +32 -0
  210. data/test/files/po.xsd +66 -0
  211. data/test/files/shift_jis.html +10 -0
  212. data/test/files/shift_jis.xml +5 -0
  213. data/test/files/snuggles.xml +3 -0
  214. data/test/files/staff.dtd +10 -0
  215. data/test/files/staff.xml +59 -0
  216. data/test/files/staff.xslt +32 -0
  217. data/test/files/tlm.html +850 -0
  218. data/test/files/valid_bar.xml +2 -0
  219. data/test/helper.rb +136 -0
  220. data/test/html/sax/test_parser.rb +64 -0
  221. data/test/html/sax/test_parser_context.rb +48 -0
  222. data/test/html/test_builder.rb +164 -0
  223. data/test/html/test_document.rb +390 -0
  224. data/test/html/test_document_encoding.rb +77 -0
  225. data/test/html/test_document_fragment.rb +132 -0
  226. data/test/html/test_element_description.rb +94 -0
  227. data/test/html/test_named_characters.rb +14 -0
  228. data/test/html/test_node.rb +228 -0
  229. data/test/html/test_node_encoding.rb +27 -0
  230. data/test/test_convert_xpath.rb +135 -0
  231. data/test/test_css_cache.rb +45 -0
  232. data/test/test_gc.rb +15 -0
  233. data/test/test_memory_leak.rb +77 -0
  234. data/test/test_nokogiri.rb +134 -0
  235. data/test/test_reader.rb +358 -0
  236. data/test/test_xslt_transforms.rb +131 -0
  237. data/test/xml/node/test_save_options.rb +20 -0
  238. data/test/xml/node/test_subclass.rb +44 -0
  239. data/test/xml/sax/test_parser.rb +307 -0
  240. data/test/xml/sax/test_parser_context.rb +56 -0
  241. data/test/xml/sax/test_push_parser.rb +131 -0
  242. data/test/xml/test_attr.rb +38 -0
  243. data/test/xml/test_attribute_decl.rb +82 -0
  244. data/test/xml/test_builder.rb +167 -0
  245. data/test/xml/test_cdata.rb +38 -0
  246. data/test/xml/test_comment.rb +29 -0
  247. data/test/xml/test_document.rb +607 -0
  248. data/test/xml/test_document_encoding.rb +26 -0
  249. data/test/xml/test_document_fragment.rb +138 -0
  250. data/test/xml/test_dtd.rb +82 -0
  251. data/test/xml/test_dtd_encoding.rb +33 -0
  252. data/test/xml/test_element_content.rb +56 -0
  253. data/test/xml/test_element_decl.rb +73 -0
  254. data/test/xml/test_entity_decl.rb +83 -0
  255. data/test/xml/test_entity_reference.rb +21 -0
  256. data/test/xml/test_namespace.rb +68 -0
  257. data/test/xml/test_node.rb +889 -0
  258. data/test/xml/test_node_attributes.rb +34 -0
  259. data/test/xml/test_node_encoding.rb +107 -0
  260. data/test/xml/test_node_set.rb +531 -0
  261. data/test/xml/test_parse_options.rb +52 -0
  262. data/test/xml/test_processing_instruction.rb +30 -0
  263. data/test/xml/test_reader_encoding.rb +126 -0
  264. data/test/xml/test_relax_ng.rb +60 -0
  265. data/test/xml/test_schema.rb +89 -0
  266. data/test/xml/test_syntax_error.rb +27 -0
  267. data/test/xml/test_text.rb +30 -0
  268. data/test/xml/test_unparented_node.rb +381 -0
  269. data/test/xml/test_xpath.rb +106 -0
  270. metadata +430 -0
@@ -0,0 +1,244 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ gem 'hoe', '>= 2.1.0'
5
+ require 'hoe'
6
+
7
# Platform switches consulted further down when wiring up build tasks.
windows = RUBY_PLATFORM =~ /(mswin|mingw)/i
java    = RUBY_PLATFORM =~ /java/

# Ruby sources generated from the CSS grammar (parser.y / tokenizer.rex).
GENERATED_PARSER    = "lib/nokogiri/css/generated_parser.rb"
GENERATED_TOKENIZER = "lib/nokogiri/css/generated_tokenizer.rb"

# Make sure hoe-debugging is installed
Hoe.plugin :debugging

# Gem specification for nokogiri, built through Hoe.
HOE = Hoe.spec 'nokogiri' do
  developer('Aaron Patterson', 'aaronp@rubyforge.org')
  developer('Mike Dalessio', 'mike.dalessio@gmail.com')

  # When HLANG is set it is inserted into the file name
  # (README.<HLANG>.rdoc); otherwise the plain README.rdoc is used.
  localized = lambda { |base| [base, ENV['HLANG'], 'rdoc'].compact.join('.') }
  self.readme_file      = localized.call('README')
  self.history_file     = localized.call('CHANGELOG')
  self.extra_rdoc_files = FileList['*.rdoc']

  # Build products removed by `rake clean`.
  self.clean_globs = [
    'lib/nokogiri/*.{o,so,bundle,a,log,dll}',
    'lib/nokogiri/nokogiri.rb',
    'lib/nokogiri/1.{8,9}',
    GENERATED_PARSER,
    GENERATED_TOKENIZER,
    'cross',
  ]

  # Development-only dependencies, any version.
  ['racc', 'rexical', 'rake-compiler'].each do |dev_gem|
    self.extra_dev_deps << [dev_gem, '>= 0']
  end

  self.spec_extras = { :extensions => ["ext/nokogiri/extconf.rb"] }
end
37
+
38
# Runs RDoc over the gem's require paths and extra rdoc files, using the
# README as the main page.
task :ws_docs do
  doc_title = "#{HOE.name}-#{HOE.version} Documentation"

  rdoc_args = [
    "--main=#{HOE.readme_file}",
    '--format=activerecord',
    '--threads=1',
    "--title=#{doc_title.inspect}",
  ]
  rdoc_args += HOE.spec.require_paths
  rdoc_args += HOE.spec.extra_rdoc_files

  require 'rdoc/rdoc'
  # Default RAILS_ROOT to a sibling nokogiri_ws directory unless already set.
  ENV['RAILS_ROOT'] ||= File.expand_path(File.join('..', 'nokogiri_ws'))
  RDoc::RDoc.new.document rdoc_args
end
53
+
54
# Native extension tasks; skipped entirely when running under JRuby.
unless java
  gem 'rake-compiler', '>= 0.4.1'
  require "rake/extensiontask"

  RET = Rake::ExtensionTask.new("nokogiri", HOE.spec) do |ext|
    # FAT_DIR, when set, adds one more directory level under lib/nokogiri.
    ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact)
    ext.config_options << ENV['EXTOPTS']

    # Cross compilation for Windows uses the libraries unpacked in tmp/cross.
    ext.cross_compile  = true
    ext.cross_platform = 'i386-mingw32'
    cross_root = File.join(File.dirname(__FILE__), 'tmp', 'cross')
    [
      ['iconv', 'iconv'],
      ['xml2',  'libxml2'],
      ['xslt',  'libxslt'],
    ].each do |option, directory|
      ext.cross_config_options <<
        "--with-#{option}-dir=#{File.join(cross_root, directory)}"
    end
  end

  # Writes a loader that requires the extension from a directory named
  # after RUBY_VERSION with its last numeric component removed.
  file 'lib/nokogiri/nokogiri.rb' do
    loader_path = "lib/#{HOE.name}/#{HOE.name}.rb"
    File.open(loader_path, 'wb') do |loader|
      loader.write <<-eoruby
require "#{HOE.name}/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/#{HOE.name}"
      eoruby
    end
  end

  namespace :cross do
    # Switches the gemspec to a precompiled x86-mingw32 gem: no extensions
    # to build, and the loader, .so files and DLLs added to the file list.
    task :file_list do
      spec = HOE.spec
      spec.platform   = 'x86-mingw32'
      spec.extensions = []
      spec.files += Dir["lib/#{HOE.name}/#{HOE.name}.rb"]
      spec.files += Dir["lib/#{HOE.name}/1.{8,9}/#{HOE.name}.so"]
      spec.files += Dir["ext/nokogiri/*.dll"]
    end
  end

  [
    "lib/nokogiri/nokogiri.{so,dylib,rb,bundle}",
    "lib/nokogiri/1.{8,9}",
    "ext/nokogiri/*.dll",
  ].each { |pattern| CLOBBER.include(pattern) }
end
95
+
96
# Tasks that write nokogiri.gemspec for development and JRuby builds.
namespace :gem do
  namespace :dev do
    # Writes a gemspec whose version carries a timestamp suffix.
    task :spec => [ GENERATED_PARSER, GENERATED_TOKENIZER ] do
      File.open("#{HOE.name}.gemspec", 'w') do |gemspec|
        timestamp = Time.now.strftime("%Y%m%d%H%M%S")
        HOE.spec.version = "#{HOE.version}.#{timestamp}"
        gemspec.write(HOE.spec.to_ruby)
      end
    end
  end

  desc "Build a gem targetted for JRuby"
  task :jruby => ['gem:jruby:spec'] do
    system "gem build nokogiri.gemspec"
    FileUtils.mkdir_p "pkg"
    built_gems = Dir.glob("nokogiri*-java.gem")
    FileUtils.mv built_gems, "pkg"
  end

  namespace :jruby do
    # Writes a gemspec for the java platform: the generated sources and the
    # DLLs are shipped, and no native extension is built.
    task :spec => [GENERATED_PARSER, GENERATED_TOKENIZER] do
      File.open("#{HOE.name}.gemspec", 'w') do |gemspec|
        spec = HOE.spec
        spec.platform = 'java'
        spec.files << GENERATED_PARSER
        spec.files << GENERATED_TOKENIZER
        spec.files += Dir["ext/nokogiri/*.dll"]
        spec.extensions = []
        gemspec.write(spec.to_ruby)
      end
    end
  end

  task :spec => ['gem:dev:spec']
end
128
+
129
# Regenerates the CSS parser from the racc grammar.
# Uses the racc found on PATH, falling back to the one in Ruby's bindir.
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
  begin
    racc = `which racc`.strip
    # RbConfig is the supported name; the old ::Config alias is deprecated
    # and no longer exists on current Rubies.
    racc = "#{RbConfig::CONFIG['bindir']}/racc" if racc.empty?
    sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
  rescue
    abort "need racc, sudo gem install racc"
  end
end

# Regenerates the CSS tokenizer from the rexical definition.
file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
  begin
    sh "rex --independent -o #{t.name} #{t.prerequisites.first}"
  rescue
    abort "need rexical, sudo gem install rexical"
  end
end
146
+
147
# Prebuilt win32 packages needed for the Windows cross build, in the order
# their tasks are defined.
libs = %w{
  iconv-1.9.2.win32
  zlib-1.2.3.win32
  libxml2-2.7.3.win32
  libxslt-1.1.24.win32
}

# DLL that each package contributes to ext/nokogiri.
lib_dlls = {
  'iconv-1.9.2.win32'    => 'iconv.dll',
  'zlib-1.2.3.win32'     => 'zlib1.dll',
  'libxml2-2.7.3.win32'  => 'libxml2.dll',
  'libxslt-1.1.24.win32' => 'libxslt.dll',
}

libs.each do |lib|
  libname = lib.split('-').first
  dll     = "ext/nokogiri/#{lib_dlls[lib]}"

  # Download the zip archive into tmp/stash (wget first, curl as fallback).
  file "tmp/stash/#{lib}.zip" do |t|
    puts "downloading #{lib}"
    FileUtils.mkdir_p('tmp/stash')
    Dir.chdir('tmp/stash') do
      url = "ftp://ftp.xmlsoft.org/libxml2/win32/#{lib}.zip"
      system("wget #{url} || curl -O #{url}")
    end
  end

  # Unpack the archive into tmp/cross/<libname>.
  file "tmp/cross/#{libname}" => ["tmp/stash/#{lib}.zip"] do |t|
    puts "unzipping #{lib}.zip"
    FileUtils.mkdir_p('tmp/cross')
    Dir.chdir('tmp/cross') do
      sh "unzip ../stash/#{lib}.zip"
      sh "cp #{lib}/bin/* #{lib}/lib" # put DLL in lib, so dirconfig works
      sh "mv #{lib} #{libname}"
      sh "touch #{libname}"
    end
  end

  # Copy the package's DLLs next to the extension sources.
  file dll => "tmp/cross/#{libname}" do |t|
    Dir.chdir('tmp/cross') do
      sh "cp #{libname}/bin/*.dll ../../ext/nokogiri/"
    end
  end

  Rake::Task[:cross].prerequisites << dll if Rake::Task.task_defined?(:cross)
  Rake::Task['gem:jruby:spec'].prerequisites << dll
end

# Added once, after every DLL prerequisite: cross:file_list globs
# ext/nokogiri/*.dll, so it must run only after all DLLs have been copied.
# Appending it inside the loop placed it directly after the first DLL.
if Rake::Task.task_defined?(:cross)
  Rake::Task[:cross].prerequisites << "lib/nokogiri/nokogiri.rb"
  Rake::Task[:cross].prerequisites << "cross:file_list"
end
197
+
198
+ require 'tasks/test'
199
+
200
desc "set environment variables to build and/or test with debug options"
task :debug do
  ENV['NOKOGIRI_DEBUG'] = "true"
  # Append -DDEBUG to whatever CFLAGS already holds (nothing when unset).
  ENV['CFLAGS'] = "#{ENV['CFLAGS']} -DDEBUG"
end
206
+
207
+ # required_ruby_version
208
+
209
# Make the generated CSS parser and tokenizer prerequisites of the tasks
# that need them; native builds also compile the extension before testing.
generated_sources = [GENERATED_PARSER, GENERATED_TOKENIZER]

# Only do this on unix, since we can't build on windows
unless windows || java || ENV['NOKOGIRI_FFI']
  [:compile, :check_manifest].each do |task_name|
    Rake::Task[task_name].prerequisites.concat(generated_sources)
  end

  Rake::Task[:test].prerequisites << :compile
  if Hoe.plugins.include?(:debugging)
    ['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
      Rake::Task["test:#{task_name}"].prerequisites << :compile
    end
  end
else
  [:test, :check_manifest].each do |task_name|
    # Rake::Task[] raises for an unknown task rather than returning nil, so
    # task_defined? is what actually lets a missing task be skipped.
    next unless Rake::Task.task_defined?(task_name)
    Rake::Task[task_name].prerequisites.concat(generated_sources)
  end
end
230
+
231
# Convenience tasks that install the parser-generator gems via sudo.
namespace :install do
  desc "Install rex and racc for development"
  task :deps => %w(rexical racc)

  %w(racc rexical).each do |tool|
    task tool.to_sym do
      sh "sudo gem install #{tool}"
    end
  end
end
243
+
244
+ # vim: syntax=Ruby
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+ require 'open-uri'
4
+ require 'irb'
5
+ require 'uri'
6
+ require 'rubygems'
7
+ require 'nokogiri'
8
+
9
# Parser entry point used for the document; --type replaces it.
parse_class = Nokogiri

# The block parameter is named differently from the outer `opts` so it does
# not shadow the variable being assigned.
opts = OptionParser.new do |parser|
  parser.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
  parser.define_head "Usage: nokogiri <uri|path> [options]"
  parser.separator ""
  parser.separator "Examples:"
  parser.separator " nokogiri http://www.ruby-lang.org/"
  parser.separator " nokogiri ./public/index.html"
  parser.separator ""
  parser.separator "Options:"

  parser.on("--type [TYPE]", [:xml, :html]) do |v|
    # TYPE is optional, so a bare `--type` yields nil; keep the current
    # parser in that case instead of replacing it with nil.
    parse_class = {:xml => Nokogiri::XML, :html => Nokogiri::HTML}[v] || parse_class
  end

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-v", "--version", "Show version") do
    require 'yaml'
    puts Nokogiri::VERSION_INFO.to_yaml
    exit
  end
end
opts.parse!

uri = ARGV.shift

# No document given: print usage and exit with a failure status.
if uri.to_s.strip.empty?
  puts opts
  exit 1
end

# NOTE(review): Kernel#open treats an argument starting with "|" as a command
# to run; acceptable for an interactive tool, but do not feed it untrusted input.
@doc = parse_class.parse(open(uri).read)

puts "Your document is stored in @doc..."
IRB.start
49
+
@@ -0,0 +1,145 @@
1
# Clear RC_ARCHS on darwin; this is done before mkmf is loaded.
ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/

# :stopdoc:

require 'mkmf'

# RbConfig is the supported constant; the old top-level Config alias is
# deprecated and no longer exists on current Rubies.
ROOT       = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
LIBDIR     = RbConfig::CONFIG['libdir']
INCLUDEDIR = RbConfig::CONFIG['includedir']

if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'macruby'
  $LIBRUBYARG_STATIC.gsub!(/-static/, '')
end

# Start from the caller's CFLAGS, then add per-platform defines.
$CFLAGS << " #{ENV["CFLAGS"]}"
case RbConfig::CONFIG['target_os']
when 'mingw32'
  $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
when 'solaris2'
  $CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
else
  $CFLAGS << " -g -DXP_UNIX"
end

$CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
25
+
26
# Include directories to probe, in priority order. Each base directory is
# followed by its libxml2 subdirectory.
HEADER_DIRS = [
  '/opt/local/include',  # First search /opt/local for macports
  '/usr/local/include',  # Then search /usr/local for people that installed from source
  INCLUDEDIR,            # Check the ruby install locations
  '/usr/include',        # Finally fall back to /usr
].map { |base| [base, File.join(base, 'libxml2')] }.flatten

# Library directories to probe, in the same priority order.
LIB_DIRS = [
  '/opt/local/lib',  # First search /opt/local for macports
  '/usr/local/lib',  # Then search /usr/local for people that installed from source
  LIBDIR,            # Check the ruby install locations
  '/usr/lib',        # Finally fall back to /usr
]
57
+
58
# Pass each library through mkmf's dir_config, with MacPorts paths as the
# defaults, and put the directories it returns at the front of the search
# lists. Later entries end up ahead of earlier ones.
[
  ['iconv', '/opt/local/include'],
  ['xml2',  '/opt/local/include/libxml2'],
  ['xslt',  '/opt/local/include/'],
].each do |library, default_include|
  configured = dir_config(library, default_include, '/opt/local/lib')
  next if configured == ["", ""]

  HEADER_DIRS.unshift configured.first
  LIB_DIRS.unshift configured[1]
end

# Directories already added to $INCFLAGS by nokogiri_find_header.
CUSTOM_DASH_I = []
77
+
78
# Looks for +header_file+ in each of +paths+, in order.
#
# The first directory that contains the file and whose -I flag lets the
# preprocessor include it is prepended to $INCFLAGS (once per directory,
# tracked in CUSTOM_DASH_I), and the search stops there. If no directory
# qualifies, the header is tried with the default include path.
#
# The block passed to checking_for yields the matching directory, or the
# result of the plain try_cpp fallback.
def nokogiri_find_header header_file, *paths
  # mkmf in ruby 1.8.5 does not have the "checking_message" method
  message = defined?(checking_message) ?
    checking_message(header_file, paths) :
    header_file

  header = cpp_include header_file
  checking_for message do
    found = false
    paths.each do |dir|
      # File.exist? is available on every Ruby version; the plural
      # File.exists? is deprecated and was removed in Ruby 3.2.
      next unless File.exist?(File.join(dir, header_file))

      opt = "-I#{dir}".quote
      next unless try_cpp(header, opt)

      unless CUSTOM_DASH_I.include? dir
        $INCFLAGS = "#{opt} #{$INCFLAGS}"
        CUSTOM_DASH_I << dir
      end
      found = dir
      break
    end
    found ||= try_cpp(header)
  end
end
103
+
104
# Required headers, each paired with the message shown when it is missing.
[
  ['iconv.h',          "iconv is missing. try 'port install iconv' or 'yum install iconv'"],
  ['libxml/parser.h',  "libxml2 is missing. try 'port install libxml2' or 'yum install libxml2-devel'"],
  ['libxslt/xslt.h',   "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"],
  ['libexslt/exslt.h', "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"],
].each do |header, hint|
  abort hint unless nokogiri_find_header(header, *HEADER_DIRS)
end

# Required libraries, the symbol probed in each, and the failure message.
[
  ['xml2',  'xmlParseDoc',            "libxml2 is missing. try 'port install libxml2' or 'yum install libxml2'"],
  ['xslt',  'xsltParseStylesheetDoc', "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"],
  ['exslt', 'exsltFuncRegister',      "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"],
].each do |library, symbol, hint|
  abort hint unless find_library(library, symbol, *LIB_DIRS)
end

# Optional functions; each is probed exactly once (the original probed
# xmlRelaxNGSetParserStructuredErrors twice).
%w{
  xmlRelaxNGSetParserStructuredErrors
  xmlRelaxNGSetValidStructuredErrors
  xmlSchemaSetValidStructuredErrors
  xmlSchemaSetParserStructuredErrors
}.each { |function| have_func(function) }

# Link against the profiler library only when CPUPROFILE is set.
if ENV['CPUPROFILE']
  unless find_library('profiler', 'ProfilerEnable', *LIB_DIRS)
    abort "google performance tools are not installed"
  end
end

create_makefile('nokogiri/nokogiri')
# :startdoc:
@@ -0,0 +1,145 @@
1
#include <html_document.h>
#include <limits.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * new
6
+ *
7
+ * Create a new document
8
+ */
9
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
10
+ {
11
+ VALUE uri, external_id, rest, rb_doc;
12
+
13
+ rb_scan_args(argc, argv, "0*", &rest);
14
+ uri = rb_ary_entry(rest, (long)0);
15
+ external_id = rb_ary_entry(rest, (long)1);
16
+
17
+ htmlDocPtr doc = htmlNewDoc(
18
+ RTEST(uri) ? (const xmlChar *)StringValuePtr(uri) : NULL,
19
+ RTEST(external_id) ? (const xmlChar *)StringValuePtr(external_id) : NULL
20
+ );
21
+ rb_doc = Nokogiri_wrap_xml_document(klass, doc);
22
+ rb_obj_call_init(rb_doc, argc, argv);
23
+ return rb_doc ;
24
+ }
25
+
26
+ /*
27
+ * call-seq:
28
+ * read_io(io, url, encoding, options)
29
+ *
30
+ * Read the HTML document from +io+ with given +url+, +encoding+,
31
+ * and +options+. See Nokogiri::HTML.parse
32
+ */
33
+ static VALUE read_io( VALUE klass,
34
+ VALUE io,
35
+ VALUE url,
36
+ VALUE encoding,
37
+ VALUE options )
38
+ {
39
+ const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
40
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
41
+ VALUE error_list = rb_ary_new();
42
+
43
+ xmlResetLastError();
44
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
45
+
46
+ htmlDocPtr doc = htmlReadIO(
47
+ io_read_callback,
48
+ io_close_callback,
49
+ (void *)io,
50
+ c_url,
51
+ c_enc,
52
+ (int)NUM2INT(options)
53
+ );
54
+ xmlSetStructuredErrorFunc(NULL, NULL);
55
+
56
+ if(doc == NULL) {
57
+ xmlFreeDoc(doc);
58
+
59
+ xmlErrorPtr error = xmlGetLastError();
60
+ if(error)
61
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
62
+ else
63
+ rb_raise(rb_eRuntimeError, "Could not parse document");
64
+
65
+ return Qnil;
66
+ }
67
+
68
+ VALUE document = Nokogiri_wrap_xml_document(klass, doc);
69
+ rb_iv_set(document, "@errors", error_list);
70
+ return document;
71
+ }
72
+
73
+ /*
74
+ * call-seq:
75
+ * read_memory(string, url, encoding, options)
76
+ *
77
+ * Read the HTML document contained in +string+ with given +url+, +encoding+,
78
+ * and +options+. See Nokogiri::HTML.parse
79
+ */
80
+ static VALUE read_memory( VALUE klass,
81
+ VALUE string,
82
+ VALUE url,
83
+ VALUE encoding,
84
+ VALUE options )
85
+ {
86
+ const char * c_buffer = StringValuePtr(string);
87
+ const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
88
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
89
+ int len = RSTRING_LEN(string);
90
+ VALUE error_list = rb_ary_new();
91
+
92
+ xmlResetLastError();
93
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
94
+
95
+ htmlDocPtr doc = htmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
96
+ xmlSetStructuredErrorFunc(NULL, NULL);
97
+
98
+ if(doc == NULL) {
99
+ xmlFreeDoc(doc);
100
+
101
+ xmlErrorPtr error = xmlGetLastError();
102
+ if(error)
103
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
104
+ else
105
+ rb_raise(rb_eRuntimeError, "Could not parse document");
106
+
107
+ return Qnil;
108
+ }
109
+
110
+ VALUE document = Nokogiri_wrap_xml_document(klass, doc);
111
+ rb_iv_set(document, "@errors", error_list);
112
+ return document;
113
+ }
114
+
115
+ /*
116
+ * call-seq:
117
+ * type
118
+ *
119
+ * The type for this document
120
+ */
121
+ static VALUE type(VALUE self)
122
+ {
123
+ htmlDocPtr doc;
124
+ Data_Get_Struct(self, xmlDoc, doc);
125
+ return INT2NUM((long)doc->type);
126
+ }
127
+
128
+ VALUE cNokogiriHtmlDocument ;
129
+ void init_html_document()
130
+ {
131
+ VALUE nokogiri = rb_define_module("Nokogiri");
132
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
133
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
134
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
135
+ VALUE xml_doc = rb_define_class_under(xml, "Document", node);
136
+ VALUE klass = rb_define_class_under(html, "Document", xml_doc);
137
+
138
+ cNokogiriHtmlDocument = klass;
139
+
140
+ rb_define_singleton_method(klass, "read_memory", read_memory, 4);
141
+ rb_define_singleton_method(klass, "read_io", read_io, 4);
142
+ rb_define_singleton_method(klass, "new", new, -1);
143
+
144
+ rb_define_method(klass, "type", type, 0);
145
+ }