nokogiri 1.3.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (256)
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +233 -0
  3. data/CHANGELOG.rdoc +222 -0
  4. data/Manifest.txt +247 -0
  5. data/README.ja.rdoc +103 -0
  6. data/README.rdoc +117 -0
  7. data/Rakefile +205 -0
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +89 -0
  10. data/ext/nokogiri/html_document.c +183 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +30 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser.c +57 -0
  17. data/ext/nokogiri/html_sax_parser.h +11 -0
  18. data/ext/nokogiri/iconv.dll +0 -0
  19. data/ext/nokogiri/libexslt.dll +0 -0
  20. data/ext/nokogiri/libxml2.dll +0 -0
  21. data/ext/nokogiri/libxslt.dll +0 -0
  22. data/ext/nokogiri/nokogiri.c +81 -0
  23. data/ext/nokogiri/nokogiri.h +149 -0
  24. data/ext/nokogiri/xml_attr.c +92 -0
  25. data/ext/nokogiri/xml_attr.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +53 -0
  27. data/ext/nokogiri/xml_cdata.h +9 -0
  28. data/ext/nokogiri/xml_comment.c +51 -0
  29. data/ext/nokogiri/xml_comment.h +9 -0
  30. data/ext/nokogiri/xml_document.c +308 -0
  31. data/ext/nokogiri/xml_document.h +21 -0
  32. data/ext/nokogiri/xml_document_fragment.c +48 -0
  33. data/ext/nokogiri/xml_document_fragment.h +10 -0
  34. data/ext/nokogiri/xml_dtd.c +102 -0
  35. data/ext/nokogiri/xml_dtd.h +8 -0
  36. data/ext/nokogiri/xml_entity_reference.c +50 -0
  37. data/ext/nokogiri/xml_entity_reference.h +9 -0
  38. data/ext/nokogiri/xml_io.c +24 -0
  39. data/ext/nokogiri/xml_io.h +10 -0
  40. data/ext/nokogiri/xml_namespace.c +69 -0
  41. data/ext/nokogiri/xml_namespace.h +12 -0
  42. data/ext/nokogiri/xml_node.c +928 -0
  43. data/ext/nokogiri/xml_node.h +14 -0
  44. data/ext/nokogiri/xml_node_set.c +386 -0
  45. data/ext/nokogiri/xml_node_set.h +9 -0
  46. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  47. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  48. data/ext/nokogiri/xml_reader.c +572 -0
  49. data/ext/nokogiri/xml_reader.h +10 -0
  50. data/ext/nokogiri/xml_relax_ng.c +106 -0
  51. data/ext/nokogiri/xml_relax_ng.h +9 -0
  52. data/ext/nokogiri/xml_sax_parser.c +336 -0
  53. data/ext/nokogiri/xml_sax_parser.h +10 -0
  54. data/ext/nokogiri/xml_sax_push_parser.c +86 -0
  55. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  56. data/ext/nokogiri/xml_schema.c +107 -0
  57. data/ext/nokogiri/xml_schema.h +9 -0
  58. data/ext/nokogiri/xml_syntax_error.c +203 -0
  59. data/ext/nokogiri/xml_syntax_error.h +12 -0
  60. data/ext/nokogiri/xml_text.c +47 -0
  61. data/ext/nokogiri/xml_text.h +9 -0
  62. data/ext/nokogiri/xml_xpath.c +53 -0
  63. data/ext/nokogiri/xml_xpath.h +11 -0
  64. data/ext/nokogiri/xml_xpath_context.c +252 -0
  65. data/ext/nokogiri/xml_xpath_context.h +9 -0
  66. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  67. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  68. data/ext/nokogiri/zlib1.dll +0 -0
  69. data/lib/action-nokogiri.rb +36 -0
  70. data/lib/nokogiri.rb +110 -0
  71. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  72. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  73. data/lib/nokogiri/css.rb +25 -0
  74. data/lib/nokogiri/css/generated_parser.rb +748 -0
  75. data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
  76. data/lib/nokogiri/css/node.rb +107 -0
  77. data/lib/nokogiri/css/parser.rb +82 -0
  78. data/lib/nokogiri/css/parser.y +227 -0
  79. data/lib/nokogiri/css/syntax_error.rb +7 -0
  80. data/lib/nokogiri/css/tokenizer.rb +11 -0
  81. data/lib/nokogiri/css/tokenizer.rex +54 -0
  82. data/lib/nokogiri/css/xpath_visitor.rb +172 -0
  83. data/lib/nokogiri/decorators.rb +2 -0
  84. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  85. data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
  86. data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
  87. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
  88. data/lib/nokogiri/decorators/slop.rb +33 -0
  89. data/lib/nokogiri/ffi/html/document.rb +37 -0
  90. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  91. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  92. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  93. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  94. data/lib/nokogiri/ffi/libxml.rb +314 -0
  95. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  96. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  97. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  98. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  102. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  103. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  105. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  106. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  107. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  108. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  109. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  110. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  111. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  112. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  113. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  114. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  115. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  117. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  118. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  119. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  120. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  121. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  122. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  123. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  124. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  125. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  126. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  127. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  128. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  129. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  130. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  131. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  132. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  133. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  134. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  135. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  136. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  137. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  138. data/lib/nokogiri/hpricot.rb +62 -0
  139. data/lib/nokogiri/html.rb +34 -0
  140. data/lib/nokogiri/html/builder.rb +35 -0
  141. data/lib/nokogiri/html/document.rb +71 -0
  142. data/lib/nokogiri/html/document_fragment.rb +15 -0
  143. data/lib/nokogiri/html/element_description.rb +23 -0
  144. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  145. data/lib/nokogiri/html/sax/parser.rb +47 -0
  146. data/lib/nokogiri/nokogiri.rb +1 -0
  147. data/lib/nokogiri/syntax_error.rb +4 -0
  148. data/lib/nokogiri/version.rb +29 -0
  149. data/lib/nokogiri/version_warning.rb +11 -0
  150. data/lib/nokogiri/xml.rb +62 -0
  151. data/lib/nokogiri/xml/attr.rb +9 -0
  152. data/lib/nokogiri/xml/builder.rb +254 -0
  153. data/lib/nokogiri/xml/cdata.rb +11 -0
  154. data/lib/nokogiri/xml/document.rb +100 -0
  155. data/lib/nokogiri/xml/document_fragment.rb +49 -0
  156. data/lib/nokogiri/xml/dtd.rb +11 -0
  157. data/lib/nokogiri/xml/entity_declaration.rb +11 -0
  158. data/lib/nokogiri/xml/fragment_handler.rb +55 -0
  159. data/lib/nokogiri/xml/namespace.rb +7 -0
  160. data/lib/nokogiri/xml/node.rb +745 -0
  161. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  162. data/lib/nokogiri/xml/node_set.rb +238 -0
  163. data/lib/nokogiri/xml/notation.rb +6 -0
  164. data/lib/nokogiri/xml/parse_options.rb +80 -0
  165. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  166. data/lib/nokogiri/xml/reader.rb +66 -0
  167. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  168. data/lib/nokogiri/xml/sax.rb +3 -0
  169. data/lib/nokogiri/xml/sax/document.rb +143 -0
  170. data/lib/nokogiri/xml/sax/parser.rb +101 -0
  171. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  172. data/lib/nokogiri/xml/schema.rb +65 -0
  173. data/lib/nokogiri/xml/syntax_error.rb +34 -0
  174. data/lib/nokogiri/xml/xpath.rb +10 -0
  175. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  176. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  177. data/lib/nokogiri/xslt.rb +48 -0
  178. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  179. data/lib/xsd/xmlparser/nokogiri.rb +64 -0
  180. data/tasks/test.rb +161 -0
  181. data/test/css/test_nthiness.rb +160 -0
  182. data/test/css/test_parser.rb +277 -0
  183. data/test/css/test_tokenizer.rb +176 -0
  184. data/test/css/test_xpath_visitor.rb +76 -0
  185. data/test/ffi/test_document.rb +35 -0
  186. data/test/files/address_book.rlx +12 -0
  187. data/test/files/address_book.xml +10 -0
  188. data/test/files/dont_hurt_em_why.xml +422 -0
  189. data/test/files/exslt.xml +8 -0
  190. data/test/files/exslt.xslt +35 -0
  191. data/test/files/po.xml +32 -0
  192. data/test/files/po.xsd +66 -0
  193. data/test/files/staff.xml +59 -0
  194. data/test/files/staff.xslt +32 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/helper.rb +123 -0
  197. data/test/hpricot/files/basic.xhtml +17 -0
  198. data/test/hpricot/files/boingboing.html +2266 -0
  199. data/test/hpricot/files/cy0.html +3653 -0
  200. data/test/hpricot/files/immob.html +400 -0
  201. data/test/hpricot/files/pace_application.html +1320 -0
  202. data/test/hpricot/files/tenderlove.html +16 -0
  203. data/test/hpricot/files/uswebgen.html +220 -0
  204. data/test/hpricot/files/utf8.html +1054 -0
  205. data/test/hpricot/files/week9.html +1723 -0
  206. data/test/hpricot/files/why.xml +19 -0
  207. data/test/hpricot/load_files.rb +11 -0
  208. data/test/hpricot/test_alter.rb +68 -0
  209. data/test/hpricot/test_builder.rb +20 -0
  210. data/test/hpricot/test_parser.rb +426 -0
  211. data/test/hpricot/test_paths.rb +15 -0
  212. data/test/hpricot/test_preserved.rb +77 -0
  213. data/test/hpricot/test_xml.rb +30 -0
  214. data/test/html/sax/test_parser.rb +52 -0
  215. data/test/html/test_builder.rb +156 -0
  216. data/test/html/test_document.rb +361 -0
  217. data/test/html/test_document_encoding.rb +46 -0
  218. data/test/html/test_document_fragment.rb +97 -0
  219. data/test/html/test_element_description.rb +95 -0
  220. data/test/html/test_named_characters.rb +14 -0
  221. data/test/html/test_node.rb +165 -0
  222. data/test/test_convert_xpath.rb +186 -0
  223. data/test/test_css_cache.rb +56 -0
  224. data/test/test_gc.rb +15 -0
  225. data/test/test_memory_leak.rb +77 -0
  226. data/test/test_nokogiri.rb +127 -0
  227. data/test/test_reader.rb +316 -0
  228. data/test/test_xslt_transforms.rb +131 -0
  229. data/test/xml/node/test_save_options.rb +20 -0
  230. data/test/xml/node/test_subclass.rb +44 -0
  231. data/test/xml/sax/test_parser.rb +169 -0
  232. data/test/xml/sax/test_push_parser.rb +92 -0
  233. data/test/xml/test_attr.rb +38 -0
  234. data/test/xml/test_builder.rb +73 -0
  235. data/test/xml/test_cdata.rb +38 -0
  236. data/test/xml/test_comment.rb +23 -0
  237. data/test/xml/test_document.rb +397 -0
  238. data/test/xml/test_document_encoding.rb +26 -0
  239. data/test/xml/test_document_fragment.rb +76 -0
  240. data/test/xml/test_dtd.rb +42 -0
  241. data/test/xml/test_dtd_encoding.rb +31 -0
  242. data/test/xml/test_entity_reference.rb +21 -0
  243. data/test/xml/test_namespace.rb +43 -0
  244. data/test/xml/test_node.rb +808 -0
  245. data/test/xml/test_node_attributes.rb +34 -0
  246. data/test/xml/test_node_encoding.rb +84 -0
  247. data/test/xml/test_node_set.rb +368 -0
  248. data/test/xml/test_parse_options.rb +52 -0
  249. data/test/xml/test_processing_instruction.rb +30 -0
  250. data/test/xml/test_reader_encoding.rb +126 -0
  251. data/test/xml/test_relax_ng.rb +60 -0
  252. data/test/xml/test_schema.rb +65 -0
  253. data/test/xml/test_text.rb +18 -0
  254. data/test/xml/test_unparented_node.rb +381 -0
  255. data/test/xml/test_xpath.rb +106 -0
  256. metadata +409 -0
data/Rakefile ADDED
@@ -0,0 +1,205 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'hoe'
6
+
7
+ LIB_DIR = File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
8
+ $LOAD_PATH << LIB_DIR
9
+
10
+ windows = RUBY_PLATFORM =~ /(mswin|mingw)/i ? true : false
11
+ java = RUBY_PLATFORM =~ /java/ ? true : false
12
+
13
+ GENERATED_PARSER = "lib/nokogiri/css/generated_parser.rb"
14
+ GENERATED_TOKENIZER = "lib/nokogiri/css/generated_tokenizer.rb"
15
+
16
+ require 'nokogiri/version'
17
+
18
+ HOE = Hoe.new('nokogiri', Nokogiri::VERSION) do |p|
19
+ p.developer('Aaron Patterson', 'aaronp@rubyforge.org')
20
+ p.developer('Mike Dalessio', 'mike.dalessio@gmail.com')
21
+ p.readme_file = ['README', ENV['HLANG'], 'rdoc'].compact.join('.')
22
+ p.history_file = ['CHANGELOG', ENV['HLANG'], 'rdoc'].compact.join('.')
23
+ p.extra_rdoc_files = FileList['*.rdoc']
24
+ p.clean_globs = [
25
+ 'lib/nokogiri/*.{o,so,bundle,a,log,dll}',
26
+ GENERATED_PARSER,
27
+ GENERATED_TOKENIZER,
28
+ 'cross',
29
+ ]
30
+
31
+ p.extra_dev_deps << "racc"
32
+ p.extra_dev_deps << "tenderlove-frex"
33
+ p.extra_dev_deps << "rake-compiler"
34
+
35
+ p.spec_extras = { :extensions => ["ext/nokogiri/extconf.rb"] }
36
+ end
37
+
38
+ unless java
39
+
40
+ gem 'rake-compiler', '>= 0.4.1'
41
+ require "rake/extensiontask"
42
+
43
+ RET = Rake::ExtensionTask.new("nokogiri", HOE.spec) do |ext|
44
+ ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact)
45
+
46
+ ext.config_options << ENV['EXTOPTS']
47
+ cross_dir = File.join(File.dirname(__FILE__), 'tmp', 'cross')
48
+ ext.cross_compile = true
49
+ ext.cross_platform = 'i386-mswin32'
50
+ ext.cross_config_options <<
51
+ "--with-iconv-dir=#{File.join(cross_dir, 'iconv')}"
52
+ ext.cross_config_options <<
53
+ "--with-xml2-dir=#{File.join(cross_dir, 'libxml2')}"
54
+ ext.cross_config_options <<
55
+ "--with-xslt-dir=#{File.join(cross_dir, 'libxslt')}"
56
+ end
57
+
58
+ ###
59
+ # To build the windows fat binary, do:
60
+ #
61
+ # rake fat_binary native gem
62
+ #
63
+ # I keep my ruby in multiruby, so my command is like this:
64
+ #
65
+ # RAKE19=~/.multiruby/install/1.9.1-p129/bin/rake \
66
+ # rake fat_binary native gem
67
+ task 'fat_binary' do
68
+ rake19 = ENV['RAKE19'] || 'rake1.9'
69
+ system("rake clean cross compile RUBY_CC_VERSION=1.8.6 FAT_DIR=1.8")
70
+ system("#{rake19} clean cross compile RUBY_CC_VERSION=1.9.1 FAT_DIR=1.9")
71
+ File.open("lib/#{HOE.name}/#{HOE.name}.rb", 'wb') do |f|
72
+ f.write <<-eoruby
73
+ require "#{HOE.name}/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/#{HOE.name}"
74
+ eoruby
75
+ end
76
+ HOE.spec.extensions = []
77
+ HOE.spec.platform = 'x86-mswin32'
78
+ HOE.spec.files += Dir["lib/#{HOE.name}/#{HOE.name}.rb"]
79
+ HOE.spec.files += Dir["lib/#{HOE.name}/1.{8,9}/*"]
80
+ HOE.spec.files += Dir["ext/nokogiri/*.dll"]
81
+ end
82
+ CLOBBER.include("lib/nokogiri/nokogiri.rb")
83
+ CLOBBER.include("lib/nokogiri/1.{8,9}")
84
+ end
85
+
86
+ namespace :gem do
87
+ namespace :dev do
88
+ task :spec do
89
+ File.open("#{HOE.name}.gemspec", 'w') do |f|
90
+ HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
91
+ f.write(HOE.spec.to_ruby)
92
+ end
93
+ end
94
+ end
95
+
96
+ desc "Build a gem targetted for JRuby"
97
+ task :jruby => ['gem:jruby:spec'] do
98
+ system "gem build nokogiri.gemspec"
99
+ FileUtils.mkdir_p "pkg"
100
+ FileUtils.mv Dir.glob("nokogiri*-java.gem"), "pkg"
101
+ end
102
+
103
+ namespace :jruby do
104
+ task :spec => [GENERATED_PARSER, GENERATED_TOKENIZER] do
105
+ File.open("#{HOE.name}.gemspec", 'w') do |f|
106
+ HOE.spec.platform = 'java'
107
+ HOE.spec.files << GENERATED_PARSER
108
+ HOE.spec.files << GENERATED_TOKENIZER
109
+ HOE.spec.extensions = []
110
+ f.write(HOE.spec.to_ruby)
111
+ end
112
+ end
113
+ end
114
+
115
+ task :spec => ['gem:dev:spec']
116
+ end
117
+
118
+ file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
119
+ begin
120
+ racc = `which racc`.strip
121
+ racc = "#{::Config::CONFIG['bindir']}/racc" if racc.empty?
122
+ sh "#{racc} -o #{t.name} #{t.prerequisites.first}"
123
+ rescue
124
+ abort "need racc, sudo gem install racc"
125
+ end
126
+ end
127
+
128
+ file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
129
+ begin
130
+ sh "frex --independent -o #{t.name} #{t.prerequisites.first}"
131
+ rescue
132
+ abort "need frex, sudo gem install tenderlove-frex -s http://gems.github.com"
133
+ end
134
+ end
135
+
136
+ libs = %w{
137
+ iconv-1.9.2.win32
138
+ zlib-1.2.3.win32
139
+ libxml2-2.7.3.win32
140
+ libxslt-1.1.24.win32
141
+ }
142
+
143
+ libs.each do |lib|
144
+ file "tmp/stash/#{lib}.zip" do |t|
145
+ puts "downloading #{lib}"
146
+ FileUtils.mkdir_p('tmp/stash')
147
+ Dir.chdir('tmp/stash') do
148
+ url = "ftp://ftp.xmlsoft.org/libxml2/win32/#{lib}.zip"
149
+ system("wget #{url} || curl -O #{url}")
150
+ end
151
+ end
152
+ file "tmp/cross/#{lib.split('-').first}" => ["tmp/stash/#{lib}.zip"] do |t|
153
+ puts "unzipping #{lib}.zip"
154
+ FileUtils.mkdir_p('tmp/cross')
155
+ Dir.chdir('tmp/cross') do
156
+ sh "unzip ../stash/#{lib}.zip"
157
+ sh "cp #{lib}/bin/* #{lib}/lib" # put DLL in lib, so dirconfig works
158
+ sh "cp #{lib}/bin/*.dll ../../ext/nokogiri/"
159
+ sh "mv #{lib} #{lib.split('-').first}"
160
+ sh "touch #{lib.split('-').first}"
161
+ end
162
+ end
163
+ if Rake::Task.task_defined?(:cross)
164
+ Rake::Task[:cross].prerequisites << "tmp/cross/#{lib.split('-').first}"
165
+ end
166
+ end
167
+
168
+ require 'tasks/test'
169
+
170
+ desc "set environment variables to build and/or test with debug options"
171
+ task :debug do
172
+ ENV['NOKOGIRI_DEBUG'] = "true"
173
+ ENV['CFLAGS'] ||= ""
174
+ ENV['CFLAGS'] += " -DDEBUG"
175
+ end
176
+
177
+ # required_ruby_version
178
+
179
+ # Only do this on unix, since we can't build on windows
180
+ unless windows || java || ENV['NOKOGIRI_FFI']
181
+ [:compile, :check_manifest].each do |task_name|
182
+ Rake::Task[task_name].prerequisites << GENERATED_PARSER
183
+ Rake::Task[task_name].prerequisites << GENERATED_TOKENIZER
184
+ end
185
+
186
+ Rake::Task[:test].prerequisites << :compile
187
+ ['valgrind', 'valgrind_mem', 'valgrind_mem0', 'coverage'].each do |task_name|
188
+ Rake::Task["test:#{task_name}"].prerequisites << :compile
189
+ end
190
+ end
191
+
192
+ namespace :install do
193
+ desc "Install frex and racc for development"
194
+ task :deps => %w(frex racc)
195
+
196
+ task :racc do |t|
197
+ sh "sudo gem install racc"
198
+ end
199
+
200
+ task :frex do
201
+ sh "sudo gem install tenderlove-frex -s http://gems.github.com"
202
+ end
203
+ end
204
+
205
+ # vim: syntax=Ruby
data/bin/nokogiri ADDED
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+ require 'open-uri'
4
+ require 'irb'
5
+ require 'uri'
6
+ require 'rubygems'
7
+ require 'nokogiri'
8
+
9
+ opts = OptionParser.new do |opts|
10
+ opts.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
11
+ opts.define_head "Usage: nokogiri <uri|path> [options]"
12
+ opts.separator ""
13
+ opts.separator "Examples:"
14
+ opts.separator " nokogiri http://www.ruby-lang.org/"
15
+ opts.separator " nokogiri ./public/index.html"
16
+ opts.separator ""
17
+ opts.separator "Options:"
18
+
19
+ opts.on_tail("-?", "--help", "Show this message") do
20
+ puts opts
21
+ exit
22
+ end
23
+
24
+ opts.on_tail("-v", "--version", "Show version") do
25
+ require 'yaml'
26
+ puts Nokogiri::VERSION_INFO.to_yaml
27
+ exit
28
+ end
29
+ end
30
+ opts.parse!
31
+
32
+ uri = ARGV.shift
33
+
34
+ if uri.to_s.strip.empty?
35
+ puts opts
36
+ exit 1
37
+ end
38
+
39
+ if URI.parse(uri).scheme
40
+ @doc = Nokogiri(open(uri).read)
41
+ else
42
+ @doc = Nokogiri(File.read(uri))
43
+ end
44
+
45
+ puts "Your document is stored in @doc..."
46
+ IRB.start
47
+
data/ext/nokogiri/extconf.rb ADDED
@@ -0,0 +1,89 @@
1
+ ENV["ARCHFLAGS"] = "-arch #{`uname -p` =~ /powerpc/ ? 'ppc' : 'i386'}"
2
+
3
+ require 'mkmf'
4
+
5
+ ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
6
+ LIBDIR = Config::CONFIG['libdir']
7
+ INCLUDEDIR = Config::CONFIG['includedir']
8
+
9
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'macruby'
10
+ $LIBRUBYARG_STATIC.gsub!(/-static/, '')
11
+ end
12
+
13
+ $CFLAGS << " #{ENV["CFLAGS"]}"
14
+ if Config::CONFIG['target_os'] == 'mingw32'
15
+ $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
16
+ elsif Config::CONFIG['target_os'] == 'solaris2'
17
+ $CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
18
+ else
19
+ $CFLAGS << " -g -DXP_UNIX"
20
+ end
21
+
22
+ $CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
23
+
24
+ HEADER_DIRS = [
25
+ File.join(INCLUDEDIR, "libxml2"),
26
+ INCLUDEDIR,
27
+ '/usr/local/include/libxml2',
28
+ '/usr/include/libxml2',
29
+ ]
30
+
31
+ LIB_DIRS = [
32
+ LIBDIR,
33
+ '/opt/local/lib',
34
+ '/usr/local/lib',
35
+ '/usr/lib'
36
+ ]
37
+
38
+ iconv_dirs = dir_config('iconv', '/opt/local/include', '/opt/local/lib')
39
+ unless [nil, nil] == iconv_dirs
40
+ HEADER_DIRS.unshift iconv_dirs.first
41
+ LIB_DIRS.unshift iconv_dirs[1]
42
+ end
43
+
44
+ xml2_dirs = dir_config('xml2', '/opt/local/include/libxml2', '/opt/local/lib')
45
+ unless [nil, nil] == xml2_dirs
46
+ HEADER_DIRS.unshift xml2_dirs.first
47
+ LIB_DIRS.unshift xml2_dirs[1]
48
+ end
49
+
50
+ xslt_dirs = dir_config('xslt', '/opt/local/include/', '/opt/local/lib')
51
+ unless [nil, nil] == xslt_dirs
52
+ HEADER_DIRS.unshift xslt_dirs.first
53
+ LIB_DIRS.unshift xslt_dirs[1]
54
+ end
55
+
56
+ unless find_header('iconv.h', *HEADER_DIRS)
57
+ abort "iconv is missing. try 'port install iconv' or 'yum install iconv'"
58
+ end
59
+
60
+ unless find_header('libxml/parser.h', *HEADER_DIRS)
61
+ abort "libxml2 is missing. try 'port install libxml2' or 'yum install libxml2-devel'"
62
+ end
63
+
64
+ unless find_header('libxslt/xslt.h', *HEADER_DIRS)
65
+ abort "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"
66
+ end
67
+ unless find_header('libexslt/exslt.h', *HEADER_DIRS)
68
+ abort "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"
69
+ end
70
+
71
+ unless find_library('xml2', 'xmlParseDoc', *LIB_DIRS)
72
+ abort "libxml2 is missing. try 'port install libxml2' or 'yum install libxml2'"
73
+ end
74
+
75
+ unless find_library('xslt', 'xsltParseStylesheetDoc', *LIB_DIRS)
76
+ abort "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"
77
+ end
78
+
79
+ unless find_library('exslt', 'exsltFuncRegister', *LIB_DIRS)
80
+ abort "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"
81
+ end
82
+
83
+ if ENV['CPUPROFILE']
84
+ unless find_library('profiler', 'ProfilerEnable', *LIB_DIRS)
85
+ abort "google performance tools are not installed"
86
+ end
87
+ end
88
+
89
+ create_makefile('nokogiri/nokogiri')
data/ext/nokogiri/html_document.c ADDED
@@ -0,0 +1,183 @@
1
+ #include <html_document.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * new
6
+ *
7
+ * Create a new document
8
+ */
9
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
10
+ {
11
+ VALUE uri, external_id, rest, rb_doc;
12
+
13
+ rb_scan_args(argc, argv, "0*", &rest);
14
+ uri = rb_ary_entry(rest, 0);
15
+ external_id = rb_ary_entry(rest, 1);
16
+
17
+ htmlDocPtr doc = htmlNewDoc(
18
+ RTEST(uri) ? (const xmlChar *)StringValuePtr(uri) : NULL,
19
+ RTEST(external_id) ? (const xmlChar *)StringValuePtr(external_id) : NULL
20
+ );
21
+ rb_doc = Nokogiri_wrap_xml_document(klass, doc);
22
+ rb_funcall2(rb_doc, rb_intern("initialize"), argc, argv);
23
+ return rb_doc ;
24
+ }
25
+
26
+ /*
27
+ * call-seq:
28
+ * read_io(io, url, encoding, options)
29
+ *
30
+ * Read the HTML document from +io+ with given +url+, +encoding+,
31
+ * and +options+. See Nokogiri::HTML.parse
32
+ */
33
+ static VALUE read_io( VALUE klass,
34
+ VALUE io,
35
+ VALUE url,
36
+ VALUE encoding,
37
+ VALUE options )
38
+ {
39
+ const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
40
+ const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
41
+ VALUE error_list = rb_ary_new();
42
+
43
+ xmlInitParser();
44
+ xmlResetLastError();
45
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
46
+
47
+ htmlDocPtr doc = htmlReadIO(
48
+ io_read_callback,
49
+ io_close_callback,
50
+ (void *)io,
51
+ c_url,
52
+ c_enc,
53
+ NUM2INT(options)
54
+ );
55
+ xmlSetStructuredErrorFunc(NULL, NULL);
56
+
57
+ if(doc == NULL) {
58
+ xmlFreeDoc(doc);
59
+
60
+ xmlErrorPtr error = xmlGetLastError();
61
+ if(error)
62
+ rb_funcall(rb_mKernel, rb_intern("raise"), 1,
63
+ Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
64
+ );
65
+ else
66
+ rb_raise(rb_eRuntimeError, "Could not parse document");
67
+
68
+ return Qnil;
69
+ }
70
+
71
+ VALUE document = Nokogiri_wrap_xml_document(klass, doc);
72
+ rb_funcall(document, rb_intern("errors="), 1, error_list);
73
+ return document;
74
+ }
75
+
76
+ /*
77
+ * call-seq:
78
+ * read_memory(string, url, encoding, options)
79
+ *
80
+ * Read the HTML document contained in +string+ with given +url+, +encoding+,
81
+ * and +options+. See Nokogiri::HTML.parse
82
+ */
83
+ static VALUE read_memory( VALUE klass,
84
+ VALUE string,
85
+ VALUE url,
86
+ VALUE encoding,
87
+ VALUE options )
88
+ {
89
+ const char * c_buffer = StringValuePtr(string);
90
+ const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
91
+ const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
92
+ int len = RSTRING_LEN(string);
93
+ VALUE error_list = rb_ary_new();
94
+
95
+ xmlInitParser();
96
+ xmlResetLastError();
97
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
98
+
99
+ htmlDocPtr doc = htmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
100
+ xmlSetStructuredErrorFunc(NULL, NULL);
101
+
102
+ if(doc == NULL) {
103
+ xmlFreeDoc(doc);
104
+
105
+ xmlErrorPtr error = xmlGetLastError();
106
+ if(error)
107
+ rb_funcall(rb_mKernel, rb_intern("raise"), 1,
108
+ Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
109
+ );
110
+ else
111
+ rb_raise(rb_eRuntimeError, "Could not parse document");
112
+
113
+ return Qnil;
114
+ }
115
+
116
+ VALUE document = Nokogiri_wrap_xml_document(klass, doc);
117
+ rb_funcall(document, rb_intern("errors="), 1, error_list);
118
+ return document;
119
+ }
120
+
121
+ /*
122
+ * call-seq:
123
+ * type
124
+ *
125
+ * The type for this document
126
+ */
127
+ static VALUE type(VALUE self)
128
+ {
129
+ htmlDocPtr doc;
130
+ Data_Get_Struct(self, xmlDoc, doc);
131
+ return INT2NUM((int)doc->type);
132
+ }
133
+
134
+ /*
135
+ * call-seq:
136
+ * meta_encoding=
137
+ *
138
+ * Set the meta tag encoding for this document.
139
+ */
140
+ static VALUE set_meta_encoding(VALUE self, VALUE encoding)
141
+ {
142
+ htmlDocPtr doc;
143
+ Data_Get_Struct(self, xmlDoc, doc);
144
+
145
+ htmlSetMetaEncoding(doc, (const xmlChar *)StringValuePtr(encoding));
146
+
147
+ return encoding;
148
+ }
149
+
150
+ /*
151
+ * call-seq:
152
+ * meta_encoding
153
+ *
154
+ * Get the meta tag encoding for this document.
155
+ */
156
+ static VALUE meta_encoding(VALUE self)
157
+ {
158
+ htmlDocPtr doc;
159
+ Data_Get_Struct(self, xmlDoc, doc);
160
+
161
+ return NOKOGIRI_STR_NEW2(htmlGetMetaEncoding(doc), doc->encoding);
162
+ }
163
+
164
+ VALUE cNokogiriHtmlDocument ;
165
+ void init_html_document()
166
+ {
167
+ VALUE nokogiri = rb_define_module("Nokogiri");
168
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
169
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
170
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
171
+ VALUE xml_doc = rb_define_class_under(xml, "Document", node);
172
+ VALUE klass = rb_define_class_under(html, "Document", xml_doc);
173
+
174
+ cNokogiriHtmlDocument = klass;
175
+
176
+ rb_define_singleton_method(klass, "read_memory", read_memory, 4);
177
+ rb_define_singleton_method(klass, "read_io", read_io, 4);
178
+ rb_define_singleton_method(klass, "new", new, -1);
179
+
180
+ rb_define_method(klass, "type", type, 0);
181
+ rb_define_method(klass, "meta_encoding", meta_encoding, 0);
182
+ rb_define_method(klass, "meta_encoding=", set_meta_encoding, 1);
183
+ }