nokogiri 1.3.0-x86-mswin32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +233 -0
- data/CHANGELOG.rdoc +222 -0
- data/Manifest.txt +247 -0
- data/README.ja.rdoc +103 -0
- data/README.rdoc +117 -0
- data/Rakefile +205 -0
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +89 -0
- data/ext/nokogiri/html_document.c +183 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +30 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser.c +57 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/nokogiri.c +81 -0
- data/ext/nokogiri/nokogiri.h +149 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +53 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +51 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +308 -0
- data/ext/nokogiri/xml_document.h +21 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +102 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +24 -0
- data/ext/nokogiri/xml_io.h +10 -0
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +928 -0
- data/ext/nokogiri/xml_node.h +14 -0
- data/ext/nokogiri/xml_node_set.c +386 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +572 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +336 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +86 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +203 -0
- data/ext/nokogiri/xml_syntax_error.h +12 -0
- data/ext/nokogiri/xml_text.c +47 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +252 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +36 -0
- data/lib/nokogiri.rb +110 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +748 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
- data/lib/nokogiri/css/node.rb +107 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +11 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +172 -0
- data/lib/nokogiri/decorators.rb +2 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +62 -0
- data/lib/nokogiri/html.rb +34 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +71 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +47 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +29 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +62 -0
- data/lib/nokogiri/xml/attr.rb +9 -0
- data/lib/nokogiri/xml/builder.rb +254 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/document.rb +100 -0
- data/lib/nokogiri/xml/document_fragment.rb +49 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/entity_declaration.rb +11 -0
- data/lib/nokogiri/xml/fragment_handler.rb +55 -0
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +745 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +238 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +66 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +3 -0
- data/lib/nokogiri/xml/sax/document.rb +143 -0
- data/lib/nokogiri/xml/sax/parser.rb +101 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +34 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +64 -0
- data/tasks/test.rb +161 -0
- data/test/css/test_nthiness.rb +160 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +176 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +123 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +11 -0
- data/test/hpricot/test_alter.rb +68 -0
- data/test/hpricot/test_builder.rb +20 -0
- data/test/hpricot/test_parser.rb +426 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +77 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +52 -0
- data/test/html/test_builder.rb +156 -0
- data/test/html/test_document.rb +361 -0
- data/test/html/test_document_encoding.rb +46 -0
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +165 -0
- data/test/test_convert_xpath.rb +186 -0
- data/test/test_css_cache.rb +56 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +127 -0
- data/test/test_reader.rb +316 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +169 -0
- data/test/xml/sax/test_push_parser.rb +92 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_builder.rb +73 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +23 -0
- data/test/xml/test_document.rb +397 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +76 -0
- data/test/xml/test_dtd.rb +42 -0
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +808 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +84 -0
- data/test/xml/test_node_set.rb +368 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +18 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +409 -0
data/Rakefile
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rake'
|
5
|
+
require 'hoe'
|
6
|
+
|
7
|
+
# Absolute path of the project's lib/ directory; appended to the load path
# so that files under it can be required by name.
LIB_DIR = File.expand_path('lib', File.dirname(__FILE__))
$LOAD_PATH.push(LIB_DIR)

# Platform flags, normalised to strict true/false.
windows = !!(RUBY_PLATFORM =~ /(mswin|mingw)/i)
java    = !!(RUBY_PLATFORM =~ /java/)

# Generated Ruby sources; the file tasks further down build them from
# parser.y and tokenizer.rex.
GENERATED_PARSER    = "lib/nokogiri/css/generated_parser.rb"
GENERATED_TOKENIZER = "lib/nokogiri/css/generated_tokenizer.rb"
|
15
|
+
|
16
|
+
require 'nokogiri/version'
|
17
|
+
|
18
|
+
# Gem definition. The resulting Hoe object is kept in HOE because later
# tasks read and modify HOE.spec.
HOE = Hoe.new('nokogiri', Nokogiri::VERSION) do |hoe|
  hoe.developer('Aaron Patterson', 'aaronp@rubyforge.org')
  hoe.developer('Mike Dalessio', 'mike.dalessio@gmail.com')

  # An optional HLANG environment variable is spliced into the file names
  # (e.g. README.ja.rdoc); when unset, compact drops it.
  doc_lang = ENV['HLANG']
  hoe.readme_file      = ['README', doc_lang, 'rdoc'].compact.join('.')
  hoe.history_file     = ['CHANGELOG', doc_lang, 'rdoc'].compact.join('.')
  hoe.extra_rdoc_files = FileList['*.rdoc']

  hoe.clean_globs = [
    'lib/nokogiri/*.{o,so,bundle,a,log,dll}',
    GENERATED_PARSER,
    GENERATED_TOKENIZER,
    'cross',
  ]

  # Tools needed only when developing the gem.
  ["racc", "tenderlove-frex", "rake-compiler"].each do |dev_dep|
    hoe.extra_dev_deps << dev_dep
  end

  hoe.spec_extras = { :extensions => ["ext/nokogiri/extconf.rb"] }
end
|
37
|
+
|
38
|
+
# Native-extension tasks; skipped entirely when running under JRuby.
unless java

  gem 'rake-compiler', '>= 0.4.1'
  require "rake/extensiontask"

  RET = Rake::ExtensionTask.new("nokogiri", HOE.spec) do |ext|
    # FAT_DIR (e.g. "1.8") selects a per-ruby-version output directory.
    ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact)

    # Only forward EXTOPTS when it is actually set, so nil never ends up in
    # the option list.
    ext.config_options << ENV['EXTOPTS'] if ENV['EXTOPTS']

    cross_dir = File.join(File.dirname(__FILE__), 'tmp', 'cross')
    ext.cross_compile = true
    ext.cross_platform = 'i386-mswin32'
    ext.cross_config_options <<
      "--with-iconv-dir=#{File.join(cross_dir, 'iconv')}"
    ext.cross_config_options <<
      "--with-xml2-dir=#{File.join(cross_dir, 'libxml2')}"
    ext.cross_config_options <<
      "--with-xslt-dir=#{File.join(cross_dir, 'libxslt')}"
  end

  ###
  # To build the windows fat binary, do:
  #
  #   rake fat_binary native gem
  #
  # I keep my ruby in multiruby, so my command is like this:
  #
  #   RAKE19=~/.multiruby/install/1.9.1-p129/bin/rake \
  #     rake fat_binary native gem
  task 'fat_binary' do
    rake19 = ENV['RAKE19'] || 'rake1.9'

    # sh (unlike system) raises when the command exits non-zero, so a failed
    # cross compile stops the task instead of packaging a broken gem.
    sh "rake clean cross compile RUBY_CC_VERSION=1.8.6 FAT_DIR=1.8"
    sh "#{rake19} clean cross compile RUBY_CC_VERSION=1.9.1 FAT_DIR=1.9"

    # Write a loader that requires the binary from the directory named after
    # the running ruby's major.minor version.
    File.open("lib/#{HOE.name}/#{HOE.name}.rb", 'wb') do |f|
      f.write <<-eoruby
require "#{HOE.name}/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/#{HOE.name}"
      eoruby
    end

    # The packaged gem ships the prebuilt binaries and DLLs, so nothing is
    # compiled at install time.
    HOE.spec.extensions = []
    HOE.spec.platform = 'x86-mswin32'
    HOE.spec.files += Dir["lib/#{HOE.name}/#{HOE.name}.rb"]
    HOE.spec.files += Dir["lib/#{HOE.name}/1.{8,9}/*"]
    HOE.spec.files += Dir["ext/nokogiri/*.dll"]
  end
  CLOBBER.include("lib/nokogiri/nokogiri.rb")
  CLOBBER.include("lib/nokogiri/1.{8,9}")
end
|
85
|
+
|
86
|
+
namespace :gem do
  namespace :dev do
    # Writes <name>.gemspec with the current timestamp appended to the
    # version.
    task :spec do
      File.open("#{HOE.name}.gemspec", 'w') do |gemspec_file|
        stamp = Time.now.strftime("%Y%m%d%H%M%S")
        HOE.spec.version = "#{HOE.version}.#{stamp}"
        gemspec_file.write(HOE.spec.to_ruby)
      end
    end
  end

  desc "Build a gem targetted for JRuby"
  task :jruby => ['gem:jruby:spec'] do
    system "gem build nokogiri.gemspec"
    FileUtils.mkdir_p "pkg"
    built_gems = Dir.glob("nokogiri*-java.gem")
    FileUtils.mv built_gems, "pkg"
  end

  namespace :jruby do
    # Writes a gemspec for the java platform: the generated parser and
    # tokenizer are added to the file list and no extension is declared.
    task :spec => [GENERATED_PARSER, GENERATED_TOKENIZER] do
      File.open("#{HOE.name}.gemspec", 'w') do |gemspec_file|
        spec = HOE.spec
        spec.platform = 'java'
        spec.files << GENERATED_PARSER << GENERATED_TOKENIZER
        spec.extensions = []
        gemspec_file.write(spec.to_ruby)
      end
    end
  end

  task :spec => ['gem:dev:spec']
end
|
117
|
+
|
118
|
+
# Regenerate the CSS parser from its racc grammar. Uses the racc found by
# `which`, otherwise the one in ruby's bindir; any failure aborts with an
# install hint.
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |parser_task|
  begin
    racc_bin = `which racc`.strip
    if racc_bin.empty?
      racc_bin = "#{::Config::CONFIG['bindir']}/racc"
    end
    grammar = parser_task.prerequisites.first
    sh "#{racc_bin} -o #{parser_task.name} #{grammar}"
  rescue
    abort "need racc, sudo gem install racc"
  end
end
|
127
|
+
|
128
|
+
# Regenerate the CSS tokenizer from its .rex source with frex; any failure
# aborts with an install hint.
file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |tokenizer_task|
  begin
    lexer_source = tokenizer_task.prerequisites.first
    sh "frex --independent -o #{tokenizer_task.name} #{lexer_source}"
  rescue
    abort "need frex, sudo gem install tenderlove-frex -s http://gems.github.com"
  end
end
|
135
|
+
|
136
|
+
# Prebuilt win32 archives needed for cross compilation.
libs = %w{
  iconv-1.9.2.win32
  zlib-1.2.3.win32
  libxml2-2.7.3.win32
  libxslt-1.1.24.win32
}

# For every archive define two file tasks: one that downloads the zip into
# tmp/stash and one that unpacks it into tmp/cross/<short name>.
libs.each do |lib|
  short_name = lib.split('-').first   # "libxml2-2.7.3.win32" -> "libxml2"
  archive    = "tmp/stash/#{lib}.zip"
  unpacked   = "tmp/cross/#{short_name}"

  file archive do |t|
    puts "downloading #{lib}"
    FileUtils.mkdir_p('tmp/stash')
    Dir.chdir('tmp/stash') do
      url = "ftp://ftp.xmlsoft.org/libxml2/win32/#{lib}.zip"
      # sh raises when both wget and curl fail, so a missing download is
      # reported here rather than later as an unzip error.
      sh "wget #{url} || curl -O #{url}"
    end
  end

  file unpacked => [archive] do |t|
    puts "unzipping #{lib}.zip"
    FileUtils.mkdir_p('tmp/cross')
    Dir.chdir('tmp/cross') do
      sh "unzip ../stash/#{lib}.zip"
      sh "cp #{lib}/bin/* #{lib}/lib" # put DLL in lib, so dirconfig works
      sh "cp #{lib}/bin/*.dll ../../ext/nokogiri/"
      sh "mv #{lib} #{short_name}"
      sh "touch #{short_name}"
    end
  end

  # Make the cross task (when it exists) depend on the unpacked directory.
  if Rake::Task.task_defined?(:cross)
    Rake::Task[:cross].prerequisites << unpacked
  end
end
|
167
|
+
|
168
|
+
require 'tasks/test'
|
169
|
+
|
170
|
+
desc "set environment variables to build and/or test with debug options"
task :debug do
  ENV['NOKOGIRI_DEBUG'] = "true"
  # Append -DDEBUG to CFLAGS; an unset CFLAGS is treated as empty.
  existing_cflags = ENV['CFLAGS'] || ""
  ENV['CFLAGS'] = existing_cflags + " -DDEBUG"
end
|
176
|
+
|
177
|
+
# required_ruby_version
|
178
|
+
|
179
|
+
# Only do this on unix, since we can't build on windows
if !windows && !java && !ENV['NOKOGIRI_FFI']
  # The generated parser and tokenizer must exist before compiling or
  # checking the manifest.
  generated_sources = [GENERATED_PARSER, GENERATED_TOKENIZER]
  [:compile, :check_manifest].each do |name|
    generated_sources.each do |source|
      Rake::Task[name].prerequisites << source
    end
  end

  # Every test flavour needs the extension to be compiled first.
  Rake::Task[:test].prerequisites << :compile
  %w[valgrind valgrind_mem valgrind_mem0 coverage].each do |flavour|
    Rake::Task["test:#{flavour}"].prerequisites << :compile
  end
end
|
191
|
+
|
192
|
+
namespace :install do
  desc "Install frex and racc for development"
  task :deps => ['frex', 'racc']

  # Installs the racc parser generator gem.
  task :racc do
    sh "sudo gem install racc"
  end

  # Installs the frex lexer generator gem from the GitHub gem source.
  task :frex do
    sh "sudo gem install tenderlove-frex -s http://gems.github.com"
  end
end
|
204
|
+
|
205
|
+
# vim: syntax=Ruby
|
data/bin/nokogiri
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'irb'
|
5
|
+
require 'uri'
|
6
|
+
require 'rubygems'
|
7
|
+
require 'nokogiri'
|
8
|
+
|
9
|
+
# Command-line definition. The parser stays in +opts+ because the usage text
# is printed again below when no argument is given.
opts = OptionParser.new do |parser|
  parser.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
  parser.define_head "Usage: nokogiri <uri|path> [options]"

  [
    "",
    "Examples:",
    "  nokogiri http://www.ruby-lang.org/",
    "  nokogiri ./public/index.html",
    "",
    "Options:",
  ].each { |line| parser.separator line }

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-v", "--version", "Show version") do
    require 'yaml'
    puts Nokogiri::VERSION_INFO.to_yaml
    exit
  end
end
opts.parse!
|
31
|
+
|
32
|
+
# The first remaining argument is the document location (URL or file path).
uri = ARGV.shift

# Without a location there is nothing to load: show usage and fail.
if uri.to_s.strip.empty?
  puts opts
  exit 1
end

# Decide whether the argument is a URL. Local paths that are not valid URIs
# (for example ones containing spaces) make URI.parse raise; treat those as
# plain file paths instead of crashing.
scheme = begin
  URI.parse(uri).scheme
rescue URI::InvalidURIError
  nil
end

if scheme
  # Block form closes the handle as soon as the body has been read.
  @doc = Nokogiri(open(uri) { |io| io.read })
else
  @doc = Nokogiri(File.read(uri))
end

puts "Your document is stored in @doc..."
IRB.start
|
47
|
+
|
data/ext/nokogiri/extconf.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
# Set ARCHFLAGS to a single architecture: ppc when `uname -p` reports
# powerpc, i386 otherwise.
target_arch = (`uname -p` =~ /powerpc/) ? 'ppc' : 'i386'
ENV["ARCHFLAGS"] = "-arch #{target_arch}"
|
2
|
+
|
3
|
+
require 'mkmf'
|
4
|
+
|
5
|
+
# Project root (two levels above this file) and ruby's own lib/include dirs.
ROOT       = File.expand_path('../..', File.dirname(__FILE__))
LIBDIR     = Config::CONFIG['libdir']
INCLUDEDIR = Config::CONFIG['includedir']

# Under MacRuby, strip -static from the static libruby argument.
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'macruby'
  $LIBRUBYARG_STATIC.gsub!(/-static/, '')
end

# Start from any CFLAGS supplied by the caller, then add platform defines.
$CFLAGS << " #{ENV["CFLAGS"]}"
case Config::CONFIG['target_os']
when 'mingw32'
  $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
when 'solaris2'
  $CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
else
  $CFLAGS << " -g -DXP_UNIX"
end

# Optimisation level and warning flags used on every platform.
$CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
|
23
|
+
|
24
|
+
# Default directories searched for headers ...
HEADER_DIRS = [
  File.join(INCLUDEDIR, "libxml2"),
  INCLUDEDIR,
  '/usr/local/include/libxml2',
  '/usr/include/libxml2',
]

# ... and for libraries.
LIB_DIRS = [
  LIBDIR,
  '/opt/local/lib',
  '/usr/local/lib',
  '/usr/lib'
]

# Register the --with-<name>-dir style options for each dependency. Whatever
# dir_config reports is put at the front of the search lists, so the most
# recently processed dependency is searched first.
[
  ['iconv', '/opt/local/include',         '/opt/local/lib'],
  ['xml2',  '/opt/local/include/libxml2', '/opt/local/lib'],
  ['xslt',  '/opt/local/include/',        '/opt/local/lib'],
].each do |package, default_include, default_lib|
  found_dirs = dir_config(package, default_include, default_lib)
  next if found_dirs == [nil, nil]

  HEADER_DIRS.unshift found_dirs.first
  LIB_DIRS.unshift found_dirs[1]
end
|
55
|
+
|
56
|
+
# Required headers. Each check aborts with an install hint when it fails.
abort "iconv is missing. try 'port install iconv' or 'yum install iconv'" unless
  find_header('iconv.h', *HEADER_DIRS)

abort "libxml2 is missing. try 'port install libxml2' or 'yum install libxml2-devel'" unless
  find_header('libxml/parser.h', *HEADER_DIRS)

abort "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'" unless
  find_header('libxslt/xslt.h', *HEADER_DIRS)

abort "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'" unless
  find_header('libexslt/exslt.h', *HEADER_DIRS)

# Required libraries, each probed through one known symbol.
abort "libxml2 is missing. try 'port install libxml2' or 'yum install libxml2'" unless
  find_library('xml2', 'xmlParseDoc', *LIB_DIRS)

abort "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'" unless
  find_library('xslt', 'xsltParseStylesheetDoc', *LIB_DIRS)

abort "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'" unless
  find_library('exslt', 'exsltFuncRegister', *LIB_DIRS)

# The profiler library is only required when CPUPROFILE is set.
if ENV['CPUPROFILE'] && !find_library('profiler', 'ProfilerEnable', *LIB_DIRS)
  abort "google performance tools are not installed"
end

create_makefile('nokogiri/nokogiri')
|
data/ext/nokogiri/html_document.c
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
#include <html_document.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* new
|
6
|
+
*
|
7
|
+
* Create a new document
|
8
|
+
*/
|
9
|
+
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
10
|
+
{
|
11
|
+
VALUE uri, external_id, rest, rb_doc;
|
12
|
+
|
13
|
+
rb_scan_args(argc, argv, "0*", &rest);
|
14
|
+
uri = rb_ary_entry(rest, 0);
|
15
|
+
external_id = rb_ary_entry(rest, 1);
|
16
|
+
|
17
|
+
htmlDocPtr doc = htmlNewDoc(
|
18
|
+
RTEST(uri) ? (const xmlChar *)StringValuePtr(uri) : NULL,
|
19
|
+
RTEST(external_id) ? (const xmlChar *)StringValuePtr(external_id) : NULL
|
20
|
+
);
|
21
|
+
rb_doc = Nokogiri_wrap_xml_document(klass, doc);
|
22
|
+
rb_funcall2(rb_doc, rb_intern("initialize"), argc, argv);
|
23
|
+
return rb_doc ;
|
24
|
+
}
|
25
|
+
|
26
|
+
/*
|
27
|
+
* call-seq:
|
28
|
+
* read_io(io, url, encoding, options)
|
29
|
+
*
|
30
|
+
* Read the HTML document from +io+ with given +url+, +encoding+,
|
31
|
+
* and +options+. See Nokogiri::HTML.parse
|
32
|
+
*/
|
33
|
+
static VALUE read_io( VALUE klass,
|
34
|
+
VALUE io,
|
35
|
+
VALUE url,
|
36
|
+
VALUE encoding,
|
37
|
+
VALUE options )
|
38
|
+
{
|
39
|
+
const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
|
40
|
+
const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
|
41
|
+
VALUE error_list = rb_ary_new();
|
42
|
+
|
43
|
+
xmlInitParser();
|
44
|
+
xmlResetLastError();
|
45
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
46
|
+
|
47
|
+
htmlDocPtr doc = htmlReadIO(
|
48
|
+
io_read_callback,
|
49
|
+
io_close_callback,
|
50
|
+
(void *)io,
|
51
|
+
c_url,
|
52
|
+
c_enc,
|
53
|
+
NUM2INT(options)
|
54
|
+
);
|
55
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
56
|
+
|
57
|
+
if(doc == NULL) {
|
58
|
+
xmlFreeDoc(doc);
|
59
|
+
|
60
|
+
xmlErrorPtr error = xmlGetLastError();
|
61
|
+
if(error)
|
62
|
+
rb_funcall(rb_mKernel, rb_intern("raise"), 1,
|
63
|
+
Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
|
64
|
+
);
|
65
|
+
else
|
66
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
67
|
+
|
68
|
+
return Qnil;
|
69
|
+
}
|
70
|
+
|
71
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
72
|
+
rb_funcall(document, rb_intern("errors="), 1, error_list);
|
73
|
+
return document;
|
74
|
+
}
|
75
|
+
|
76
|
+
/*
|
77
|
+
* call-seq:
|
78
|
+
* read_memory(string, url, encoding, options)
|
79
|
+
*
|
80
|
+
* Read the HTML document contained in +string+ with given +url+, +encoding+,
|
81
|
+
* and +options+. See Nokogiri::HTML.parse
|
82
|
+
*/
|
83
|
+
static VALUE read_memory( VALUE klass,
|
84
|
+
VALUE string,
|
85
|
+
VALUE url,
|
86
|
+
VALUE encoding,
|
87
|
+
VALUE options )
|
88
|
+
{
|
89
|
+
const char * c_buffer = StringValuePtr(string);
|
90
|
+
const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
|
91
|
+
const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
|
92
|
+
int len = RSTRING_LEN(string);
|
93
|
+
VALUE error_list = rb_ary_new();
|
94
|
+
|
95
|
+
xmlInitParser();
|
96
|
+
xmlResetLastError();
|
97
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
98
|
+
|
99
|
+
htmlDocPtr doc = htmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
|
100
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
101
|
+
|
102
|
+
if(doc == NULL) {
|
103
|
+
xmlFreeDoc(doc);
|
104
|
+
|
105
|
+
xmlErrorPtr error = xmlGetLastError();
|
106
|
+
if(error)
|
107
|
+
rb_funcall(rb_mKernel, rb_intern("raise"), 1,
|
108
|
+
Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
|
109
|
+
);
|
110
|
+
else
|
111
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
112
|
+
|
113
|
+
return Qnil;
|
114
|
+
}
|
115
|
+
|
116
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
117
|
+
rb_funcall(document, rb_intern("errors="), 1, error_list);
|
118
|
+
return document;
|
119
|
+
}
|
120
|
+
|
121
|
+
/*
|
122
|
+
* call-seq:
|
123
|
+
* type
|
124
|
+
*
|
125
|
+
* The type for this document
|
126
|
+
*/
|
127
|
+
static VALUE type(VALUE self)
|
128
|
+
{
|
129
|
+
htmlDocPtr doc;
|
130
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
131
|
+
return INT2NUM((int)doc->type);
|
132
|
+
}
|
133
|
+
|
134
|
+
/*
|
135
|
+
* call-seq:
|
136
|
+
* meta_encoding=
|
137
|
+
*
|
138
|
+
* Set the meta tag encoding for this document.
|
139
|
+
*/
|
140
|
+
static VALUE set_meta_encoding(VALUE self, VALUE encoding)
|
141
|
+
{
|
142
|
+
htmlDocPtr doc;
|
143
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
144
|
+
|
145
|
+
htmlSetMetaEncoding(doc, (const xmlChar *)StringValuePtr(encoding));
|
146
|
+
|
147
|
+
return encoding;
|
148
|
+
}
|
149
|
+
|
150
|
+
/*
|
151
|
+
* call-seq:
|
152
|
+
* meta_encoding
|
153
|
+
*
|
154
|
+
* Get the meta tag encoding for this document.
|
155
|
+
*/
|
156
|
+
static VALUE meta_encoding(VALUE self)
|
157
|
+
{
|
158
|
+
htmlDocPtr doc;
|
159
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
160
|
+
|
161
|
+
return NOKOGIRI_STR_NEW2(htmlGetMetaEncoding(doc), doc->encoding);
|
162
|
+
}
|
163
|
+
|
164
|
+
VALUE cNokogiriHtmlDocument ;
|
165
|
+
void init_html_document()
|
166
|
+
{
|
167
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
168
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
169
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
170
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
171
|
+
VALUE xml_doc = rb_define_class_under(xml, "Document", node);
|
172
|
+
VALUE klass = rb_define_class_under(html, "Document", xml_doc);
|
173
|
+
|
174
|
+
cNokogiriHtmlDocument = klass;
|
175
|
+
|
176
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, 4);
|
177
|
+
rb_define_singleton_method(klass, "read_io", read_io, 4);
|
178
|
+
rb_define_singleton_method(klass, "new", new, -1);
|
179
|
+
|
180
|
+
rb_define_method(klass, "type", type, 0);
|
181
|
+
rb_define_method(klass, "meta_encoding", meta_encoding, 0);
|
182
|
+
rb_define_method(klass, "meta_encoding=", set_meta_encoding, 1);
|
183
|
+
}
|