superfeedr-nokogiri 1.4.0.20091116183308
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +330 -0
- data/CHANGELOG.rdoc +314 -0
- data/Manifest.txt +269 -0
- data/README.ja.rdoc +105 -0
- data/README.rdoc +118 -0
- data/Rakefile +244 -0
- data/bin/nokogiri +49 -0
- data/ext/nokogiri/extconf.rb +145 -0
- data/ext/nokogiri/html_document.c +145 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +89 -0
- data/ext/nokogiri/nokogiri.h +145 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +54 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +52 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +388 -0
- data/ext/nokogiri/xml_document.h +24 -0
- data/ext/nokogiri/xml_document_fragment.c +46 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +192 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +74 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +1060 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +397 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +593 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +159 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +286 -0
- data/ext/nokogiri/xml_sax_parser.h +43 -0
- data/ext/nokogiri/xml_sax_parser_context.c +155 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +114 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +156 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +261 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +239 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +116 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +646 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +162 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +356 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +135 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +444 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +227 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
- data/lib/nokogiri/ffi/xml/schema.rb +92 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/html.rb +35 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +88 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +33 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +405 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +131 -0
- data/lib/nokogiri/xml/document_fragment.rb +69 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +71 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +665 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +307 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +61 -0
- data/lib/nokogiri/xml/syntax_error.rb +38 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +71 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +183 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +136 -0
- data/test/html/sax/test_parser.rb +64 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +390 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +132 -0
- data/test/html/test_element_description.rb +94 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +228 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +134 -0
- data/test/test_reader.rb +358 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +307 -0
- data/test/xml/sax/test_parser_context.rb +56 -0
- data/test/xml/sax/test_push_parser.rb +131 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +167 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +607 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +138 -0
- data/test/xml/test_dtd.rb +82 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +889 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_set.rb +531 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +27 -0
- data/test/xml/test_text.rb +30 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +430 -0
data/Rakefile
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
gem 'hoe', '>= 2.1.0'
|
5
|
+
require 'hoe'
|
6
|
+
|
7
|
+
windows = RUBY_PLATFORM =~ /(mswin|mingw)/i
|
8
|
+
java = RUBY_PLATFORM =~ /java/
|
9
|
+
|
10
|
+
GENERATED_PARSER = "lib/nokogiri/css/generated_parser.rb"
|
11
|
+
GENERATED_TOKENIZER = "lib/nokogiri/css/generated_tokenizer.rb"
|
12
|
+
|
13
|
+
# Make sure hoe-debugging is installed
|
14
|
+
Hoe.plugin :debugging
|
15
|
+
|
16
|
+
# Gem specification for nokogiri: authors, README/CHANGELOG selection,
# files removed by the clean task, development dependencies and the
# native extension entry point.
HOE = Hoe.spec 'nokogiri' do
  developer('Aaron Patterson', 'aaronp@rubyforge.org')
  developer('Mike Dalessio', 'mike.dalessio@gmail.com')

  # HLANG (when set) is inserted between the base name and the extension;
  # when unset the nil is compacted away, giving README.rdoc / CHANGELOG.rdoc.
  lang = ENV['HLANG']
  self.readme_file      = ['README', lang, 'rdoc'].compact.join('.')
  self.history_file     = ['CHANGELOG', lang, 'rdoc'].compact.join('.')
  self.extra_rdoc_files = FileList['*.rdoc']

  self.clean_globs = [
    'lib/nokogiri/*.{o,so,bundle,a,log,dll}',
    'lib/nokogiri/nokogiri.rb',
    'lib/nokogiri/1.{8,9}',
    GENERATED_PARSER,
    GENERATED_TOKENIZER,
    'cross',
  ]

  # Tools needed to regenerate the CSS parser/tokenizer and build the extension.
  %w{ racc rexical rake-compiler }.each do |tool|
    self.extra_dev_deps << [tool, '>= 0']
  end

  self.spec_extras = { :extensions => ["ext/nokogiri/extconf.rb"] }
end
|
37
|
+
|
38
|
+
# Run RDoc over the gem's require paths and extra rdoc files using the
# 'activerecord' format. RAILS_ROOT defaults to ../nokogiri_ws when unset.
task :ws_docs do
  doc_title = "#{HOE.name}-#{HOE.version} Documentation"

  options = [
    "--main=#{HOE.readme_file}",
    '--format=activerecord',
    '--threads=1',
    "--title=#{doc_title.inspect}",
  ]
  options += HOE.spec.require_paths
  options += HOE.spec.extra_rdoc_files

  require 'rdoc/rdoc'
  ENV['RAILS_ROOT'] ||= File.expand_path(File.join('..', 'nokogiri_ws'))
  RDoc::RDoc.new.document options
end
|
53
|
+
|
54
|
+
# Native-extension build tasks; skipped entirely when running under JRuby.
unless java
  gem 'rake-compiler', '>= 0.4.1'
  require "rake/extensiontask"

  RET = Rake::ExtensionTask.new("nokogiri", HOE.spec) do |ext|
    # FAT_DIR (when set) adds a subdirectory under lib/nokogiri.
    ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact)
    ext.config_options << ENV['EXTOPTS']

    ext.cross_compile  = true
    ext.cross_platform = 'i386-mingw32'

    # Point the cross build at the libraries unpacked under tmp/cross.
    cross_root = File.join(File.dirname(__FILE__), 'tmp', 'cross')
    [%w{ iconv iconv }, %w{ xml2 libxml2 }, %w{ xslt libxslt }].each do |option, subdir|
      ext.cross_config_options <<
        "--with-#{option}-dir=#{File.join(cross_root, subdir)}"
    end
  end

  # Writes a loader that requires the extension from a directory named after
  # the running Ruby's version with its last numeric component removed.
  file 'lib/nokogiri/nokogiri.rb' do
    loader_path = "lib/#{HOE.name}/#{HOE.name}.rb"
    File.open(loader_path, 'wb') do |f|
      f.write <<-eoruby
require "#{HOE.name}/\#{RUBY_VERSION.sub(/\\.\\d+$/, '')}/#{HOE.name}"
      eoruby
    end
  end

  namespace :cross do
    # Turn the spec into a binary gem spec: no extension to build, but ship
    # the loader, the per-version .so files and the DLLs.
    task :file_list do
      HOE.spec.platform   = 'x86-mingw32'
      HOE.spec.extensions = []
      [
        "lib/#{HOE.name}/#{HOE.name}.rb",
        "lib/#{HOE.name}/1.{8,9}/#{HOE.name}.so",
        "ext/nokogiri/*.dll",
      ].each do |pattern|
        HOE.spec.files += Dir[pattern]
      end
    end
  end

  [
    "lib/nokogiri/nokogiri.{so,dylib,rb,bundle}",
    "lib/nokogiri/1.{8,9}",
    "ext/nokogiri/*.dll",
  ].each do |pattern|
    CLOBBER.include(pattern)
  end
end
|
95
|
+
|
96
|
+
# Tasks that write nokogiri.gemspec for development and JRuby builds.
namespace :gem do
  namespace :dev do
    # Writes a gemspec whose version carries a timestamp suffix.
    task :spec => [ GENERATED_PARSER, GENERATED_TOKENIZER ] do
      File.open("#{HOE.name}.gemspec", 'w') do |gemspec|
        stamp = Time.now.strftime("%Y%m%d%H%M%S")
        HOE.spec.version = "#{HOE.version}.#{stamp}"
        gemspec.write(HOE.spec.to_ruby)
      end
    end
  end

  desc "Build a gem targetted for JRuby"
  task :jruby => ['gem:jruby:spec'] do
    system "gem build nokogiri.gemspec"
    FileUtils.mkdir_p "pkg"
    built_gems = Dir.glob("nokogiri*-java.gem")
    FileUtils.mv built_gems, "pkg"
  end

  namespace :jruby do
    # Writes a 'java' platform gemspec: generated CSS sources and DLLs are
    # added to the file list and no extension is compiled.
    task :spec => [GENERATED_PARSER, GENERATED_TOKENIZER] do
      File.open("#{HOE.name}.gemspec", 'w') do |gemspec|
        HOE.spec.platform = 'java'
        [GENERATED_PARSER, GENERATED_TOKENIZER].each do |generated|
          HOE.spec.files << generated
        end
        HOE.spec.files += Dir["ext/nokogiri/*.dll"]
        HOE.spec.extensions = []
        gemspec.write(HOE.spec.to_ruby)
      end
    end
  end

  task :spec => ['gem:dev:spec']
end
|
128
|
+
|
129
|
+
# Regenerate the CSS parser from its racc grammar. racc is looked up with
# `which`; if that prints nothing, the copy in Ruby's bindir is used.
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
  begin
    racc_bin = `which racc`.strip
    if racc_bin.empty?
      racc_bin = "#{::Config::CONFIG['bindir']}/racc"
    end
    grammar = t.prerequisites.first
    sh "#{racc_bin} -l -o #{t.name} #{grammar}"
  rescue
    abort "need racc, sudo gem install racc"
  end
end
|
138
|
+
|
139
|
+
# Regenerate the CSS tokenizer from its rex definition; aborts with an
# install hint if running rex fails.
file GENERATED_TOKENIZER => "lib/nokogiri/css/tokenizer.rex" do |t|
  begin
    definition = t.prerequisites.first
    sh "rex --independent -o #{t.name} #{definition}"
  rescue
    abort "need rexical, sudo gem install rexical"
  end
end
|
146
|
+
|
147
|
+
# Prebuilt win32 archives used by the cross build, in processing order.
libs = %w{
  iconv-1.9.2.win32
  zlib-1.2.3.win32
  libxml2-2.7.3.win32
  libxslt-1.1.24.win32
}

# DLL shipped from each archive.
lib_dlls = {
  'iconv-1.9.2.win32'    => 'iconv.dll',
  'zlib-1.2.3.win32'     => 'zlib1.dll',
  'libxml2-2.7.3.win32'  => 'libxml2.dll',
  'libxslt-1.1.24.win32' => 'libxslt.dll',
}

# For every archive define three file tasks (download, unpack, copy DLL)
# and hook the resulting DLL into the cross and JRuby gemspec tasks.
libs.each do |lib|
  libname  = lib.split('-').first
  zip_file = "tmp/stash/#{lib}.zip"
  unpacked = "tmp/cross/#{libname}"
  dll_file = "ext/nokogiri/#{lib_dlls[lib]}"

  file zip_file do |t|
    puts "downloading #{lib}"
    FileUtils.mkdir_p('tmp/stash')
    Dir.chdir('tmp/stash') do
      url = "ftp://ftp.xmlsoft.org/libxml2/win32/#{lib}.zip"
      system("wget #{url} || curl -O #{url}")
    end
  end

  file unpacked => [zip_file] do |t|
    puts "unzipping #{lib}.zip"
    FileUtils.mkdir_p('tmp/cross')
    Dir.chdir('tmp/cross') do
      sh "unzip ../stash/#{lib}.zip"
      sh "cp #{lib}/bin/* #{lib}/lib" # put DLL in lib, so dirconfig works
      sh "mv #{lib} #{libname}"
      sh "touch #{libname}"
    end
  end

  file dll_file => unpacked do |t|
    Dir.chdir('tmp/cross') do
      sh "cp #{libname}/bin/*.dll ../../ext/nokogiri/"
    end
  end

  # The :cross task only exists when the extension task was defined.
  if Rake::Task.task_defined?(:cross)
    [
      "ext/nokogiri/#{lib_dlls[lib]}",
      "lib/nokogiri/nokogiri.rb",
      "cross:file_list",
    ].each do |prerequisite|
      Rake::Task[:cross].prerequisites << prerequisite
    end
  end
  Rake::Task['gem:jruby:spec'].prerequisites << "ext/nokogiri/#{lib_dlls[lib]}"
end
|
197
|
+
|
198
|
+
require 'tasks/test'
|
199
|
+
|
200
|
+
desc "set environment variables to build and/or test with debug options"
task :debug do
  ENV['NOKOGIRI_DEBUG'] = "true"
  # Append to whatever CFLAGS already holds (nothing when it is unset).
  ENV['CFLAGS'] = "#{ENV['CFLAGS']} -DDEBUG"
end
|
206
|
+
|
207
|
+
# required_ruby_version
|
208
|
+
|
209
|
+
# Only do this on unix, since we can't build on windows
#
# Make the generated CSS parser and tokenizer prerequisites of the tasks
# that need them. For native builds the tests (and the valgrind test tasks,
# when the debugging plugin is loaded) also depend on :compile.
unless windows || java || ENV['NOKOGIRI_FFI']
  [:compile, :check_manifest].each do |task_name|
    Rake::Task[task_name].prerequisites << GENERATED_PARSER
    Rake::Task[task_name].prerequisites << GENERATED_TOKENIZER
  end

  Rake::Task[:test].prerequisites << :compile
  if Hoe.plugins.include?(:debugging)
    ['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
      Rake::Task["test:#{task_name}"].prerequisites << :compile
    end
  end
else
  [:test, :check_manifest].each do |task_name|
    # Rake::Task[] raises for an unknown task rather than returning nil, so
    # it cannot serve as an existence test; task_defined? only reports.
    if Rake::Task.task_defined?(task_name)
      Rake::Task[task_name].prerequisites << GENERATED_PARSER
      Rake::Task[task_name].prerequisites << GENERATED_TOKENIZER
    end
  end
end
|
230
|
+
|
231
|
+
# Convenience tasks that install the generator gems with sudo.
namespace :install do
  desc "Install rex and racc for development"
  task :deps => %w(rexical racc)

  task :racc do |t|
    sh "sudo gem install racc"
  end

  task :rexical do
    sh "sudo gem install rexical"
  end
end
|
243
|
+
|
244
|
+
# vim: syntax=Ruby
|
data/bin/nokogiri
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'irb'
|
5
|
+
require 'uri'
|
6
|
+
require 'rubygems'
|
7
|
+
require 'nokogiri'
|
8
|
+
|
9
|
+
# Object whose +parse+ method reads the input; --type narrows it to the XML
# or HTML parser.
parse_class = Nokogiri

# The block parameter is named +parser+ so it does not shadow the outer
# +opts+ variable being assigned.
opts = OptionParser.new do |parser|
  parser.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
  parser.define_head "Usage: nokogiri <uri|path> [options]"
  parser.separator ""
  parser.separator "Examples:"
  parser.separator " nokogiri http://www.ruby-lang.org/"
  parser.separator " nokogiri ./public/index.html"
  parser.separator ""
  parser.separator "Options:"

  parser.on("--type [TYPE]", [:xml, :html]) do |v|
    # TYPE is optional, so v is nil for a bare "--type"; keep the current
    # parser in that case instead of replacing it with nil.
    parse_class = {:xml => Nokogiri::XML, :html => Nokogiri::HTML}[v] || parse_class
  end

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-v", "--version", "Show version") do
    require 'yaml'
    puts Nokogiri::VERSION_INFO.to_yaml
    exit
  end
end
opts.parse!

# First remaining argument is the URI or path to load.
uri = ARGV.shift

if uri.to_s.strip.empty?
  puts opts
  exit 1
end

# The block form closes the file or connection once its contents are read.
@doc = parse_class.parse(open(uri) { |io| io.read })

puts "Your document is stored in @doc..."
IRB.start
|
49
|
+
|
data/ext/nokogiri/extconf.rb
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/
|
2
|
+
|
3
|
+
# :stopdoc:
|
4
|
+
|
5
|
+
require 'mkmf'
|
6
|
+
|
7
|
+
ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
|
8
|
+
LIBDIR = Config::CONFIG['libdir']
|
9
|
+
INCLUDEDIR = Config::CONFIG['includedir']
|
10
|
+
|
11
|
+
# MacRuby: strip "-static" from the static libruby argument.
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'macruby'
  $LIBRUBYARG_STATIC.gsub!(/-static/, '')
end

# Caller-supplied CFLAGS first, then the platform defines, then the
# optimisation and warning flags used on every platform.
$CFLAGS << " #{ENV["CFLAGS"]}"
case Config::CONFIG['target_os']
when 'mingw32'
  $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
when 'solaris2'
  $CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
else
  $CFLAGS << " -g -DXP_UNIX"
end

$CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
|
25
|
+
|
26
|
+
# Include directories searched for headers, highest priority first.
HEADER_DIRS = [
  # First search /opt/local for macports
  '/opt/local/include',
  '/opt/local/include/libxml2',

  # Then search /usr/local for people that installed from source
  '/usr/local/include',
  '/usr/local/include/libxml2',

  # Check the ruby install locations
  INCLUDEDIR,
  File.join(INCLUDEDIR, "libxml2"),

  # Finally fall back to /usr
  '/usr/include',
  '/usr/include/libxml2',
]

# Library directories searched when linking, highest priority first.
LIB_DIRS = [
  # First search /opt/local for macports
  '/opt/local/lib',

  # Then search /usr/local for people that installed from source
  '/usr/local/lib',

  # Check the ruby install locations
  LIBDIR,

  # Finally fall back to /usr
  '/usr/lib',
]

# Directories reported by dir_config for each library are put at the front
# of both search lists; each later entry ends up ahead of the earlier ones.
[
  ['iconv', '/opt/local/include',         '/opt/local/lib'],
  ['xml2',  '/opt/local/include/libxml2', '/opt/local/lib'],
  ['xslt',  '/opt/local/include/',        '/opt/local/lib'],
].each do |name, default_include, default_lib|
  configured = dir_config(name, default_include, default_lib)
  unless ["", ""] == configured
    HEADER_DIRS.unshift configured.first
    LIB_DIRS.unshift configured[1]
  end
end
|
75
|
+
|
76
|
+
# Include directories that nokogiri_find_header has already added to
# $INCFLAGS, so that each directory is added only once.
CUSTOM_DASH_I = []

# Look for +header_file+ in each directory of +paths+, in order. The first
# directory that contains the file and passes a preprocessor check with
# -I<dir> is prepended to $INCFLAGS (unless already recorded) and becomes
# the result. If no directory qualifies, the header is checked once more
# without any extra -I option. The value comes from the checking_for block
# (presumably the block's result - confirm against mkmf).
def nokogiri_find_header header_file, *paths
  # mkmf in ruby 1.8.5 does not have the "checking_message" method
  message = defined?(checking_message) ?
    checking_message(header_file, paths) :
    header_file

  header = cpp_include header_file
  checking_for message do
    found = false
    paths.each do |dir|
      # File.exist? is the supported spelling; File.exists? is deprecated
      # and no longer available in current Ruby releases.
      if File.exist?(File.join(dir, header_file))
        opt = "-I#{dir}".quote
        if try_cpp header, opt
          unless CUSTOM_DASH_I.include? dir
            $INCFLAGS = "#{opt} #{$INCFLAGS}"
            CUSTOM_DASH_I << dir
          end
          found = dir
          break
        end
      end
    end
    found ||= try_cpp(header)
  end
end
|
103
|
+
|
104
|
+
# Required headers, checked in order; the build aborts with the paired
# message as soon as one cannot be found.
[
  ['iconv.h',          "iconv is missing. try 'port install iconv' or 'yum install iconv'"],
  ['libxml/parser.h',  "libxml2 is missing. try 'port install libxml2' or 'yum install libxml2-devel'"],
  ['libxslt/xslt.h',   "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"],
  ['libexslt/exslt.h', "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"],
].each do |header, message|
  unless nokogiri_find_header(header, *HEADER_DIRS)
    abort message
  end
end

# Required libraries, each probed through one function it must export.
[
  ['xml2',  'xmlParseDoc',            "libxml2 is missing. try 'port install libxml2' or 'yum install libxml2'"],
  ['xslt',  'xsltParseStylesheetDoc', "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"],
  ['exslt', 'exsltFuncRegister',      "libxslt is missing. try 'port install libxslt' or 'yum install libxslt-devel'"],
].each do |library, function, message|
  unless find_library(library, function, *LIB_DIRS)
    abort message
  end
end

# Optional libxml2 functions; each one is probed exactly once.
%w{
  xmlRelaxNGSetParserStructuredErrors
  xmlRelaxNGSetValidStructuredErrors
  xmlSchemaSetValidStructuredErrors
  xmlSchemaSetParserStructuredErrors
}.each do |function|
  have_func(function)
end

# Link against the profiler library only when CPUPROFILE is set.
if ENV['CPUPROFILE']
  unless find_library('profiler', 'ProfilerEnable', *LIB_DIRS)
    abort "google performance tools are not installed"
  end
end

create_makefile('nokogiri/nokogiri')
|
145
|
+
# :startdoc:
|
data/ext/nokogiri/html_document.c
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
#include <html_document.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* new
|
6
|
+
*
|
7
|
+
* Create a new document
|
8
|
+
*/
|
9
|
+
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
10
|
+
{
|
11
|
+
VALUE uri, external_id, rest, rb_doc;
|
12
|
+
|
13
|
+
rb_scan_args(argc, argv, "0*", &rest);
|
14
|
+
uri = rb_ary_entry(rest, (long)0);
|
15
|
+
external_id = rb_ary_entry(rest, (long)1);
|
16
|
+
|
17
|
+
htmlDocPtr doc = htmlNewDoc(
|
18
|
+
RTEST(uri) ? (const xmlChar *)StringValuePtr(uri) : NULL,
|
19
|
+
RTEST(external_id) ? (const xmlChar *)StringValuePtr(external_id) : NULL
|
20
|
+
);
|
21
|
+
rb_doc = Nokogiri_wrap_xml_document(klass, doc);
|
22
|
+
rb_obj_call_init(rb_doc, argc, argv);
|
23
|
+
return rb_doc ;
|
24
|
+
}
|
25
|
+
|
26
|
+
/*
|
27
|
+
* call-seq:
|
28
|
+
* read_io(io, url, encoding, options)
|
29
|
+
*
|
30
|
+
* Read the HTML document from +io+ with given +url+, +encoding+,
|
31
|
+
* and +options+. See Nokogiri::HTML.parse
|
32
|
+
*/
|
33
|
+
static VALUE read_io( VALUE klass,
|
34
|
+
VALUE io,
|
35
|
+
VALUE url,
|
36
|
+
VALUE encoding,
|
37
|
+
VALUE options )
|
38
|
+
{
|
39
|
+
const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
|
40
|
+
const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
|
41
|
+
VALUE error_list = rb_ary_new();
|
42
|
+
|
43
|
+
xmlResetLastError();
|
44
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
45
|
+
|
46
|
+
htmlDocPtr doc = htmlReadIO(
|
47
|
+
io_read_callback,
|
48
|
+
io_close_callback,
|
49
|
+
(void *)io,
|
50
|
+
c_url,
|
51
|
+
c_enc,
|
52
|
+
(int)NUM2INT(options)
|
53
|
+
);
|
54
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
55
|
+
|
56
|
+
if(doc == NULL) {
|
57
|
+
xmlFreeDoc(doc);
|
58
|
+
|
59
|
+
xmlErrorPtr error = xmlGetLastError();
|
60
|
+
if(error)
|
61
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
|
62
|
+
else
|
63
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
64
|
+
|
65
|
+
return Qnil;
|
66
|
+
}
|
67
|
+
|
68
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
69
|
+
rb_iv_set(document, "@errors", error_list);
|
70
|
+
return document;
|
71
|
+
}
|
72
|
+
|
73
|
+
/*
|
74
|
+
* call-seq:
|
75
|
+
* read_memory(string, url, encoding, options)
|
76
|
+
*
|
77
|
+
* Read the HTML document contained in +string+ with given +url+, +encoding+,
|
78
|
+
* and +options+. See Nokogiri::HTML.parse
|
79
|
+
*/
|
80
|
+
static VALUE read_memory( VALUE klass,
|
81
|
+
VALUE string,
|
82
|
+
VALUE url,
|
83
|
+
VALUE encoding,
|
84
|
+
VALUE options )
|
85
|
+
{
|
86
|
+
const char * c_buffer = StringValuePtr(string);
|
87
|
+
const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
|
88
|
+
const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
|
89
|
+
int len = RSTRING_LEN(string);
|
90
|
+
VALUE error_list = rb_ary_new();
|
91
|
+
|
92
|
+
xmlResetLastError();
|
93
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
94
|
+
|
95
|
+
htmlDocPtr doc = htmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
|
96
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
97
|
+
|
98
|
+
if(doc == NULL) {
|
99
|
+
xmlFreeDoc(doc);
|
100
|
+
|
101
|
+
xmlErrorPtr error = xmlGetLastError();
|
102
|
+
if(error)
|
103
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
|
104
|
+
else
|
105
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
106
|
+
|
107
|
+
return Qnil;
|
108
|
+
}
|
109
|
+
|
110
|
+
VALUE document = Nokogiri_wrap_xml_document(klass, doc);
|
111
|
+
rb_iv_set(document, "@errors", error_list);
|
112
|
+
return document;
|
113
|
+
}
|
114
|
+
|
115
|
+
/*
|
116
|
+
* call-seq:
|
117
|
+
* type
|
118
|
+
*
|
119
|
+
* The type for this document
|
120
|
+
*/
|
121
|
+
static VALUE type(VALUE self)
|
122
|
+
{
|
123
|
+
htmlDocPtr doc;
|
124
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
125
|
+
return INT2NUM((long)doc->type);
|
126
|
+
}
|
127
|
+
|
128
|
+
VALUE cNokogiriHtmlDocument ;
|
129
|
+
void init_html_document()
|
130
|
+
{
|
131
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
132
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
133
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
134
|
+
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
135
|
+
VALUE xml_doc = rb_define_class_under(xml, "Document", node);
|
136
|
+
VALUE klass = rb_define_class_under(html, "Document", xml_doc);
|
137
|
+
|
138
|
+
cNokogiriHtmlDocument = klass;
|
139
|
+
|
140
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, 4);
|
141
|
+
rb_define_singleton_method(klass, "read_io", read_io, 4);
|
142
|
+
rb_define_singleton_method(klass, "new", new, -1);
|
143
|
+
|
144
|
+
rb_define_method(klass, "type", type, 0);
|
145
|
+
}
|