nokogiri 1.5.0-x86-mswin32-60 → 1.5.1.rc1-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +39 -12
- data/CHANGELOG.rdoc +28 -0
- data/C_CODING_STYLE.rdoc +27 -0
- data/Manifest.txt +4 -0
- data/README.rdoc +11 -7
- data/Rakefile +40 -25
- data/bin/nokogiri +10 -2
- data/ext/nokogiri/extconf.rb +9 -1
- data/ext/nokogiri/html_document.c +16 -0
- data/ext/nokogiri/html_sax_parser_context.c +59 -37
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +6 -8
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +101 -3
- data/ext/nokogiri/xml_document.h +3 -3
- data/ext/nokogiri/xml_node.c +150 -58
- data/ext/nokogiri/xml_node_set.c +169 -120
- data/ext/nokogiri/xml_node_set.h +5 -0
- data/ext/nokogiri/xml_sax_parser_context.c +64 -41
- data/ext/nokogiri/xml_text.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +30 -24
- data/ext/nokogiri/xslt_stylesheet.c +62 -16
- data/ext/nokogiri/xslt_stylesheet.h +5 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css/parser.rb +165 -159
- data/lib/nokogiri/css/parser.y +6 -3
- data/lib/nokogiri/css/tokenizer.rb +1 -1
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/document.rb +82 -42
- data/lib/nokogiri/html/sax/push_parser.rb +16 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +6 -0
- data/lib/nokogiri/xml/builder.rb +7 -1
- data/lib/nokogiri/xml/document.rb +32 -17
- data/lib/nokogiri/xml/document_fragment.rb +6 -1
- data/lib/nokogiri/xml/node.rb +40 -9
- data/lib/nokogiri/xslt.rb +5 -1
- data/tasks/cross_compile.rb +1 -0
- data/tasks/nokogiri.org.rb +6 -0
- data/tasks/test.rb +1 -0
- data/test/css/test_xpath_visitor.rb +6 -0
- data/test/helper.rb +1 -0
- data/test/html/test_document.rb +26 -0
- data/test/html/test_document_fragment.rb +1 -2
- data/test/test_memory_leak.rb +81 -1
- data/test/test_xslt_transforms.rb +152 -123
- data/test/xml/test_builder.rb +24 -2
- data/test/xml/test_c14n.rb +151 -0
- data/test/xml/test_document.rb +48 -0
- data/test/xml/test_namespace.rb +5 -0
- data/test/xml/test_node.rb +82 -1
- data/test/xml/test_node_attributes.rb +19 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +32 -0
- data/test/xml/test_node_set.rb +16 -8
- data/test/xml/test_reader_encoding.rb +16 -0
- data/test/xml/test_unparented_node.rb +24 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +22 -0
- metadata +159 -126
data/CHANGELOG.ja.rdoc
CHANGED
@@ -1,22 +1,49 @@
|
|
1
|
+
== 1.5.1 / 未リリース
|
2
|
+
|
3
|
+
* 新機能
|
4
|
+
|
5
|
+
* XML::Builder#comment はコメントノードを作れるようになった.
|
6
|
+
* CSS searches now support namespaced attributes. #593
|
7
|
+
|
8
|
+
* バグの修正
|
9
|
+
|
10
|
+
* エンコーディング自動認識において発生しうるメモリリークを修正.
|
11
|
+
@ender672に感謝!
|
12
|
+
|
13
|
+
* homebrew はインストールしたら、extconf は homebrew のパスを読む。
|
14
|
+
|
15
|
+
* XML::Attr nodes are not allowed to be added as node children, so an
|
16
|
+
exception is raised. #558
|
17
|
+
|
18
|
+
* No longer defensively "pickle" adjacent text nodes on
|
19
|
+
Node#add_next_sibling and Node#add_previous_sibling calls. #595.
|
20
|
+
|
21
|
+
* Document#add_child now accepts a Node, NodeSet, DocumentFragment,
|
22
|
+
or String. #546.
|
23
|
+
|
24
|
+
* Document#create_element now recognizes namespaces containing
|
25
|
+
non-word characters (like "SOAP-ENV"). This is mostly relevant to
|
26
|
+
users of Builder, which calls Document#create_element for nearly
|
27
|
+
everything. #531.
|
28
|
+
|
1
29
|
== 1.5.0 / 2011年7月1日
|
2
30
|
|
3
|
-
*
|
31
|
+
* 註
|
4
32
|
|
5
|
-
*
|
33
|
+
* 1.4.7からの変更点を参照
|
6
34
|
|
7
|
-
*
|
35
|
+
* 新機能
|
8
36
|
|
9
|
-
*
|
37
|
+
* 各文書形式用のデフォルトのNode::SaveOptionsの組合せを定数化.
|
38
|
+
(Node::SaveOptions::DEFAULT_{X,H,XH}TML)
|
10
39
|
|
11
|
-
*
|
40
|
+
* バグの修正
|
12
41
|
|
13
|
-
*
|
14
|
-
|
15
|
-
*
|
16
|
-
*
|
17
|
-
|
18
|
-
encoding.
|
19
|
-
* Add support for <meta charset="...">.
|
42
|
+
* JRuby版ではホワイトスペースの扱いに難があるため、XML出力(to_xml)において
|
43
|
+
自動整形をデフォルトでは行わないように変更. #415
|
44
|
+
* JRuby版でNodeのないNodeSetでNullPointerExceptionが発生するのを修正. #443
|
45
|
+
* エンコーディング宣言のないHTMLファイルで部分的に重複したドキュメントが生成される問題を修正した. #478
|
46
|
+
* <meta charset="..."> を認識するようになった.
|
20
47
|
|
21
48
|
|
22
49
|
== 1.5.0 beta3 2010年12月2日
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,31 @@
|
|
1
|
+
== 1.5.1 / unreleased
|
2
|
+
|
3
|
+
* Features
|
4
|
+
|
5
|
+
* XML::Builder#comment allows creation of comment nodes.
|
6
|
+
* CSS searches now support namespaced attributes. #593
|
7
|
+
|
8
|
+
* Bugfixes
|
9
|
+
|
10
|
+
* Fix a memory leak in encoding detection. Thanks for pointing this
|
11
|
+
out, @ender672!
|
12
|
+
|
13
|
+
* extconf searches homebrew paths if homebrew is installed.
|
14
|
+
|
15
|
+
* XML::Attr nodes are not allowed to be added as node children, so an
|
16
|
+
exception is raised. #558
|
17
|
+
|
18
|
+
* No longer defensively "pickle" adjacent text nodes on
|
19
|
+
Node#add_next_sibling and Node#add_previous_sibling calls. #595.
|
20
|
+
|
21
|
+
* Document#add_child now accepts a Node, NodeSet, DocumentFragment,
|
22
|
+
or String. #546.
|
23
|
+
|
24
|
+
* Document#create_element now recognizes namespaces containing
|
25
|
+
non-word characters (like "SOAP-ENV"). This is mostly relevant to
|
26
|
+
users of Builder, which calls Document#create_element for nearly
|
27
|
+
everything. #531.
|
28
|
+
|
1
29
|
== 1.5.0 / 2011-07-01
|
2
30
|
|
3
31
|
* Notes
|
data/C_CODING_STYLE.rdoc
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
= C/C++ mode style for Nokogiri
|
2
|
+
|
3
|
+
Please don't propose commits that only change whitespace. However, if your
|
4
|
+
commit touches a function or section that is not using MRI Ruby conventions,
|
5
|
+
feel free to update whitespace in the surrounding code.
|
6
|
+
|
7
|
+
= WHITESPACE:
|
8
|
+
|
9
|
+
indent level: 2
|
10
|
+
indent type: Always spaces
|
11
|
+
Line Breaks: LF
|
12
|
+
|
13
|
+
= FUNCTION DECLARATION:
|
14
|
+
|
15
|
+
ANSI C style:
|
16
|
+
|
17
|
+
type name(args)
|
18
|
+
{
|
19
|
+
declarations
|
20
|
+
|
21
|
+
code
|
22
|
+
}
|
23
|
+
|
24
|
+
= SOURCES:
|
25
|
+
|
26
|
+
* <3<3<3
|
27
|
+
|
data/Manifest.txt
CHANGED
@@ -72,6 +72,8 @@ ext/nokogiri/html_entity_lookup.c
|
|
72
72
|
ext/nokogiri/html_entity_lookup.h
|
73
73
|
ext/nokogiri/html_sax_parser_context.c
|
74
74
|
ext/nokogiri/html_sax_parser_context.h
|
75
|
+
ext/nokogiri/html_sax_push_parser.c
|
76
|
+
ext/nokogiri/html_sax_push_parser.h
|
75
77
|
ext/nokogiri/nokogiri.c
|
76
78
|
ext/nokogiri/nokogiri.h
|
77
79
|
ext/nokogiri/xml_attr.c
|
@@ -154,6 +156,7 @@ lib/nokogiri/html/element_description_defaults.rb
|
|
154
156
|
lib/nokogiri/html/entity_lookup.rb
|
155
157
|
lib/nokogiri/html/sax/parser.rb
|
156
158
|
lib/nokogiri/html/sax/parser_context.rb
|
159
|
+
lib/nokogiri/html/sax/push_parser.rb
|
157
160
|
lib/nokogiri/syntax_error.rb
|
158
161
|
lib/nokogiri/version.rb
|
159
162
|
lib/nokogiri/xml.rb
|
@@ -270,6 +273,7 @@ test/xml/test_node_attributes.rb
|
|
270
273
|
test/xml/test_node_encoding.rb
|
271
274
|
test/xml/test_node_reparenting.rb
|
272
275
|
test/xml/test_node_set.rb
|
276
|
+
test/xml/test_node_inheritance.rb
|
273
277
|
test/xml/test_parse_options.rb
|
274
278
|
test/xml/test_processing_instruction.rb
|
275
279
|
test/xml/test_reader_encoding.rb
|
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= Nokogiri
|
1
|
+
= Nokogiri {<img src="https://secure.travis-ci.org/tenderlove/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/tenderlove/nokogiri]
|
2
2
|
|
3
3
|
* http://nokogiri.org
|
4
4
|
* http://github.com/tenderlove/nokogiri/wikis
|
@@ -25,6 +25,10 @@ correctly implemented CSS3 selector support as well as XPath support.
|
|
25
25
|
|
26
26
|
== SUPPORT:
|
27
27
|
|
28
|
+
Before filing a bug report, please read our {submission guidelines}[http://nokogiri.org/tutorials/getting_help.html] at:
|
29
|
+
|
30
|
+
* http://nokogiri.org/tutorials/getting_help.html
|
31
|
+
|
28
32
|
The Nokogiri {mailing list}[http://groups.google.com/group/nokogiri-talk]
|
29
33
|
is available here:
|
30
34
|
|
@@ -50,19 +54,19 @@ The IRC channel is #nokogiri on freenode.
|
|
50
54
|
|
51
55
|
####
|
52
56
|
# Search for nodes by css
|
53
|
-
doc.css('h3.r a
|
57
|
+
doc.css('h3.r a').each do |link|
|
54
58
|
puts link.content
|
55
59
|
end
|
56
60
|
|
57
61
|
####
|
58
62
|
# Search for nodes by xpath
|
59
|
-
doc.xpath('//h3/a
|
63
|
+
doc.xpath('//h3/a').each do |link|
|
60
64
|
puts link.content
|
61
65
|
end
|
62
66
|
|
63
67
|
####
|
64
68
|
# Or mix and match.
|
65
|
-
doc.search('h3.r a.l', '//h3/a
|
69
|
+
doc.search('h3.r a.l', '//h3/a').each do |link|
|
66
70
|
puts link.content
|
67
71
|
end
|
68
72
|
|
@@ -128,8 +132,8 @@ Then run rake:
|
|
128
132
|
|
129
133
|
=== Developing on JRuby
|
130
134
|
|
131
|
-
Currently, development with JRuby depends on
|
132
|
-
|
135
|
+
Currently, development with JRuby depends on CRuby being installed. With
|
136
|
+
CRuby, install racc and rexical:
|
133
137
|
|
134
138
|
$ gem install racc rexical
|
135
139
|
|
@@ -145,7 +149,7 @@ Then run rake:
|
|
145
149
|
|
146
150
|
(The MIT License)
|
147
151
|
|
148
|
-
Copyright (c) 2008 -
|
152
|
+
Copyright (c) 2008 - 2012:
|
149
153
|
|
150
154
|
* {Aaron Patterson}[http://tenderlovemaking.com]
|
151
155
|
* {Mike Dalessio}[http://mike.daless.io]
|
data/Rakefile
CHANGED
@@ -31,24 +31,22 @@ HOE = Hoe.spec 'nokogiri' do
|
|
31
31
|
|
32
32
|
self.clean_globs += [
|
33
33
|
'nokogiri.gemspec',
|
34
|
-
'lib/nokogiri
|
35
|
-
'lib/nokogiri/nokogiri.{so,dylib,rb,bundle}',
|
36
|
-
'lib/nokogiri/nokogiri.rb',
|
34
|
+
'lib/nokogiri/nokogiri.{bundle,jar,rb,so}',
|
37
35
|
'lib/nokogiri/1.{8,9}',
|
38
36
|
GENERATED_PARSER,
|
39
37
|
GENERATED_TOKENIZER
|
40
38
|
]
|
41
39
|
|
42
40
|
self.extra_dev_deps += [
|
41
|
+
["hoe-bundler", ">= 1.1"],
|
42
|
+
["hoe-debugging", ">= 1.0.3"],
|
43
|
+
["hoe-gemspec", ">= 1.0"],
|
44
|
+
["hoe-git", ">= 1.4"],
|
45
|
+
["mini_portile", ">= 0.2.2"],
|
46
|
+
["minitest", "~> 2.2.2"],
|
43
47
|
["racc", ">= 1.4.6"],
|
48
|
+
["rake-compiler", "= 0.8.0"],
|
44
49
|
["rexical", ">= 1.0.5"],
|
45
|
-
["rake-compiler", ">= 0.7.9"],
|
46
|
-
["minitest", "~> 2.2.2"],
|
47
|
-
["mini_portile", ">= 0.2.2"],
|
48
|
-
["hoe-debugging", ">= 0"],
|
49
|
-
["hoe-git", ">= 0"],
|
50
|
-
["hoe-gemspec", ">= 0"],
|
51
|
-
["hoe-bundler", ">= 0"]
|
52
50
|
]
|
53
51
|
|
54
52
|
if java?
|
@@ -83,7 +81,12 @@ if java?
|
|
83
81
|
HOE.spec.files += ['lib/nokogiri/nokogiri.jar']
|
84
82
|
end
|
85
83
|
else
|
86
|
-
|
84
|
+
mingw_available = true
|
85
|
+
begin
|
86
|
+
require 'tasks/cross_compile'
|
87
|
+
rescue
|
88
|
+
mingw_available = false
|
89
|
+
end
|
87
90
|
require "rake/extensiontask"
|
88
91
|
|
89
92
|
HOE.spec.files.reject! { |f| f =~ %r{\.(java|jar)$} }
|
@@ -91,13 +94,15 @@ else
|
|
91
94
|
Rake::ExtensionTask.new("nokogiri", HOE.spec) do |ext|
|
92
95
|
ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact)
|
93
96
|
ext.config_options << ENV['EXTOPTS']
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
97
|
+
if mingw_available
|
98
|
+
ext.cross_compile = true
|
99
|
+
ext.cross_platform = ["x86-mswin32-60", "x86-mingw32"]
|
100
|
+
ext.cross_config_options << "--with-xml2-include=#{File.join($recipes[:libxml2].path, 'include', 'libxml2')}"
|
101
|
+
ext.cross_config_options << "--with-xml2-lib=#{File.join($recipes[:libxml2].path, 'lib')}"
|
102
|
+
ext.cross_config_options << "--with-iconv-dir=#{$recipes[:libiconv].path}"
|
103
|
+
ext.cross_config_options << "--with-xslt-dir=#{$recipes[:libxslt].path}"
|
104
|
+
ext.cross_config_options << "--with-zlib-dir=#{CROSS_DIR}"
|
105
|
+
end
|
101
106
|
end
|
102
107
|
end
|
103
108
|
|
@@ -110,6 +115,7 @@ task 'gem:spec' => 'generate' if Rake::Task.task_defined?("gem:spec")
|
|
110
115
|
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
|
111
116
|
racc = RbConfig::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
|
112
117
|
racc = "#{::RbConfig::CONFIG['bindir']}/racc" if racc.empty?
|
118
|
+
racc = %x{command -v racc}.strip if racc.empty?
|
113
119
|
sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
|
114
120
|
end
|
115
121
|
|
@@ -145,10 +151,19 @@ end
|
|
145
151
|
|
146
152
|
desc "build a windows gem without all the ceremony."
|
147
153
|
task "gem:windows" => "gem" do
|
148
|
-
|
154
|
+
cross_rubies = ["1.8.7-p330", "1.9.2-p136"]
|
155
|
+
ruby_cc_version = cross_rubies.collect { |_| _.split("-").first }.join(":") # e.g., "1.8.7:1.9.2"
|
156
|
+
rake_compiler_config_path = "#{ENV['HOME']}/.rake-compiler/config.yml"
|
157
|
+
|
158
|
+
unless File.exists? rake_compiler_config_path
|
159
|
+
raise "rake-compiler has not installed any cross rubies. try running 'env --unset=HOST rake-compiler cross-ruby VERSION=#{cross_rubies.first}'"
|
160
|
+
end
|
161
|
+
rake_compiler_config = YAML.load_file(rake_compiler_config_path)
|
149
162
|
|
150
|
-
# check that rake-compiler config contains the right patchlevels
|
151
|
-
|
163
|
+
# check that rake-compiler config contains the right patchlevels. see #279 for background,
|
164
|
+
# and http://blog.mmediasys.com/2011/01/22/rake-compiler-updated-list-of-supported-ruby-versions-for-cross-compilation/
|
165
|
+
# for more up-to-date docs.
|
166
|
+
cross_rubies.each do |version|
|
152
167
|
majmin, patchlevel = version.split("-")
|
153
168
|
rbconfig = "rbconfig-#{majmin}"
|
154
169
|
unless rake_compiler_config.key?(rbconfig) && rake_compiler_config[rbconfig] =~ /-#{patchlevel}/
|
@@ -156,12 +171,12 @@ task "gem:windows" => "gem" do
|
|
156
171
|
end
|
157
172
|
end
|
158
173
|
|
159
|
-
# verify that --export-all is in the 1.9
|
160
|
-
|
161
|
-
raise "rbconfig #{
|
174
|
+
# verify that --export-all is in the 1.9 rbconfig. see #279,#374,#375.
|
175
|
+
rbconfig_19 = rake_compiler_config["rbconfig-1.9.2"]
|
176
|
+
raise "rbconfig #{rbconfig_19} needs --export-all in its DLDFLAGS value" if File.read(rbconfig_19).split("\n").grep(/CONFIG\["DLDFLAGS"\].*--export-all/).empty?
|
162
177
|
|
163
178
|
pkg_config_path = [:libxslt, :libxml2].collect { |pkg| File.join($recipes[pkg].path, "lib/pkgconfig") }.join(":")
|
164
|
-
sh("env PKG_CONFIG_PATH=#{pkg_config_path} RUBY_CC_VERSION
|
179
|
+
sh("env PKG_CONFIG_PATH=#{pkg_config_path} RUBY_CC_VERSION=#{ruby_cc_version} rake cross native gem") || raise("build failed!")
|
165
180
|
end
|
166
181
|
|
167
182
|
# vim: syntax=Ruby
|
data/bin/nokogiri
CHANGED
@@ -27,6 +27,10 @@ opts = OptionParser.new do |opts|
|
|
27
27
|
encoding = v
|
28
28
|
end
|
29
29
|
|
30
|
+
opts.on("--rng <uri|path>", "Validate using this rng file.") do |v|
|
31
|
+
@rng = open(v) {|f| Nokogiri::XML::RelaxNG(f)}
|
32
|
+
end
|
33
|
+
|
30
34
|
opts.on_tail("-?", "--help", "Show this message") do
|
31
35
|
puts opts
|
32
36
|
exit
|
@@ -48,6 +52,10 @@ end
|
|
48
52
|
|
49
53
|
@doc = parse_class.parse(open(uri).read, nil, encoding)
|
50
54
|
|
51
|
-
|
52
|
-
|
55
|
+
if @rng
|
56
|
+
puts @rng.validate(@doc)
|
57
|
+
else
|
58
|
+
puts "Your document is stored in @doc..."
|
59
|
+
IRB.start
|
60
|
+
end
|
53
61
|
|
data/ext/nokogiri/extconf.rb
CHANGED
@@ -30,7 +30,8 @@ if RbConfig::MAKEFILE_CONFIG['CC'] =~ /mingw/
|
|
30
30
|
end
|
31
31
|
|
32
32
|
if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
|
33
|
-
$CFLAGS << " -O3
|
33
|
+
$CFLAGS << " -O3" unless $CFLAGS[/-O\d/]
|
34
|
+
$CFLAGS << " -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
|
34
35
|
end
|
35
36
|
|
36
37
|
if RbConfig::CONFIG['target_os'] =~ /mswin32/
|
@@ -80,6 +81,13 @@ else
|
|
80
81
|
'/usr/local/include/libxml2',
|
81
82
|
File.join(INCLUDEDIR, "libxml2")
|
82
83
|
] + HEADER_DIRS
|
84
|
+
|
85
|
+
# If the user has homebrew installed, use the libxml2 inside homebrew
|
86
|
+
brew_prefix = `brew --prefix libxml2 2> /dev/null`.chomp
|
87
|
+
unless brew_prefix.empty?
|
88
|
+
LIB_DIRS.unshift File.join(brew_prefix, 'lib')
|
89
|
+
XML2_HEADER_DIRS.unshift File.join(brew_prefix, 'include/libxml2')
|
90
|
+
end
|
83
91
|
end
|
84
92
|
|
85
93
|
dir_config('zlib', HEADER_DIRS, LIB_DIRS)
|
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <html_document.h>
|
2
2
|
|
3
|
+
static ID id_encoding_found;
|
4
|
+
|
3
5
|
/*
|
4
6
|
* call-seq:
|
5
7
|
* new
|
@@ -56,6 +58,18 @@ static VALUE read_io( VALUE klass,
|
|
56
58
|
);
|
57
59
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
58
60
|
|
61
|
+
/*
|
62
|
+
* If EncodingFound has occurred in EncodingReader, make sure to do
|
63
|
+
* a cleanup and propagate the error.
|
64
|
+
*/
|
65
|
+
if (rb_respond_to(io, id_encoding_found)) {
|
66
|
+
VALUE encoding_found = rb_funcall(io, id_encoding_found, 0);
|
67
|
+
if (!NIL_P(encoding_found)) {
|
68
|
+
xmlFreeDoc(doc);
|
69
|
+
rb_exc_raise(encoding_found);
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
59
73
|
if(doc == NULL) {
|
60
74
|
xmlErrorPtr error;
|
61
75
|
|
@@ -151,4 +165,6 @@ void init_html_document()
|
|
151
165
|
rb_define_singleton_method(klass, "new", new, -1);
|
152
166
|
|
153
167
|
rb_define_method(klass, "type", type, 0);
|
168
|
+
|
169
|
+
id_encoding_found = rb_intern("encoding_found");
|
154
170
|
}
|
@@ -13,31 +13,35 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
13
13
|
NOKOGIRI_DEBUG_END(handler);
|
14
14
|
}
|
15
15
|
|
16
|
-
static VALUE
|
16
|
+
static VALUE
|
17
|
+
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
17
18
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
19
|
+
htmlParserCtxtPtr ctxt;
|
20
|
+
|
21
|
+
if (NIL_P(data))
|
22
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
23
|
+
if (!(int)RSTRING_LEN(data))
|
24
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
25
|
+
|
26
|
+
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
|
27
|
+
(int)RSTRING_LEN(data));
|
28
|
+
if (ctxt->sax) {
|
29
|
+
xmlFree(ctxt->sax);
|
30
|
+
ctxt->sax = NULL;
|
31
|
+
}
|
28
32
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
if (RTEST(encoding)) {
|
34
|
+
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
|
35
|
+
if (enc != NULL) {
|
36
|
+
xmlSwitchToEncoding(ctxt, enc);
|
37
|
+
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
38
|
+
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
39
|
+
StringValuePtr(encoding));
|
40
|
+
}
|
41
|
+
}
|
37
42
|
}
|
38
|
-
}
|
39
43
|
|
40
|
-
|
44
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
41
45
|
}
|
42
46
|
|
43
47
|
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
@@ -49,30 +53,48 @@ static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
|
49
53
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
50
54
|
}
|
51
55
|
|
52
|
-
static VALUE
|
56
|
+
static VALUE
|
57
|
+
parse_doc(VALUE ctxt_val)
|
58
|
+
{
|
59
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
60
|
+
htmlParseDocument(ctxt);
|
61
|
+
return Qnil;
|
62
|
+
}
|
63
|
+
|
64
|
+
static VALUE
|
65
|
+
parse_doc_finalize(VALUE ctxt_val)
|
53
66
|
{
|
54
|
-
|
55
|
-
|
67
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
68
|
+
|
69
|
+
if (ctxt->myDoc)
|
70
|
+
xmlFreeDoc(ctxt->myDoc);
|
56
71
|
|
57
|
-
|
58
|
-
|
72
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
73
|
+
return Qnil;
|
74
|
+
}
|
75
|
+
|
76
|
+
static VALUE
|
77
|
+
parse_with(VALUE self, VALUE sax_handler)
|
78
|
+
{
|
79
|
+
htmlParserCtxtPtr ctxt;
|
80
|
+
htmlSAXHandlerPtr sax;
|
59
81
|
|
60
|
-
|
61
|
-
|
82
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
|
83
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
62
84
|
|
63
|
-
|
64
|
-
|
65
|
-
xmlFree(ctxt->sax);
|
85
|
+
Data_Get_Struct(self, htmlParserCtxt, ctxt);
|
86
|
+
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
|
66
87
|
|
67
|
-
|
68
|
-
|
88
|
+
/* Free the sax handler since we'll assign our own */
|
89
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
|
90
|
+
xmlFree(ctxt->sax);
|
69
91
|
|
70
|
-
|
92
|
+
ctxt->sax = sax;
|
93
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
71
94
|
|
72
|
-
|
95
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
73
96
|
|
74
|
-
|
75
|
-
return self;
|
97
|
+
return self;
|
76
98
|
}
|
77
99
|
|
78
100
|
void init_html_sax_parser_context()
|