nokogiri 1.5.0-x86-mingw32 → 1.5.1.rc1-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (63)
  1. data/CHANGELOG.ja.rdoc +39 -12
  2. data/CHANGELOG.rdoc +28 -0
  3. data/C_CODING_STYLE.rdoc +27 -0
  4. data/Manifest.txt +4 -0
  5. data/README.rdoc +11 -7
  6. data/Rakefile +40 -25
  7. data/bin/nokogiri +10 -2
  8. data/ext/nokogiri/extconf.rb +9 -1
  9. data/ext/nokogiri/html_document.c +16 -0
  10. data/ext/nokogiri/html_sax_parser_context.c +59 -37
  11. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  12. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  13. data/ext/nokogiri/nokogiri.c +6 -8
  14. data/ext/nokogiri/nokogiri.h +3 -0
  15. data/ext/nokogiri/xml_document.c +101 -3
  16. data/ext/nokogiri/xml_document.h +3 -3
  17. data/ext/nokogiri/xml_node.c +150 -58
  18. data/ext/nokogiri/xml_node_set.c +169 -120
  19. data/ext/nokogiri/xml_node_set.h +5 -0
  20. data/ext/nokogiri/xml_sax_parser_context.c +64 -41
  21. data/ext/nokogiri/xml_text.c +2 -0
  22. data/ext/nokogiri/xml_xpath_context.c +30 -24
  23. data/ext/nokogiri/xslt_stylesheet.c +62 -16
  24. data/ext/nokogiri/xslt_stylesheet.h +5 -0
  25. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  26. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  27. data/lib/nokogiri/css/parser.rb +165 -159
  28. data/lib/nokogiri/css/parser.y +6 -3
  29. data/lib/nokogiri/css/tokenizer.rb +1 -1
  30. data/lib/nokogiri/css/tokenizer.rex +1 -1
  31. data/lib/nokogiri/html.rb +1 -0
  32. data/lib/nokogiri/html/document.rb +82 -42
  33. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  34. data/lib/nokogiri/version.rb +1 -1
  35. data/lib/nokogiri/xml.rb +6 -0
  36. data/lib/nokogiri/xml/builder.rb +7 -1
  37. data/lib/nokogiri/xml/document.rb +32 -17
  38. data/lib/nokogiri/xml/document_fragment.rb +6 -1
  39. data/lib/nokogiri/xml/node.rb +40 -9
  40. data/lib/nokogiri/xslt.rb +5 -1
  41. data/tasks/cross_compile.rb +1 -0
  42. data/tasks/nokogiri.org.rb +6 -0
  43. data/tasks/test.rb +1 -0
  44. data/test/css/test_xpath_visitor.rb +6 -0
  45. data/test/helper.rb +1 -0
  46. data/test/html/test_document.rb +26 -0
  47. data/test/html/test_document_fragment.rb +1 -2
  48. data/test/test_memory_leak.rb +81 -1
  49. data/test/test_xslt_transforms.rb +152 -123
  50. data/test/xml/test_builder.rb +24 -2
  51. data/test/xml/test_c14n.rb +151 -0
  52. data/test/xml/test_document.rb +48 -0
  53. data/test/xml/test_namespace.rb +5 -0
  54. data/test/xml/test_node.rb +82 -1
  55. data/test/xml/test_node_attributes.rb +19 -0
  56. data/test/xml/test_node_inheritance.rb +32 -0
  57. data/test/xml/test_node_reparenting.rb +32 -0
  58. data/test/xml/test_node_set.rb +16 -8
  59. data/test/xml/test_reader_encoding.rb +16 -0
  60. data/test/xml/test_unparented_node.rb +24 -0
  61. data/test/xml/test_xinclude.rb +83 -0
  62. data/test/xml/test_xpath.rb +22 -0
  63. metadata +159 -126
data/CHANGELOG.ja.rdoc CHANGED
@@ -1,22 +1,49 @@
1
+ == 1.5.1 / 未リリース
2
+
3
+ * 新機能
4
+
5
+ * XML::Builder#comment はコメントノードを作れるようになった.
6
+ * CSS searches now support namespaced attributes. #593
7
+
8
+ * バグの修正
9
+
10
+ * エンコーディング自動認識において発生しうるメモリリークを修正.
11
+ @ender672に感謝!
12
+
13
+ * homebrew はインストールしたら、extconf は homebrew のパスを読む。
14
+
15
+ * XML::Attr nodes are not allowed to be added as node children, so an
16
+ exception is raised. #558
17
+
18
+ * No longer defensively "pickle" adjacent text nodes on
19
+ Node#add_next_sibling and Node#add_previous_sibling calls. #595.
20
+
21
+ * Document#add_child now accepts a Node, NodeSet, DocumentFragment,
22
+ or String. #546.
23
+
24
+ * Document#create_element now recognizes namespaces containing
25
+ non-word characters (like "SOAP-ENV"). This is mostly relevant to
26
+ users of Builder, which calls Document#create_element for nearly
27
+ everything. #531.
28
+
1
29
  == 1.5.0 / 2011年7月1日
2
30
 
3
- * Notes
31
+ *
4
32
 
5
- * See changelog from 1.4.7
33
+ * 1.4.7からの変更点を参照
6
34
 
7
- * Features
35
+ * 新機能
8
36
 
9
- * extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor)
37
+ * 各文書形式用のデフォルトのNode::SaveOptionsの組合せを定数化.
38
+ (Node::SaveOptions::DEFAULT_{X,H,XH}TML)
10
39
 
11
- * Bugfixes
40
+ * バグの修正
12
41
 
13
- * default output of XML on JRuby is no longer formatted due to
14
- inconsistent whitespace handling. #415
15
- * (JRuby) making empty NodeSets with null `nodes` member safe to operate on. #443
16
- * Fix a bug in advanced encoding detection that leads to partially
17
- duplicated document when parsing an HTML file with unknown
18
- encoding.
19
- * Add support for <meta charset="...">.
42
+ * JRuby版ではホワイトスペースの扱いに難があるため、XML出力(to_xml)において
43
+ 自動整形をデフォルトでは行わないように変更. #415
44
+ * JRuby版でNodeのないNodeSetでNullPointerExceptionが発生するのを修正. #443
45
+ * エンコーディング宣言のないHTMLファイルで部分的に重複したドキュメントが生成される問題を修正した. #478
46
+ * <meta charset="..."> を認識するようになった.
20
47
 
21
48
 
22
49
  == 1.5.0 beta3 2010年12月2日
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,31 @@
1
+ == 1.5.1 / unreleased
2
+
3
+ * Features
4
+
5
+ * XML::Builder#comment allows creation of comment nodes.
6
+ * CSS searches now support namespaced attributes. #593
7
+
8
+ * Bugfixes
9
+
10
+ * Fix a memory leak in encoding detection. Thanks for pointing this
11
+ out, @ender672!
12
+
13
+ * extconf searches homebrew paths if homebrew is installed.
14
+
15
+ * XML::Attr nodes are not allowed to be added as node children, so an
16
+ exception is raised. #558
17
+
18
+ * No longer defensively "pickle" adjacent text nodes on
19
+ Node#add_next_sibling and Node#add_previous_sibling calls. #595.
20
+
21
+ * Document#add_child now accepts a Node, NodeSet, DocumentFragment,
22
+ or String. #546.
23
+
24
+ * Document#create_element now recognizes namespaces containing
25
+ non-word characters (like "SOAP-ENV"). This is mostly relevant to
26
+ users of Builder, which calls Document#create_element for nearly
27
+ everything. #531.
28
+
1
29
  == 1.5.0 / 2011-07-01
2
30
 
3
31
  * Notes
data/C_CODING_STYLE.rdoc ADDED
@@ -0,0 +1,27 @@
1
+ = C/C++ mode style for Nokogiri
2
+
3
+ Please don't propose commits that only change whitespace. However, if your
4
+ commit touches a function or section that is not using MRI Ruby conventions,
5
+ feel free to update whitespace in the surrounding code.
6
+
7
+ = WHITESPACE:
8
+
9
+ indent level: 2
10
+ indent type: Always spaces
11
+ Line Breaks: LF
12
+
13
+ = FUNCTION DECLARATION:
14
+
15
+ ANSI C style:
16
+
17
+ type name(args)
18
+ {
19
+ declarations
20
+
21
+ code
22
+ }
23
+
24
+ = SOURCES:
25
+
26
+ * <3<3<3
27
+
data/Manifest.txt CHANGED
@@ -72,6 +72,8 @@ ext/nokogiri/html_entity_lookup.c
72
72
  ext/nokogiri/html_entity_lookup.h
73
73
  ext/nokogiri/html_sax_parser_context.c
74
74
  ext/nokogiri/html_sax_parser_context.h
75
+ ext/nokogiri/html_sax_push_parser.c
76
+ ext/nokogiri/html_sax_push_parser.h
75
77
  ext/nokogiri/nokogiri.c
76
78
  ext/nokogiri/nokogiri.h
77
79
  ext/nokogiri/xml_attr.c
@@ -154,6 +156,7 @@ lib/nokogiri/html/element_description_defaults.rb
154
156
  lib/nokogiri/html/entity_lookup.rb
155
157
  lib/nokogiri/html/sax/parser.rb
156
158
  lib/nokogiri/html/sax/parser_context.rb
159
+ lib/nokogiri/html/sax/push_parser.rb
157
160
  lib/nokogiri/syntax_error.rb
158
161
  lib/nokogiri/version.rb
159
162
  lib/nokogiri/xml.rb
@@ -270,6 +273,7 @@ test/xml/test_node_attributes.rb
270
273
  test/xml/test_node_encoding.rb
271
274
  test/xml/test_node_reparenting.rb
272
275
  test/xml/test_node_set.rb
276
+ test/xml/test_node_inheritance.rb
273
277
  test/xml/test_parse_options.rb
274
278
  test/xml/test_processing_instruction.rb
275
279
  test/xml/test_reader_encoding.rb
data/README.rdoc CHANGED
@@ -1,4 +1,4 @@
1
- = Nokogiri
1
+ = Nokogiri {<img src="https://secure.travis-ci.org/tenderlove/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/tenderlove/nokogiri]
2
2
 
3
3
  * http://nokogiri.org
4
4
  * http://github.com/tenderlove/nokogiri/wikis
@@ -25,6 +25,10 @@ correctly implemented CSS3 selector support as well as XPath support.
25
25
 
26
26
  == SUPPORT:
27
27
 
28
+ Before filing a bug report, please read our {submission guidelines}[http://nokogiri.org/tutorials/getting_help.html] at:
29
+
30
+ * http://nokogiri.org/tutorials/getting_help.html
31
+
28
32
  The Nokogiri {mailing list}[http://groups.google.com/group/nokogiri-talk]
29
33
  is available here:
30
34
 
@@ -50,19 +54,19 @@ The IRC channel is #nokogiri on freenode.
50
54
 
51
55
  ####
52
56
  # Search for nodes by css
53
- doc.css('h3.r a.l').each do |link|
57
+ doc.css('h3.r a').each do |link|
54
58
  puts link.content
55
59
  end
56
60
 
57
61
  ####
58
62
  # Search for nodes by xpath
59
- doc.xpath('//h3/a[@class="l"]').each do |link|
63
+ doc.xpath('//h3/a').each do |link|
60
64
  puts link.content
61
65
  end
62
66
 
63
67
  ####
64
68
  # Or mix and match.
65
- doc.search('h3.r a.l', '//h3/a[@class="l"]').each do |link|
69
+ doc.search('h3.r a.l', '//h3/a').each do |link|
66
70
  puts link.content
67
71
  end
68
72
 
@@ -128,8 +132,8 @@ Then run rake:
128
132
 
129
133
  === Developing on JRuby
130
134
 
131
- Currently, development with JRuby depends on C Ruby being installed. With
132
- normal C Ruby, install racc and rexical:
135
+ Currently, development with JRuby depends on CRuby being installed. With
136
+ CRuby, install racc and rexical:
133
137
 
134
138
  $ gem install racc rexical
135
139
 
@@ -145,7 +149,7 @@ Then run rake:
145
149
 
146
150
  (The MIT License)
147
151
 
148
- Copyright (c) 2008 - 2010:
152
+ Copyright (c) 2008 - 2012:
149
153
 
150
154
  * {Aaron Patterson}[http://tenderlovemaking.com]
151
155
  * {Mike Dalessio}[http://mike.daless.io]
data/Rakefile CHANGED
@@ -31,24 +31,22 @@ HOE = Hoe.spec 'nokogiri' do
31
31
 
32
32
  self.clean_globs += [
33
33
  'nokogiri.gemspec',
34
- 'lib/nokogiri/*.{o,so,bundle,a,log,dll}',
35
- 'lib/nokogiri/nokogiri.{so,dylib,rb,bundle}',
36
- 'lib/nokogiri/nokogiri.rb',
34
+ 'lib/nokogiri/nokogiri.{bundle,jar,rb,so}',
37
35
  'lib/nokogiri/1.{8,9}',
38
36
  GENERATED_PARSER,
39
37
  GENERATED_TOKENIZER
40
38
  ]
41
39
 
42
40
  self.extra_dev_deps += [
41
+ ["hoe-bundler", ">= 1.1"],
42
+ ["hoe-debugging", ">= 1.0.3"],
43
+ ["hoe-gemspec", ">= 1.0"],
44
+ ["hoe-git", ">= 1.4"],
45
+ ["mini_portile", ">= 0.2.2"],
46
+ ["minitest", "~> 2.2.2"],
43
47
  ["racc", ">= 1.4.6"],
48
+ ["rake-compiler", "= 0.8.0"],
44
49
  ["rexical", ">= 1.0.5"],
45
- ["rake-compiler", ">= 0.7.9"],
46
- ["minitest", "~> 2.2.2"],
47
- ["mini_portile", ">= 0.2.2"],
48
- ["hoe-debugging", ">= 0"],
49
- ["hoe-git", ">= 0"],
50
- ["hoe-gemspec", ">= 0"],
51
- ["hoe-bundler", ">= 0"]
52
50
  ]
53
51
 
54
52
  if java?
@@ -83,7 +81,12 @@ if java?
83
81
  HOE.spec.files += ['lib/nokogiri/nokogiri.jar']
84
82
  end
85
83
  else
86
- require 'tasks/cross_compile'
84
+ mingw_available = true
85
+ begin
86
+ require 'tasks/cross_compile'
87
+ rescue
88
+ mingw_available = false
89
+ end
87
90
  require "rake/extensiontask"
88
91
 
89
92
  HOE.spec.files.reject! { |f| f =~ %r{\.(java|jar)$} }
@@ -91,13 +94,15 @@ else
91
94
  Rake::ExtensionTask.new("nokogiri", HOE.spec) do |ext|
92
95
  ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact)
93
96
  ext.config_options << ENV['EXTOPTS']
94
- ext.cross_compile = true
95
- ext.cross_platform = ["x86-mswin32-60", "x86-mingw32"]
96
- ext.cross_config_options << "--with-xml2-include=#{File.join($recipes[:libxml2].path, 'include', 'libxml2')}"
97
- ext.cross_config_options << "--with-xml2-lib=#{File.join($recipes[:libxml2].path, 'lib')}"
98
- ext.cross_config_options << "--with-iconv-dir=#{$recipes[:libiconv].path}"
99
- ext.cross_config_options << "--with-xslt-dir=#{$recipes[:libxslt].path}"
100
- ext.cross_config_options << "--with-zlib-dir=#{CROSS_DIR}"
97
+ if mingw_available
98
+ ext.cross_compile = true
99
+ ext.cross_platform = ["x86-mswin32-60", "x86-mingw32"]
100
+ ext.cross_config_options << "--with-xml2-include=#{File.join($recipes[:libxml2].path, 'include', 'libxml2')}"
101
+ ext.cross_config_options << "--with-xml2-lib=#{File.join($recipes[:libxml2].path, 'lib')}"
102
+ ext.cross_config_options << "--with-iconv-dir=#{$recipes[:libiconv].path}"
103
+ ext.cross_config_options << "--with-xslt-dir=#{$recipes[:libxslt].path}"
104
+ ext.cross_config_options << "--with-zlib-dir=#{CROSS_DIR}"
105
+ end
101
106
  end
102
107
  end
103
108
 
@@ -110,6 +115,7 @@ task 'gem:spec' => 'generate' if Rake::Task.task_defined?("gem:spec")
110
115
  file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
111
116
  racc = RbConfig::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
112
117
  racc = "#{::RbConfig::CONFIG['bindir']}/racc" if racc.empty?
118
+ racc = %x{command -v racc}.strip if racc.empty?
113
119
  sh "#{racc} -l -o #{t.name} #{t.prerequisites.first}"
114
120
  end
115
121
 
@@ -145,10 +151,19 @@ end
145
151
 
146
152
  desc "build a windows gem without all the ceremony."
147
153
  task "gem:windows" => "gem" do
148
- rake_compiler_config = YAML.load_file("#{ENV['HOME']}/.rake-compiler/config.yml")
154
+ cross_rubies = ["1.8.7-p330", "1.9.2-p136"]
155
+ ruby_cc_version = cross_rubies.collect { |_| _.split("-").first }.join(":") # e.g., "1.8.7:1.9.2"
156
+ rake_compiler_config_path = "#{ENV['HOME']}/.rake-compiler/config.yml"
157
+
158
+ unless File.exists? rake_compiler_config_path
159
+ raise "rake-compiler has not installed any cross rubies. try running 'env --unset=HOST rake-compiler cross-ruby VERSION=#{cross_rubies.first}'"
160
+ end
161
+ rake_compiler_config = YAML.load_file(rake_compiler_config_path)
149
162
 
150
- # check that rake-compiler config contains the right patchlevels of 1.8.6 and 1.9.1. see #279.
151
- ["1.8.6-p383", "1.9.1-p243"].each do |version|
163
+ # check that rake-compiler config contains the right patchlevels. see #279 for background,
164
+ # and http://blog.mmediasys.com/2011/01/22/rake-compiler-updated-list-of-supported-ruby-versions-for-cross-compilation/
165
+ # for more up-to-date docs.
166
+ cross_rubies.each do |version|
152
167
  majmin, patchlevel = version.split("-")
153
168
  rbconfig = "rbconfig-#{majmin}"
154
169
  unless rake_compiler_config.key?(rbconfig) && rake_compiler_config[rbconfig] =~ /-#{patchlevel}/
@@ -156,12 +171,12 @@ task "gem:windows" => "gem" do
156
171
  end
157
172
  end
158
173
 
159
- # verify that --export-all is in the 1.9.1 rbconfig. see #279,#374,#375.
160
- rbconfig_191 = rake_compiler_config["rbconfig-1.9.1"]
161
- raise "rbconfig #{rbconfig_191} needs --export-all in its DLDFLAGS value" if File.read(rbconfig_191).grep(/CONFIG\["DLDFLAGS"\].*--export-all/).empty?
174
+ # verify that --export-all is in the 1.9 rbconfig. see #279,#374,#375.
175
+ rbconfig_19 = rake_compiler_config["rbconfig-1.9.2"]
176
+ raise "rbconfig #{rbconfig_19} needs --export-all in its DLDFLAGS value" if File.read(rbconfig_19).split("\n").grep(/CONFIG\["DLDFLAGS"\].*--export-all/).empty?
162
177
 
163
178
  pkg_config_path = [:libxslt, :libxml2].collect { |pkg| File.join($recipes[pkg].path, "lib/pkgconfig") }.join(":")
164
- sh("env PKG_CONFIG_PATH=#{pkg_config_path} RUBY_CC_VERSION=1.8.6:1.9.1 rake cross native gem") || raise("build failed!")
179
+ sh("env PKG_CONFIG_PATH=#{pkg_config_path} RUBY_CC_VERSION=#{ruby_cc_version} rake cross native gem") || raise("build failed!")
165
180
  end
166
181
 
167
182
  # vim: syntax=Ruby
data/bin/nokogiri CHANGED
@@ -27,6 +27,10 @@ opts = OptionParser.new do |opts|
27
27
  encoding = v
28
28
  end
29
29
 
30
+ opts.on("--rng <uri|path>", "Validate using this rng file.") do |v|
31
+ @rng = open(v) {|f| Nokogiri::XML::RelaxNG(f)}
32
+ end
33
+
30
34
  opts.on_tail("-?", "--help", "Show this message") do
31
35
  puts opts
32
36
  exit
@@ -48,6 +52,10 @@ end
48
52
 
49
53
  @doc = parse_class.parse(open(uri).read, nil, encoding)
50
54
 
51
- puts "Your document is stored in @doc..."
52
- IRB.start
55
+ if @rng
56
+ puts @rng.validate(@doc)
57
+ else
58
+ puts "Your document is stored in @doc..."
59
+ IRB.start
60
+ end
53
61
 
data/ext/nokogiri/extconf.rb CHANGED
@@ -30,7 +30,8 @@ if RbConfig::MAKEFILE_CONFIG['CC'] =~ /mingw/
30
30
  end
31
31
 
32
32
  if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
33
- $CFLAGS << " -O3 -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
33
+ $CFLAGS << " -O3" unless $CFLAGS[/-O\d/]
34
+ $CFLAGS << " -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
34
35
  end
35
36
 
36
37
  if RbConfig::CONFIG['target_os'] =~ /mswin32/
@@ -80,6 +81,13 @@ else
80
81
  '/usr/local/include/libxml2',
81
82
  File.join(INCLUDEDIR, "libxml2")
82
83
  ] + HEADER_DIRS
84
+
85
+ # If the user has homebrew installed, use the libxml2 inside homebrew
86
+ brew_prefix = `brew --prefix libxml2 2> /dev/null`.chomp
87
+ unless brew_prefix.empty?
88
+ LIB_DIRS.unshift File.join(brew_prefix, 'lib')
89
+ XML2_HEADER_DIRS.unshift File.join(brew_prefix, 'include/libxml2')
90
+ end
83
91
  end
84
92
 
85
93
  dir_config('zlib', HEADER_DIRS, LIB_DIRS)
data/ext/nokogiri/html_document.c CHANGED
@@ -1,5 +1,7 @@
1
1
  #include <html_document.h>
2
2
 
3
+ static ID id_encoding_found;
4
+
3
5
  /*
4
6
  * call-seq:
5
7
  * new
@@ -56,6 +58,18 @@ static VALUE read_io( VALUE klass,
56
58
  );
57
59
  xmlSetStructuredErrorFunc(NULL, NULL);
58
60
 
61
+ /*
62
+ * If EncodingFound has occurred in EncodingReader, make sure to do
63
+ * a cleanup and propagate the error.
64
+ */
65
+ if (rb_respond_to(io, id_encoding_found)) {
66
+ VALUE encoding_found = rb_funcall(io, id_encoding_found, 0);
67
+ if (!NIL_P(encoding_found)) {
68
+ xmlFreeDoc(doc);
69
+ rb_exc_raise(encoding_found);
70
+ }
71
+ }
72
+
59
73
  if(doc == NULL) {
60
74
  xmlErrorPtr error;
61
75
 
@@ -151,4 +165,6 @@ void init_html_document()
151
165
  rb_define_singleton_method(klass, "new", new, -1);
152
166
 
153
167
  rb_define_method(klass, "type", type, 0);
168
+
169
+ id_encoding_found = rb_intern("encoding_found");
154
170
  }
data/ext/nokogiri/html_sax_parser_context.c CHANGED
@@ -13,31 +13,35 @@ static void deallocate(xmlParserCtxtPtr ctxt)
13
13
  NOKOGIRI_DEBUG_END(handler);
14
14
  }
15
15
 
16
- static VALUE parse_memory(VALUE klass, VALUE data, VALUE encoding)
16
+ static VALUE
17
+ parse_memory(VALUE klass, VALUE data, VALUE encoding)
17
18
  {
18
- htmlParserCtxtPtr ctxt;
19
-
20
- if(NIL_P(data)) rb_raise(rb_eArgError, "data cannot be nil");
21
- if(!(int)RSTRING_LEN(data))
22
- rb_raise(rb_eRuntimeError, "data cannot be empty");
23
-
24
- ctxt = htmlCreateMemoryParserCtxt(
25
- StringValuePtr(data),
26
- (int)RSTRING_LEN(data)
27
- );
19
+ htmlParserCtxtPtr ctxt;
20
+
21
+ if (NIL_P(data))
22
+ rb_raise(rb_eArgError, "data cannot be nil");
23
+ if (!(int)RSTRING_LEN(data))
24
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
25
+
26
+ ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
27
+ (int)RSTRING_LEN(data));
28
+ if (ctxt->sax) {
29
+ xmlFree(ctxt->sax);
30
+ ctxt->sax = NULL;
31
+ }
28
32
 
29
- if(RTEST(encoding)) {
30
- xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
31
- if(enc != NULL) {
32
- xmlSwitchToEncoding(ctxt, enc);
33
- if(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
34
- rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
35
- StringValuePtr(encoding));
36
- }
33
+ if (RTEST(encoding)) {
34
+ xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
35
+ if (enc != NULL) {
36
+ xmlSwitchToEncoding(ctxt, enc);
37
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
38
+ rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
39
+ StringValuePtr(encoding));
40
+ }
41
+ }
37
42
  }
38
- }
39
43
 
40
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
44
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
41
45
  }
42
46
 
43
47
  static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
@@ -49,30 +53,48 @@ static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
49
53
  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
50
54
  }
51
55
 
52
- static VALUE parse_with(VALUE self, VALUE sax_handler)
56
+ static VALUE
57
+ parse_doc(VALUE ctxt_val)
58
+ {
59
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
60
+ htmlParseDocument(ctxt);
61
+ return Qnil;
62
+ }
63
+
64
+ static VALUE
65
+ parse_doc_finalize(VALUE ctxt_val)
53
66
  {
54
- htmlParserCtxtPtr ctxt;
55
- htmlSAXHandlerPtr sax;
67
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
68
+
69
+ if (ctxt->myDoc)
70
+ xmlFreeDoc(ctxt->myDoc);
56
71
 
57
- if(!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
58
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
72
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
73
+ return Qnil;
74
+ }
75
+
76
+ static VALUE
77
+ parse_with(VALUE self, VALUE sax_handler)
78
+ {
79
+ htmlParserCtxtPtr ctxt;
80
+ htmlSAXHandlerPtr sax;
59
81
 
60
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
61
- Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
82
+ if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
83
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
62
84
 
63
- /* Free the sax handler since we'll assign our own */
64
- if(ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
65
- xmlFree(ctxt->sax);
85
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
+ Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
66
87
 
67
- ctxt->sax = sax;
68
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
88
+ /* Free the sax handler since we'll assign our own */
89
+ if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
90
+ xmlFree(ctxt->sax);
69
91
 
70
- htmlParseDocument(ctxt);
92
+ ctxt->sax = sax;
93
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
71
94
 
72
- if(NULL != ctxt->myDoc) xmlFreeDoc(ctxt->myDoc);
95
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
73
96
 
74
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
75
- return self;
97
+ return self;
76
98
  }
77
99
 
78
100
  void init_html_sax_parser_context()