nokogiri 1.5.6.rc1-java → 1.5.6.rc2-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +3 -0
- data/CHANGELOG.rdoc +3 -0
- data/Manifest.txt +8 -4
- data/README.ja.rdoc +1 -1
- data/README.rdoc +1 -1
- data/ROADMAP.md +3 -0
- data/Rakefile +26 -7
- data/build_all +40 -27
- data/ext/java/nokogiri/HtmlDocument.java +26 -0
- data/ext/java/nokogiri/XmlDocument.java +17 -4
- data/ext/java/nokogiri/XmlDocumentFragment.java +1 -39
- data/ext/java/nokogiri/XmlNode.java +3 -2
- data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
- data/ext/java/nokogiri/XsltStylesheet.java +4 -2
- data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
- data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +12 -10
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +12 -2
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +1 -1
- data/ext/nokogiri/extconf.rb +1 -0
- data/ext/nokogiri/xslt_stylesheet.c +19 -2
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/node.rb +43 -50
- data/lib/nokogiri/xml/sax/parser.rb +7 -0
- data/lib/nokogiri/xslt.rb +1 -1
- data/tasks/cross_compile.rb +3 -3
- data/test/html/test_document.rb +23 -0
- data/test/test_xslt_transforms.rb +30 -0
- data/test/xml/sax/test_parser.rb +5 -0
- data/test/xml/test_node.rb +9 -1
- metadata +106 -80
data/CHANGELOG.ja.rdoc
CHANGED
@@ -6,6 +6,9 @@
|
|
6
6
|
|
7
7
|
* JRuby で '#' で始まる文字列を名前とする EntityReference を作ろうとすると INVALID_CHARACTER_ERR という例外が発生する。 #719
|
8
8
|
* JRuby では Nodeのサブクラスのnamespaceを正しく文字列に変換しない。 #715
|
9
|
+
* Nokogiri now detects XSLT transform errors. #731 (Thanks, Justin Fitzsimmons!)
|
10
|
+
* Raise an ArgumentError if an invalid encoding is passed to the SAX parser. #756 (Thanks, Bradley Schaefer!)
|
11
|
+
* JRuby Node#content now renders newlines properly. #737 (Thanks, Piotr Szmielew!)
|
9
12
|
|
10
13
|
|
11
14
|
== 1.5.5 / 2012年6月24日
|
data/CHANGELOG.rdoc
CHANGED
@@ -6,6 +6,9 @@
|
|
6
6
|
|
7
7
|
* JRuby raises INVALID_CHARACTER_ERR exception when EntityReference name starts with '#'. #719
|
8
8
|
* JRuby doesn't coerce namespaces out of strings on a direct subclass of Node. #715
|
9
|
+
* Nokogiri now detects XSLT transform errors. #731 (Thanks, Justin Fitzsimmons!)
|
10
|
+
* Raise an ArgumentError if an invalid encoding is passed to the SAX parser. #756 (Thanks, Bradley Schaefer!)
|
11
|
+
* JRuby Node#content now renders newlines properly. #737 (Thanks, Piotr Szmielew!)
|
9
12
|
|
10
13
|
|
11
14
|
== 1.5.5 / 2012-06-24
|
data/Manifest.txt
CHANGED
@@ -43,8 +43,13 @@ ext/java/nokogiri/XmlSyntaxError.java
|
|
43
43
|
ext/java/nokogiri/XmlText.java
|
44
44
|
ext/java/nokogiri/XmlXpathContext.java
|
45
45
|
ext/java/nokogiri/XsltStylesheet.java
|
46
|
+
ext/java/nokogiri/internals/ClosedStreamException.java
|
46
47
|
ext/java/nokogiri/internals/HtmlDomParserContext.java
|
48
|
+
ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java
|
47
49
|
ext/java/nokogiri/internals/NokogiriDocumentCache.java
|
50
|
+
ext/java/nokogiri/internals/NokogiriDomParser.java
|
51
|
+
ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java
|
52
|
+
ext/java/nokogiri/internals/NokogiriEntityResolver.java
|
48
53
|
ext/java/nokogiri/internals/NokogiriErrorHandler.java
|
49
54
|
ext/java/nokogiri/internals/NokogiriHandler.java
|
50
55
|
ext/java/nokogiri/internals/NokogiriHelpers.java
|
@@ -62,7 +67,6 @@ ext/java/nokogiri/internals/ReaderNode.java
|
|
62
67
|
ext/java/nokogiri/internals/SaveContextVisitor.java
|
63
68
|
ext/java/nokogiri/internals/SchemaErrorHandler.java
|
64
69
|
ext/java/nokogiri/internals/XmlDeclHandler.java
|
65
|
-
ext/java/nokogiri/internals/XmlDomParser.java
|
66
70
|
ext/java/nokogiri/internals/XmlDomParserContext.java
|
67
71
|
ext/java/nokogiri/internals/XmlSaxParser.java
|
68
72
|
ext/java/nokogiri/internals/XsltExtensionFunction.java
|
@@ -230,13 +234,13 @@ test/files/snuggles.xml
|
|
230
234
|
test/files/staff.dtd
|
231
235
|
test/files/staff.xml
|
232
236
|
test/files/staff.xslt
|
237
|
+
test/files/test_document_url/bar.xml
|
238
|
+
test/files/test_document_url/document.dtd
|
239
|
+
test/files/test_document_url/document.xml
|
233
240
|
test/files/tlm.html
|
234
241
|
test/files/to_be_xincluded.xml
|
235
242
|
test/files/valid_bar.xml
|
236
243
|
test/files/xinclude.xml
|
237
|
-
test/files/test_document_url/bar.xml
|
238
|
-
test/files/test_document_url/document.dtd
|
239
|
-
test/files/test_document_url/document.xml
|
240
244
|
test/helper.rb
|
241
245
|
test/html/sax/test_parser.rb
|
242
246
|
test/html/sax/test_parser_context.rb
|
data/README.ja.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= Nokogiri (鋸)
|
1
|
+
= Nokogiri (鋸) {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
|
2
2
|
|
3
3
|
* http://nokogiri.org/
|
4
4
|
* http://github.com/sparklemotion/nokogiri/wikis
|
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri]
|
1
|
+
= Nokogiri {<img src="https://secure.travis-ci.org/sparklemotion/nokogiri.png?rvm=1.9.3" />}[http://travis-ci.org/sparklemotion/nokogiri] {<img src="https://codeclimate.com/badge.png" />}[https://codeclimate.com/github/sparklemotion/nokogiri]
|
2
2
|
|
3
3
|
* http://nokogiri.org
|
4
4
|
* http://github.com/sparklemotion/nokogiri/wikis
|
data/ROADMAP.md
CHANGED
@@ -69,6 +69,9 @@
|
|
69
69
|
* we should standardize on a hash of options for these and other calls
|
70
70
|
* what should NodeSet#xpath return?
|
71
71
|
* https://github.com/sparklemotion/nokogiri/issues/656
|
72
|
+
* also, clean up or unify the implementations of #xpath-and-friends in Node and NodeSet
|
73
|
+
* implementations are very similar, but no shared code :(
|
74
|
+
* decorate nodes in a consistent manner
|
72
75
|
|
73
76
|
## Encoding
|
74
77
|
|
data/Rakefile
CHANGED
@@ -46,14 +46,10 @@ HOE = Hoe.spec 'nokogiri' do
|
|
46
46
|
["mini_portile", ">= 0.2.2"],
|
47
47
|
["minitest", "~> 2.2.2"],
|
48
48
|
["rake", ">= 0.9"],
|
49
|
-
["rake-compiler", "= 0.8.0"]
|
49
|
+
["rake-compiler", "= 0.8.0"],
|
50
|
+
["racc", ">= 1.4.6"],
|
51
|
+
["rexical", ">= 1.0.5"]
|
50
52
|
]
|
51
|
-
if ! java?
|
52
|
-
self.extra_dev_deps += [
|
53
|
-
["racc", ">= 1.4.6"],
|
54
|
-
["rexical", ">= 1.0.5"]
|
55
|
-
]
|
56
|
-
end
|
57
53
|
|
58
54
|
if java?
|
59
55
|
self.spec_extras = { :platform => 'java' }
|
@@ -118,6 +114,20 @@ desc "Generate css/parser.rb and css/tokenizer.rex"
|
|
118
114
|
task 'generate' => [GENERATED_PARSER, GENERATED_TOKENIZER]
|
119
115
|
task 'gem:spec' => 'generate' if Rake::Task.task_defined?("gem:spec")
|
120
116
|
|
117
|
+
# This is a big hack to make sure that the racc and rexical
|
118
|
+
# dependencies in the Gemfile are constrained to ruby platforms
|
119
|
+
# (i.e. MRI and Rubinius). There's no way to do that through hoe,
|
120
|
+
# and any solution will require changing hoe and hoe-bundler.
|
121
|
+
old_gemfile_task = Rake::Task['bundler:gemfile']
|
122
|
+
task 'bundler:gemfile' do
|
123
|
+
old_gemfile_task.invoke
|
124
|
+
|
125
|
+
lines = File.open('Gemfile', 'r') { |f| f.readlines }.map do |line|
|
126
|
+
line =~ /racc|rexical/ ? "#{line.strip}, :platform => :ruby" : line
|
127
|
+
end
|
128
|
+
File.open('Gemfile', 'w') { |f| lines.each { |line| f.puts line } }
|
129
|
+
end
|
130
|
+
|
121
131
|
file GENERATED_PARSER => "lib/nokogiri/css/parser.y" do |t|
|
122
132
|
racc = RbConfig::CONFIG['target_os'] =~ /mswin32/ ? '' : `which racc`.strip
|
123
133
|
racc = "#{::RbConfig::CONFIG['bindir']}/racc" if racc.empty?
|
@@ -149,9 +159,18 @@ task :java_debug do
|
|
149
159
|
ENV['JAVA_OPTS'] = '-Xdebug -Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y' if java? && ENV['JAVA_DEBUG']
|
150
160
|
end
|
151
161
|
|
162
|
+
if java?
|
163
|
+
task :test_18 => :test
|
164
|
+
task :test_19 do
|
165
|
+
ENV['JRUBY_OPTS'] = "--1.9"
|
166
|
+
Rake::Task["test"].invoke
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
152
170
|
Rake::Task[:test].prerequisites << :compile
|
153
171
|
Rake::Task[:test].prerequisites << :java_debug
|
154
172
|
Rake::Task[:test].prerequisites << :check_extra_deps unless java?
|
173
|
+
|
155
174
|
if Hoe.plugins.include?(:debugging)
|
156
175
|
['valgrind', 'valgrind:mem', 'valgrind:mem0'].each do |task_name|
|
157
176
|
Rake::Task["test:#{task_name}"].prerequisites << :compile
|
data/build_all
CHANGED
@@ -2,26 +2,32 @@
|
|
2
2
|
#
|
3
3
|
# script to build gems for all relevant platforms:
|
4
4
|
# - MRI et al (standard gem)
|
5
|
-
# - windows (x86-mingw32 and x86-
|
5
|
+
# - windows (x86-mingw32 and x86-mswin32-60)
|
6
6
|
# - jruby
|
7
7
|
#
|
8
|
-
#
|
9
|
-
# on ubuntu, `sudo apt-get install mingw32`
|
10
|
-
# for others, read up at https://github.com/luislavena/rake-compiler
|
8
|
+
# here's what I recommend for building all the gems:
|
11
9
|
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
10
|
+
# 1. set up a vagrant VM guest running ubuntu lucid 32-bit.
|
11
|
+
# 2. install rvm, and install 1.8.7, 1.9.3 and jruby.
|
12
|
+
# 3. `sudo apt-get install mingw32`
|
15
13
|
#
|
16
|
-
# you
|
14
|
+
# as you build, you may run into these problems:
|
17
15
|
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
# end
|
22
|
-
# end
|
16
|
+
# - on ubuntus 11 and later, you may have issues with building
|
17
|
+
# rake-compiler's rubies against openssl v2. Just comment the lines
|
18
|
+
# out from ossl_ssl.c and you'll be fine.
|
23
19
|
#
|
24
|
-
# you may
|
20
|
+
# - you may have issues with Pathname conversion to String in
|
21
|
+
# bundler. Add this to the offending bundler file:
|
22
|
+
#
|
23
|
+
# class Pathname
|
24
|
+
# def to_str
|
25
|
+
# to_s
|
26
|
+
# end
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# - you may also have to hack rubygems.rb to eliminate a reference to
|
30
|
+
# RUBY_ENGINE
|
25
31
|
#
|
26
32
|
|
27
33
|
HOST=
|
@@ -53,16 +59,23 @@ rm -rf gems
|
|
53
59
|
mkdir -p gems
|
54
60
|
|
55
61
|
# windows
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
62
|
+
platform=$(uname -i)
|
63
|
+
if [[ $platform =~ "64" ]] ; then
|
64
|
+
echo ""
|
65
|
+
echo "ERROR: You need to build the windows gem on a 32-bit machine!"
|
66
|
+
echo ""
|
67
|
+
exit 1
|
68
|
+
fi
|
69
|
+
rvm_use 1.8.7
|
70
|
+
if [[ ! -a ${HOME}/.rake-compiler/ruby/ruby-1.8.7-p330/lib/ruby/1.8.7/x86_64-linux/rbconfig.rb ]] ; then
|
71
|
+
bundle exec rake-compiler cross-ruby VERSION=1.8.7-p330
|
72
|
+
fi
|
73
|
+
if [[ ! -a ${HOME}/.rake-compiler/ruby/ruby-1.9.2-p136/lib/ruby/1.9.1/x86_64-linux/rbconfig.rb ]] ; then
|
74
|
+
bundle exec rake-compiler cross-ruby VERSION=1.9.2-p136
|
75
|
+
fi
|
76
|
+
bundle exec rake cross
|
77
|
+
bundle exec rake gem:windows
|
78
|
+
cp -v pkg/nokogiri*x86-{mingw32,mswin32}*.gem gems
|
66
79
|
|
67
80
|
# MRI
|
68
81
|
rvm_use 1.8.7
|
@@ -70,11 +83,11 @@ bundle exec rake gem
|
|
70
83
|
cp -v pkg/nokogiri*.gem gems # should only be one at this point in the script
|
71
84
|
|
72
85
|
# jruby
|
73
|
-
rvm_use jruby
|
86
|
+
rvm_use jruby
|
74
87
|
bundle install --quiet --local || bundle install
|
75
88
|
bundle exec rake clean clobber
|
76
89
|
rvm_use 1.8.7
|
77
90
|
bundle exec rake generate
|
78
|
-
rvm_use jruby
|
91
|
+
rvm_use jruby
|
79
92
|
bundle exec rake gem
|
80
|
-
cp -v pkg/nokogiri*java.gem gems
|
93
|
+
cp -v pkg/nokogiri*java.gem gems
|
@@ -55,6 +55,10 @@ import org.w3c.dom.NodeList;
|
|
55
55
|
*/
|
56
56
|
@JRubyClass(name="Nokogiri::HTML::Document", parent="Nokogiri::XML::Document")
|
57
57
|
public class HtmlDocument extends XmlDocument {
|
58
|
+
private static final String DEFAULT_CONTENT_TYPE = "html";
|
59
|
+
private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN";
|
60
|
+
private static final String DEFAULT_SYTEM_ID = "http://www.w3.org/TR/html4/strict.dtd";
|
61
|
+
|
58
62
|
private String parsed_encoding = null;
|
59
63
|
|
60
64
|
public HtmlDocument(Ruby ruby, RubyClass klazz) {
|
@@ -82,6 +86,28 @@ public class HtmlDocument extends XmlDocument {
|
|
82
86
|
return htmlDocument;
|
83
87
|
}
|
84
88
|
|
89
|
+
public IRubyObject getInternalSubset(ThreadContext context) {
|
90
|
+
IRubyObject internalSubset = super.getInternalSubset(context);
|
91
|
+
|
92
|
+
// html documents are expected to have a default internal subset
|
93
|
+
// the default values are the same ones used when the following
|
94
|
+
// feature is turned on
|
95
|
+
// "http://cyberneko.org/html/features/insert-doctype"
|
96
|
+
// the reason we don't turn it on is that it overrides the document's
|
97
|
+
// declared doctype declaration.
|
98
|
+
|
99
|
+
if (internalSubset.isNil()) {
|
100
|
+
internalSubset = XmlDtd.newEmpty(context.getRuntime(),
|
101
|
+
getDocument(),
|
102
|
+
context.getRuntime().newString(DEFAULT_CONTENT_TYPE),
|
103
|
+
context.getRuntime().newString(DEFAULT_PUBLIC_ID),
|
104
|
+
context.getRuntime().newString(DEFAULT_SYTEM_ID));
|
105
|
+
setInternalSubset(internalSubset);
|
106
|
+
}
|
107
|
+
|
108
|
+
return internalSubset;
|
109
|
+
}
|
110
|
+
|
85
111
|
public static IRubyObject do_parse(ThreadContext context,
|
86
112
|
IRubyObject klass,
|
87
113
|
IRubyObject[] args) {
|
@@ -64,6 +64,7 @@ import org.jruby.runtime.ThreadContext;
|
|
64
64
|
import org.jruby.runtime.builtin.IRubyObject;
|
65
65
|
import org.w3c.dom.Attr;
|
66
66
|
import org.w3c.dom.Document;
|
67
|
+
import org.w3c.dom.DocumentType;
|
67
68
|
import org.w3c.dom.NamedNodeMap;
|
68
69
|
import org.w3c.dom.Node;
|
69
70
|
import org.w3c.dom.NodeList;
|
@@ -288,7 +289,7 @@ public class XmlDocument extends XmlNode {
|
|
288
289
|
|
289
290
|
@JRubyMethod
|
290
291
|
public IRubyObject encoding(ThreadContext context) {
|
291
|
-
if (this.encoding == null) {
|
292
|
+
if (this.encoding == null || this.encoding.isNil()) {
|
292
293
|
if (getDocument().getXmlEncoding() == null) {
|
293
294
|
this.encoding = context.getRuntime().getNil();
|
294
295
|
} else {
|
@@ -296,7 +297,7 @@ public class XmlDocument extends XmlNode {
|
|
296
297
|
}
|
297
298
|
}
|
298
299
|
|
299
|
-
return this.encoding;
|
300
|
+
return this.encoding.isNil() ? this.encoding : this.encoding.asString().encode(context, context.getRuntime().newString("UTF-8"));
|
300
301
|
}
|
301
302
|
|
302
303
|
@JRubyMethod(meta = true)
|
@@ -438,8 +439,20 @@ public class XmlDocument extends XmlNode {
|
|
438
439
|
IRubyObject dtd = (IRubyObject) node.getUserData(DTD_INTERNAL_SUBSET);
|
439
440
|
|
440
441
|
if (dtd == null) {
|
441
|
-
|
442
|
-
|
442
|
+
Document document = getDocument();
|
443
|
+
if (document.getUserData(XmlDocument.DTD_RAW_DOCUMENT) != null) {
|
444
|
+
dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), document);
|
445
|
+
} else if (document.getDoctype() != null) {
|
446
|
+
DocumentType docType = document.getDoctype();
|
447
|
+
dtd = XmlDtd.newEmpty(context.getRuntime(),
|
448
|
+
document,
|
449
|
+
context.getRuntime().newString(docType.getName()),
|
450
|
+
context.getRuntime().newString(docType.getPublicId()),
|
451
|
+
context.getRuntime().newString(docType.getSystemId()));
|
452
|
+
} else {
|
453
|
+
dtd = context.getRuntime().getNil();
|
454
|
+
}
|
455
|
+
|
443
456
|
setInternalSubset(dtd);
|
444
457
|
}
|
445
458
|
|
@@ -33,7 +33,6 @@
|
|
33
33
|
package nokogiri;
|
34
34
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.getLocalNameForNamespace;
|
36
|
-
import static nokogiri.internals.NokogiriHelpers.getLocalPart;
|
37
36
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
38
37
|
import static nokogiri.internals.NokogiriHelpers.getPrefix;
|
39
38
|
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
@@ -93,7 +92,6 @@ public class XmlDocumentFragment extends XmlNode {
|
|
93
92
|
if (args.length > 1 && args[1] instanceof RubyString) {
|
94
93
|
args[1] = trim(context, doc, (RubyString)args[1]);
|
95
94
|
if (XmlDocumentFragment.isTag((RubyString)args[1])) {
|
96
|
-
args[1] = RubyString.newString(context.getRuntime(), ignoreNamespaceIfNeeded(doc, rubyStringToString(args[1])));
|
97
95
|
args[1] = RubyString.newString(context.getRuntime(), addNamespaceDeclIfNeeded(doc, rubyStringToString(args[1])));
|
98
96
|
}
|
99
97
|
}
|
@@ -127,46 +125,10 @@ public class XmlDocumentFragment extends XmlNode {
|
|
127
125
|
if (str.startsWith("<") && str.endsWith(">")) return true;
|
128
126
|
return false;
|
129
127
|
}
|
130
|
-
|
128
|
+
|
131
129
|
private static Pattern qname_pattern = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
|
132
130
|
private static Pattern starttag_pattern = Pattern.compile("<[^</>]+>");
|
133
131
|
|
134
|
-
/**
|
135
|
-
* Remove the namespace from @param tag and/or the attributes in @param tag if
|
136
|
-
* the namespace is the default in the current context as defined by @param doc.
|
137
|
-
*
|
138
|
-
* @return the normalized tag.
|
139
|
-
*/
|
140
|
-
private static String ignoreNamespaceIfNeeded(XmlDocument doc, String tags) {
|
141
|
-
if (doc.getDocument() == null) return tags;
|
142
|
-
Map<String, String> rewriteTable = new HashMap<String, String>();
|
143
|
-
// we have to make sure that we don't replace strings in double quotes, e.g.
|
144
|
-
// 'urn:xmpp:foospec:barfoo' in '<foobar xmlns="urn:xmpp:foospec:barfoo"/>'
|
145
|
-
// has to remain the same
|
146
|
-
String[] parts = tags.split("\"");
|
147
|
-
for (int partidx = 0; partidx < parts.length; partidx++) {
|
148
|
-
if (partidx % 2 == 1)
|
149
|
-
continue;
|
150
|
-
Matcher matcher = qname_pattern.matcher(parts[partidx]);
|
151
|
-
while(matcher.find()) {
|
152
|
-
String qName = matcher.group();
|
153
|
-
if (doc.getDocument().getDocumentElement() != null) {
|
154
|
-
NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
|
155
|
-
if (!isNamespaceDefined(qName, nodeMap)) {
|
156
|
-
rewriteTable.put(qName, getLocalPart(qName));
|
157
|
-
}
|
158
|
-
} else {
|
159
|
-
rewriteTable.put(qName, getLocalPart(qName));
|
160
|
-
}
|
161
|
-
}
|
162
|
-
}
|
163
|
-
Set<String> keys = rewriteTable.keySet();
|
164
|
-
for (String key : keys) {
|
165
|
-
tags = tags.replace(key, rewriteTable.get(key));
|
166
|
-
}
|
167
|
-
return tags;
|
168
|
-
}
|
169
|
-
|
170
132
|
private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
|
171
133
|
if (isNamespace(qName.intern())) return true;
|
172
134
|
for (int i=0; i < nodeMap.getLength(); i++) {
|
@@ -448,6 +448,7 @@ public class XmlNode extends RubyObject {
|
|
448
448
|
public void relink_namespace(ThreadContext context) {
|
449
449
|
if (node instanceof Element) {
|
450
450
|
Element e = (Element) node;
|
451
|
+
e.getOwnerDocument().setStrictErrorChecking(false);
|
451
452
|
e.getOwnerDocument().renameNode(e, e.lookupNamespaceURI(e.getPrefix()), e.getNodeName());
|
452
453
|
|
453
454
|
if (e.hasAttributes()) {
|
@@ -550,7 +551,7 @@ public class XmlNode extends RubyObject {
|
|
550
551
|
str = NokogiriHelpers.getLocalPart(str);
|
551
552
|
}
|
552
553
|
if (str == null) str = "";
|
553
|
-
name = context.getRuntime()
|
554
|
+
name = NokogiriHelpers.stringOrBlank(context.getRuntime(), str);
|
554
555
|
return name;
|
555
556
|
}
|
556
557
|
|
@@ -815,7 +816,7 @@ public class XmlNode extends RubyObject {
|
|
815
816
|
if (node == null) {
|
816
817
|
textContent = "";
|
817
818
|
} else {
|
818
|
-
textContent = ((Document)this.node).getDocumentElement().getTextContent()
|
819
|
+
textContent = ((Document)this.node).getDocumentElement().getTextContent();
|
819
820
|
}
|
820
821
|
} else {
|
821
822
|
textContent = this.node.getTextContent();
|
@@ -35,16 +35,16 @@ package nokogiri;
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
36
|
import static org.jruby.javasupport.util.RuntimeHelpers.invoke;
|
37
37
|
|
38
|
+
import java.io.ByteArrayInputStream;
|
38
39
|
import java.io.IOException;
|
39
|
-
import java.io.InputStream;
|
40
|
-
import java.io.OutputStream;
|
41
|
-
import java.nio.channels.Channels;
|
42
|
-
import java.nio.channels.Pipe;
|
43
40
|
import java.util.concurrent.Callable;
|
44
41
|
import java.util.concurrent.ExecutorService;
|
45
42
|
import java.util.concurrent.Executors;
|
43
|
+
import java.util.concurrent.Future;
|
46
44
|
import java.util.concurrent.FutureTask;
|
47
45
|
|
46
|
+
import nokogiri.internals.ClosedStreamException;
|
47
|
+
import nokogiri.internals.NokogiriBlockingQueueInputStream;
|
48
48
|
import nokogiri.internals.ParserContext;
|
49
49
|
|
50
50
|
import org.jruby.Ruby;
|
@@ -69,8 +69,7 @@ public class XmlSaxPushParser extends RubyObject {
|
|
69
69
|
ParserContext.Options options;
|
70
70
|
IRubyObject optionsRuby;
|
71
71
|
IRubyObject saxParser;
|
72
|
-
|
73
|
-
InputStream istream = null;
|
72
|
+
NokogiriBlockingQueueInputStream stream;
|
74
73
|
ParserTask parserTask = null;
|
75
74
|
FutureTask<XmlSaxParserContext> futureTask = null;
|
76
75
|
ExecutorService executor = null;
|
@@ -120,53 +119,43 @@ public class XmlSaxPushParser extends RubyObject {
|
|
120
119
|
byte[] data = null;
|
121
120
|
if (chunk instanceof RubyString || chunk.respondsTo("to_str")) {
|
122
121
|
data = chunk.convertToString().getBytes();
|
123
|
-
} else {
|
124
|
-
|
125
|
-
|
126
|
-
} catch (IOException e) {
|
127
|
-
throw context.getRuntime().newRuntimeError(e.getMessage());
|
128
|
-
}
|
129
|
-
XmlSyntaxError xmlSyntaxError =
|
122
|
+
} else {
|
123
|
+
terminateTask(context);
|
124
|
+
XmlSyntaxError xmlSyntaxError =
|
130
125
|
(XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SyntaxError"));
|
131
126
|
throw new RaiseException(xmlSyntaxError);
|
132
127
|
}
|
133
128
|
|
134
129
|
int errorCount0 = parserTask.getErrorCount();;
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
130
|
+
|
131
|
+
|
132
|
+
if (isLast.isTrue()) {
|
133
|
+
IRubyObject document = invoke(context, this, "document");
|
134
|
+
invoke(context, document, "end_document");
|
135
|
+
terminateTask(context);
|
136
|
+
} else {
|
137
|
+
try {
|
138
|
+
Future<Void> task = stream.addChunk(new ByteArrayInputStream(data));
|
139
|
+
task.get();
|
140
|
+
} catch (ClosedStreamException ex) {
|
141
|
+
// this means the stream is closed, ignore this exception
|
142
|
+
} catch (Exception e) {
|
143
|
+
throw context.getRuntime().newRuntimeError(e.getMessage());
|
144
144
|
}
|
145
|
-
|
146
|
-
throw context.getRuntime().newRuntimeError(e.getMessage());
|
147
|
-
} catch (InterruptedException e) {
|
148
|
-
throw context.getRuntime().newRuntimeError(e.getMessage());
|
145
|
+
|
149
146
|
}
|
150
147
|
|
151
148
|
if (!options.recover && parserTask.getErrorCount() > errorCount0) {
|
152
|
-
|
153
|
-
terminateTask();
|
154
|
-
} catch (IOException e) {
|
155
|
-
throw context.getRuntime().newRuntimeError(e.getMessage());
|
156
|
-
}
|
149
|
+
terminateTask(context);
|
157
150
|
throw new RaiseException(parserTask.getLastError(), true);
|
158
151
|
}
|
159
152
|
|
160
153
|
return this;
|
161
154
|
}
|
162
|
-
|
155
|
+
|
163
156
|
private void initialize_task(ThreadContext context) throws IOException {
|
164
|
-
if (futureTask == null ||
|
165
|
-
|
166
|
-
Pipe.SinkChannel sink = pipe.sink();
|
167
|
-
ostream = Channels.newOutputStream(sink);
|
168
|
-
Pipe.SourceChannel source = pipe.source();
|
169
|
-
istream = Channels.newInputStream(source);
|
157
|
+
if (futureTask == null || stream == null) {
|
158
|
+
stream = new NokogiriBlockingQueueInputStream();
|
170
159
|
|
171
160
|
parserTask = new ParserTask(context, saxParser);
|
172
161
|
futureTask = new FutureTask<XmlSaxParserContext>(parserTask);
|
@@ -174,40 +163,53 @@ public class XmlSaxPushParser extends RubyObject {
|
|
174
163
|
executor.submit(futureTask);
|
175
164
|
}
|
176
165
|
}
|
177
|
-
|
178
|
-
private synchronized void terminateTask()
|
166
|
+
|
167
|
+
private synchronized void terminateTask(ThreadContext context) {
|
168
|
+
try {
|
169
|
+
Future<Void> task = stream.addChunk(NokogiriBlockingQueueInputStream.END);
|
170
|
+
task.get();
|
171
|
+
} catch (ClosedStreamException ex) {
|
172
|
+
// ignore this exception, it means the stream was closed
|
173
|
+
} catch (Exception e) {
|
174
|
+
throw context.getRuntime().newRuntimeError(e.getMessage());
|
175
|
+
}
|
179
176
|
futureTask.cancel(true);
|
180
177
|
executor.shutdown();
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
istream = null;
|
178
|
+
executor = null;
|
179
|
+
stream = null;
|
180
|
+
futureTask = null;
|
185
181
|
}
|
186
|
-
|
182
|
+
|
187
183
|
private class ParserTask implements Callable<XmlSaxParserContext> {
|
188
|
-
private ThreadContext context;
|
189
|
-
private IRubyObject handler;
|
190
|
-
private XmlSaxParserContext parser;
|
191
|
-
|
184
|
+
private final ThreadContext context;
|
185
|
+
private final IRubyObject handler;
|
186
|
+
private final XmlSaxParserContext parser;
|
187
|
+
|
192
188
|
private ParserTask(ThreadContext context, IRubyObject handler) {
|
193
189
|
RubyClass klazz = getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SAX::ParserContext");
|
194
190
|
this.context = context;
|
195
191
|
this.handler = handler;
|
196
|
-
this.parser = (XmlSaxParserContext) XmlSaxParserContext.parse_stream(context, klazz,
|
192
|
+
this.parser = (XmlSaxParserContext) XmlSaxParserContext.parse_stream(context, klazz, stream);
|
197
193
|
}
|
198
194
|
|
199
195
|
@Override
|
200
196
|
public XmlSaxParserContext call() throws Exception {
|
197
|
+
try {
|
201
198
|
parser.parse_with(context, handler);
|
202
|
-
|
199
|
+
} finally {
|
200
|
+
// we have to close the stream before exiting, otherwise someone
|
201
|
+
// can add a chunk and block on task.get() forever.
|
202
|
+
stream.close();
|
203
|
+
}
|
204
|
+
return parser;
|
203
205
|
}
|
204
|
-
|
206
|
+
|
205
207
|
private synchronized int getErrorCount() {
|
206
208
|
// check for null because thread may not have started yet
|
207
209
|
if (parser.getNokogiriHandler() == null) return 0;
|
208
210
|
else return parser.getNokogiriHandler().getErrorCount();
|
209
211
|
}
|
210
|
-
|
212
|
+
|
211
213
|
private synchronized RubyException getLastError() {
|
212
214
|
return (RubyException) parser.getNokogiriHandler().getLastError();
|
213
215
|
}
|