nokogiri 1.5.6.rc3-java → 1.5.7-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +87 -26
- data/CHANGELOG.rdoc +94 -32
- data/Manifest.txt +1 -0
- data/Rakefile +28 -15
- data/build_all +13 -5
- data/ext/java/nokogiri/NokogiriService.java +8 -1
- data/ext/java/nokogiri/XmlDocument.java +4 -4
- data/ext/java/nokogiri/XmlDtd.java +13 -2
- data/ext/java/nokogiri/XmlElement.java +3 -12
- data/ext/java/nokogiri/XmlEntityReference.java +11 -31
- data/ext/java/nokogiri/XmlNode.java +76 -32
- data/ext/java/nokogiri/XmlReader.java +257 -181
- data/ext/java/nokogiri/XmlSaxPushParser.java +17 -2
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +23 -16
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +18 -1
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +9 -0
- data/ext/java/nokogiri/internals/ReaderNode.java +37 -37
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +23 -16
- data/ext/java/nokogiri/internals/UncloseableInputStream.java +102 -0
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +7 -4
- data/ext/nokogiri/extconf.rb +1 -0
- data/ext/nokogiri/nokogiri.h +4 -0
- data/ext/nokogiri/xml_node.c +33 -1
- data/ext/nokogiri/xml_reader.c +0 -3
- data/ext/nokogiri/xml_sax_parser.c +4 -1
- data/lib/nekodtd.jar +0 -0
- data/lib/nokogiri.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +1 -1
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +4 -1
- data/lib/nokogiri/xml/builder.rb +12 -2
- data/lib/nokogiri/xml/document.rb +3 -1
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/tasks/cross_compile.rb +15 -15
- data/test/css/test_parser.rb +9 -9
- data/test/css/test_xpath_visitor.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/html/sax/test_parser.rb +5 -2
- data/test/html/test_document_fragment.rb +4 -2
- data/test/namespaces/test_namespaces_in_builder_doc.rb +60 -0
- data/test/namespaces/test_namespaces_in_created_doc.rb +62 -0
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +60 -0
- data/test/test_reader.rb +38 -4
- data/test/xml/sax/test_parser.rb +10 -1
- data/test/xml/test_builder.rb +40 -1
- data/test/xml/test_document.rb +50 -2
- data/test/xml/test_entity_reference.rb +2 -4
- data/test/xml/test_node.rb +30 -1
- data/test_all +2 -2
- metadata +142 -232
@@ -42,6 +42,7 @@ import java.util.concurrent.ExecutorService;
|
|
42
42
|
import java.util.concurrent.Executors;
|
43
43
|
import java.util.concurrent.Future;
|
44
44
|
import java.util.concurrent.FutureTask;
|
45
|
+
import java.util.concurrent.ThreadFactory;
|
45
46
|
|
46
47
|
import nokogiri.internals.ClosedStreamException;
|
47
48
|
import nokogiri.internals.NokogiriBlockingQueueInputStream;
|
@@ -78,6 +79,11 @@ public class XmlSaxPushParser extends RubyObject {
|
|
78
79
|
super(ruby, rubyClass);
|
79
80
|
}
|
80
81
|
|
82
|
+
@Override
|
83
|
+
public void finalize() {
|
84
|
+
terminateTask(null);
|
85
|
+
}
|
86
|
+
|
81
87
|
@JRubyMethod
|
82
88
|
public IRubyObject initialize_native(final ThreadContext context,
|
83
89
|
IRubyObject saxParser,
|
@@ -159,7 +165,15 @@ public class XmlSaxPushParser extends RubyObject {
|
|
159
165
|
|
160
166
|
parserTask = new ParserTask(context, saxParser);
|
161
167
|
futureTask = new FutureTask<XmlSaxParserContext>(parserTask);
|
162
|
-
executor = Executors.newSingleThreadExecutor()
|
168
|
+
executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
|
169
|
+
@Override
|
170
|
+
public Thread newThread(Runnable r) {
|
171
|
+
Thread t = new Thread(r);
|
172
|
+
t.setName("XmlSaxPushParser");
|
173
|
+
t.setDaemon(true);
|
174
|
+
return t;
|
175
|
+
}
|
176
|
+
});
|
163
177
|
executor.submit(futureTask);
|
164
178
|
}
|
165
179
|
}
|
@@ -171,7 +185,8 @@ public class XmlSaxPushParser extends RubyObject {
|
|
171
185
|
} catch (ClosedStreamException ex) {
|
172
186
|
// ignore this exception, it means the stream was closed
|
173
187
|
} catch (Exception e) {
|
174
|
-
|
188
|
+
if (context != null)
|
189
|
+
throw context.getRuntime().newRuntimeError(e.getMessage());
|
175
190
|
}
|
176
191
|
futureTask.cancel(true);
|
177
192
|
executor.shutdown();
|
@@ -40,7 +40,6 @@ import java.nio.ByteBuffer;
|
|
40
40
|
import java.nio.CharBuffer;
|
41
41
|
import java.nio.charset.CharacterCodingException;
|
42
42
|
import java.nio.charset.Charset;
|
43
|
-
import java.nio.charset.CharsetDecoder;
|
44
43
|
import java.nio.charset.CharsetEncoder;
|
45
44
|
import java.util.ArrayList;
|
46
45
|
import java.util.List;
|
@@ -72,9 +71,11 @@ import org.jruby.runtime.ThreadContext;
|
|
72
71
|
import org.jruby.runtime.builtin.IRubyObject;
|
73
72
|
import org.jruby.util.ByteList;
|
74
73
|
import org.w3c.dom.Attr;
|
74
|
+
import org.w3c.dom.Document;
|
75
75
|
import org.w3c.dom.NamedNodeMap;
|
76
76
|
import org.w3c.dom.Node;
|
77
77
|
import org.w3c.dom.NodeList;
|
78
|
+
import org.w3c.dom.DOMException;
|
78
79
|
|
79
80
|
/**
|
80
81
|
* A class for various utility methods.
|
@@ -635,10 +636,11 @@ public class NokogiriHelpers {
|
|
635
636
|
}
|
636
637
|
|
637
638
|
public static String newQName(String newPrefix, Node node) {
|
639
|
+
String tagName = getLocalPart(node.getNodeName());
|
638
640
|
if(newPrefix == null) {
|
639
|
-
return
|
641
|
+
return tagName;
|
640
642
|
} else {
|
641
|
-
return newPrefix + ":" +
|
643
|
+
return newPrefix + ":" + tagName;
|
642
644
|
}
|
643
645
|
}
|
644
646
|
|
@@ -724,21 +726,14 @@ public class NokogiriHelpers {
|
|
724
726
|
}
|
725
727
|
|
726
728
|
public static byte[] convertEncoding(Charset output_charset, String input_string) throws CharacterCodingException {
|
727
|
-
Charset input = Charset.forName("UTF-8");
|
728
|
-
CharsetDecoder decoder = input.newDecoder();
|
729
729
|
CharsetEncoder encoder = output_charset.newEncoder();
|
730
|
-
|
731
|
-
encoder.
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
encoder.encode(cbuf, bbuf, true);
|
736
|
-
int length = bbuf.position();
|
737
|
-
byte[] bytes = new byte[length];
|
738
|
-
System.arraycopy(bbuf.array(), 0, bytes, 0, length);
|
739
|
-
return bytes;
|
730
|
+
CharBuffer charBuffer = CharBuffer.wrap(input_string);
|
731
|
+
ByteBuffer byteBuffer = encoder.encode(charBuffer);
|
732
|
+
byte[] buffer = new byte[byteBuffer.remaining()];
|
733
|
+
byteBuffer.get(buffer);
|
734
|
+
return buffer;
|
740
735
|
}
|
741
|
-
|
736
|
+
|
742
737
|
public static String convertEncodingByNKFIfNecessary(Ruby runtime, XmlDocument doc, String thing) {
|
743
738
|
if (!(doc instanceof HtmlDocument)) return thing;
|
744
739
|
String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
|
@@ -815,4 +810,16 @@ public class NokogiriHelpers {
|
|
815
810
|
public static boolean shouldDecode(Node text) {
|
816
811
|
return !shouldEncode(text);
|
817
812
|
}
|
813
|
+
|
814
|
+
public static Node renameNode(Node n, String namespaceURI, String qualifiedName) throws DOMException {
|
815
|
+
Document doc = n.getOwnerDocument();
|
816
|
+
XmlDocument xmlDoc = (XmlDocument)getCachedNode(doc);
|
817
|
+
NokogiriNamespaceCache nsCache = xmlDoc.getNamespaceCache();
|
818
|
+
int oldHash = n.hashCode();
|
819
|
+
Node result = doc.renameNode(n, namespaceURI, qualifiedName);
|
820
|
+
if (result != n) {
|
821
|
+
nsCache.replaceNode(n, result);
|
822
|
+
}
|
823
|
+
return result;
|
824
|
+
}
|
818
825
|
}
|
@@ -99,7 +99,7 @@ public class NokogiriNamespaceCache {
|
|
99
99
|
List<XmlNamespace> namespaces = new ArrayList<XmlNamespace>();
|
100
100
|
for (int i=0; i < keys.size(); i++) {
|
101
101
|
CacheEntry entry = cache.get(i);
|
102
|
-
if (entry.
|
102
|
+
if (entry.isOwner(node)) {
|
103
103
|
namespaces.add(entry.namespace);
|
104
104
|
}
|
105
105
|
}
|
@@ -151,6 +151,15 @@ public class NokogiriNamespaceCache {
|
|
151
151
|
defaultNamespace = null;
|
152
152
|
}
|
153
153
|
|
154
|
+
public void replaceNode(Node oldNode, Node newNode) {
|
155
|
+
for (int i=0; i < keys.size(); i++) {
|
156
|
+
CacheEntry entry = cache.get(i);
|
157
|
+
if (entry.isOwner(oldNode)) {
|
158
|
+
entry.replaceOwner(newNode);
|
159
|
+
}
|
160
|
+
}
|
161
|
+
}
|
162
|
+
|
154
163
|
private class CacheEntry {
|
155
164
|
private XmlNamespace namespace;
|
156
165
|
private Node ownerNode;
|
@@ -159,5 +168,13 @@ public class NokogiriNamespaceCache {
|
|
159
168
|
this.namespace = namespace;
|
160
169
|
this.ownerNode = ownerNode;
|
161
170
|
}
|
171
|
+
|
172
|
+
public Boolean isOwner(Node n) {
|
173
|
+
return this.ownerNode.isSameNode(n);
|
174
|
+
}
|
175
|
+
|
176
|
+
public void replaceOwner(Node newNode) {
|
177
|
+
this.ownerNode = newNode;
|
178
|
+
}
|
162
179
|
}
|
163
180
|
}
|
@@ -56,6 +56,15 @@ public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler{
|
|
56
56
|
}
|
57
57
|
|
58
58
|
public void fatalError(SAXParseException ex) throws SAXException {
|
59
|
+
// fix #837
|
60
|
+
// Xerces won't skip the reference entity (and other invalid) constructs
|
61
|
+
// found in the prolog, instead it will keep calling this method and we'll
|
62
|
+
// keep inserting the error in the document errors array until we run
|
63
|
+
// out of memory
|
64
|
+
String message = ex.getMessage();
|
65
|
+
if (message != null && message.toLowerCase().contains("in prolog")) {
|
66
|
+
throw ex;
|
67
|
+
}
|
59
68
|
errors.add(ex);
|
60
69
|
}
|
61
70
|
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -51,6 +51,7 @@ import nokogiri.XmlAttr;
|
|
51
51
|
import nokogiri.XmlDocument;
|
52
52
|
import nokogiri.XmlSyntaxError;
|
53
53
|
|
54
|
+
import org.apache.xerces.xni.XMLAttributes;
|
54
55
|
import org.jruby.Ruby;
|
55
56
|
import org.jruby.RubyArray;
|
56
57
|
import org.jruby.RubyBoolean;
|
@@ -59,12 +60,10 @@ import org.jruby.runtime.ThreadContext;
|
|
59
60
|
import org.jruby.runtime.builtin.IRubyObject;
|
60
61
|
import org.w3c.dom.Attr;
|
61
62
|
import org.w3c.dom.Document;
|
62
|
-
import org.xml.sax.Attributes;
|
63
|
-
import org.xml.sax.SAXParseException;
|
64
63
|
|
65
64
|
/**
|
66
65
|
* Abstract class of Node for XmlReader.
|
67
|
-
*
|
66
|
+
*
|
68
67
|
* @author Yoko Harada <yokolet@gmail.com>
|
69
68
|
*
|
70
69
|
*/
|
@@ -75,17 +74,18 @@ public abstract class ReaderNode {
|
|
75
74
|
public Map<String, String> namespaces;
|
76
75
|
public int depth, nodeType;
|
77
76
|
public String lang, localName, xmlBase, prefix, name, uri, value, xmlVersion = "1.0";
|
77
|
+
public int startOffset, endOffset;
|
78
78
|
public boolean hasChildren = false;
|
79
79
|
public abstract String getString();
|
80
80
|
private Document document = null;
|
81
|
-
|
81
|
+
|
82
82
|
private static ElementNode elementNode = null;
|
83
83
|
private static ClosingNode closingNode = null;
|
84
84
|
private static TextNode textNode = null;
|
85
|
-
|
85
|
+
|
86
86
|
public IRubyObject getAttributeByIndex(IRubyObject index){
|
87
87
|
if(index.isNil()) return index;
|
88
|
-
|
88
|
+
|
89
89
|
long i = index.convertToInteger().getLongValue();
|
90
90
|
if(i > Integer.MAX_VALUE) {
|
91
91
|
throw ruby.newArgumentError("value too long to be an array index");
|
@@ -101,7 +101,7 @@ public abstract class ReaderNode {
|
|
101
101
|
String value = attributeList.getByName(rubyStringToString(name));
|
102
102
|
return stringOrNil(ruby, value);
|
103
103
|
}
|
104
|
-
|
104
|
+
|
105
105
|
public IRubyObject getAttributeByName(String name){
|
106
106
|
if(attributeList == null) return ruby.getNil();
|
107
107
|
String value = attributeList.getByName(name);
|
@@ -117,7 +117,8 @@ public abstract class ReaderNode {
|
|
117
117
|
RubyArray array = RubyArray.newArray(ruby);
|
118
118
|
if (attributeList != null && attributeList.length > 0) {
|
119
119
|
if (document == null) {
|
120
|
-
|
120
|
+
XmlDocument doc = (XmlDocument) XmlDocument.rbNew(ruby.getCurrentContext(), getNokogiriClass(ruby, "Nokogiri::XML::Document"), new IRubyObject[0]);
|
121
|
+
document = doc.getDocument();
|
121
122
|
}
|
122
123
|
for (int i=0; i<attributeList.length; i++) {
|
123
124
|
if (!isNamespace(attributeList.names.get(i))) {
|
@@ -131,7 +132,7 @@ public abstract class ReaderNode {
|
|
131
132
|
}
|
132
133
|
return array;
|
133
134
|
}
|
134
|
-
|
135
|
+
|
135
136
|
public IRubyObject getAttributes(ThreadContext context) {
|
136
137
|
if(attributeList == null) return context.getRuntime().getNil();
|
137
138
|
RubyHash hash = RubyHash.newHash(context.getRuntime());
|
@@ -173,7 +174,7 @@ public abstract class ReaderNode {
|
|
173
174
|
}
|
174
175
|
return hash;
|
175
176
|
}
|
176
|
-
|
177
|
+
|
177
178
|
public IRubyObject getXmlBase() {
|
178
179
|
return stringOrNil(ruby, xmlBase);
|
179
180
|
}
|
@@ -218,7 +219,7 @@ public abstract class ReaderNode {
|
|
218
219
|
}
|
219
220
|
|
220
221
|
public IRubyObject toSyntaxError() { return ruby.getNil(); }
|
221
|
-
|
222
|
+
|
222
223
|
public IRubyObject getNodeType() { return ruby.newFixnum(nodeType); }
|
223
224
|
|
224
225
|
public static enum ReaderNodeType {
|
@@ -240,17 +241,17 @@ public abstract class ReaderNode {
|
|
240
241
|
END_ELEMENT(15),
|
241
242
|
END_ENTITY(16),
|
242
243
|
XML_DECLARATION(17);
|
243
|
-
|
244
|
+
|
244
245
|
private final int value;
|
245
246
|
ReaderNodeType(int value) {
|
246
247
|
this.value = value;
|
247
248
|
}
|
248
|
-
|
249
|
+
|
249
250
|
public int getValue() {
|
250
251
|
return value;
|
251
252
|
}
|
252
253
|
}
|
253
|
-
|
254
|
+
|
254
255
|
public static ClosingNode createClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
255
256
|
if (closingNode == null) closingNode = new ClosingNode();
|
256
257
|
ClosingNode clone;
|
@@ -264,13 +265,13 @@ public abstract class ReaderNode {
|
|
264
265
|
}
|
265
266
|
|
266
267
|
public static class ClosingNode extends ReaderNode {
|
267
|
-
|
268
|
+
|
268
269
|
public ClosingNode() {}
|
269
270
|
|
270
271
|
public ClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
271
272
|
init(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
|
272
273
|
}
|
273
|
-
|
274
|
+
|
274
275
|
public void init(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
275
276
|
this.ruby = ruby;
|
276
277
|
nodeType = ReaderNodeType.END_ELEMENT.getValue();
|
@@ -300,8 +301,8 @@ public abstract class ReaderNode {
|
|
300
301
|
return new String(sb);
|
301
302
|
}
|
302
303
|
}
|
303
|
-
|
304
|
-
public static ElementNode createElementNode(Ruby ruby, String uri, String localName, String qName,
|
304
|
+
|
305
|
+
public static ElementNode createElementNode(Ruby ruby, String uri, String localName, String qName, XMLAttributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
305
306
|
if (elementNode == null) elementNode = new ElementNode();
|
306
307
|
ElementNode clone;
|
307
308
|
try {
|
@@ -312,17 +313,17 @@ public abstract class ReaderNode {
|
|
312
313
|
clone.init(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
|
313
314
|
return clone;
|
314
315
|
}
|
315
|
-
|
316
|
+
|
316
317
|
public static class ElementNode extends ReaderNode {
|
317
|
-
private List<String> attributeStrings = new ArrayList<String>();
|
318
|
-
|
318
|
+
private final List<String> attributeStrings = new ArrayList<String>();
|
319
|
+
|
319
320
|
public ElementNode() {}
|
320
|
-
|
321
|
-
public ElementNode(Ruby ruby, String uri, String localName, String qName,
|
321
|
+
|
322
|
+
public ElementNode(Ruby ruby, String uri, String localName, String qName, XMLAttributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
322
323
|
init(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
|
323
324
|
}
|
324
|
-
|
325
|
-
public void init(Ruby ruby, String uri, String localName, String qName,
|
325
|
+
|
326
|
+
public void init(Ruby ruby, String uri, String localName, String qName, XMLAttributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
326
327
|
this.ruby = ruby;
|
327
328
|
this.nodeType = ReaderNodeType.ELEMENT.getValue();
|
328
329
|
this.uri = "".equals(uri) ? null : uri;
|
@@ -330,7 +331,6 @@ public abstract class ReaderNode {
|
|
330
331
|
this.name = qName;
|
331
332
|
parsePrefix(qName);
|
332
333
|
this.depth = depth;
|
333
|
-
hasChildren = true;
|
334
334
|
parseAttributes(attrs, langStack, xmlBaseStack);
|
335
335
|
}
|
336
336
|
|
@@ -339,7 +339,7 @@ public abstract class ReaderNode {
|
|
339
339
|
return ruby.getFalse();
|
340
340
|
}
|
341
341
|
|
342
|
-
private void parseAttributes(
|
342
|
+
private void parseAttributes(XMLAttributes attrs, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
343
343
|
if (attrs.getLength() > 0) attributeList = new ReaderAttributeList();
|
344
344
|
String u, n, v;
|
345
345
|
for (int i = 0; i < attrs.getLength(); i++) {
|
@@ -357,7 +357,7 @@ public abstract class ReaderNode {
|
|
357
357
|
attributeStrings.add(n + "=\"" + v + "\"");
|
358
358
|
}
|
359
359
|
}
|
360
|
-
|
360
|
+
|
361
361
|
private String resolveLang(String n, String v, Stack<String> langStack) {
|
362
362
|
if ("xml:lang".equals(n)) {
|
363
363
|
return v;
|
@@ -367,7 +367,7 @@ public abstract class ReaderNode {
|
|
367
367
|
return null;
|
368
368
|
}
|
369
369
|
}
|
370
|
-
|
370
|
+
|
371
371
|
private String resolveXmlBase(String n, String v, Stack<String> xmlBaseStack) {
|
372
372
|
if (isXmlBase(n)) {
|
373
373
|
return getXmlBaseUri(n, v, xmlBaseStack);
|
@@ -377,7 +377,7 @@ public abstract class ReaderNode {
|
|
377
377
|
return null;
|
378
378
|
}
|
379
379
|
}
|
380
|
-
|
380
|
+
|
381
381
|
private String getXmlBaseUri(String n, String v, Stack<String> xmlBaseStack) {
|
382
382
|
if ("xml:base".equals(n)) {
|
383
383
|
if (v.startsWith("http://")) {
|
@@ -423,7 +423,7 @@ public abstract class ReaderNode {
|
|
423
423
|
List<String> names = new ArrayList<String>();
|
424
424
|
List<String> values = new ArrayList<String>();
|
425
425
|
int length = 0;
|
426
|
-
|
426
|
+
|
427
427
|
void add(String namespace, String name, String value) {
|
428
428
|
namespace = namespace != null ? namespace : "";
|
429
429
|
namespaces.add(namespace);
|
@@ -433,7 +433,7 @@ public abstract class ReaderNode {
|
|
433
433
|
values.add(value);
|
434
434
|
length++;
|
435
435
|
}
|
436
|
-
|
436
|
+
|
437
437
|
String getByName(String name) {
|
438
438
|
for (int i=0; i<names.size(); i++) {
|
439
439
|
if (name.equals(names.get(i))) {
|
@@ -471,7 +471,7 @@ public abstract class ReaderNode {
|
|
471
471
|
private final XmlSyntaxError exception;
|
472
472
|
|
473
473
|
// Still don't know what to do with ex.
|
474
|
-
public ExceptionNode(Ruby runtime,
|
474
|
+
public ExceptionNode(Ruby runtime, Exception ex) {
|
475
475
|
super(runtime);
|
476
476
|
exception = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(ruby, "Nokogiri::XML::SyntaxError"));
|
477
477
|
}
|
@@ -486,7 +486,7 @@ public abstract class ReaderNode {
|
|
486
486
|
return this.exception;
|
487
487
|
}
|
488
488
|
}
|
489
|
-
|
489
|
+
|
490
490
|
public static TextNode createTextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
491
491
|
if (textNode == null) textNode = new TextNode();
|
492
492
|
TextNode clone;
|
@@ -505,7 +505,7 @@ public abstract class ReaderNode {
|
|
505
505
|
public TextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
506
506
|
init(ruby, content, depth, langStack, xmlBaseStack);
|
507
507
|
}
|
508
|
-
|
508
|
+
|
509
509
|
public void init(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
510
510
|
this.ruby = ruby;
|
511
511
|
this.value = content;
|
@@ -37,6 +37,8 @@ import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
|
|
37
37
|
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
38
38
|
import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;
|
39
39
|
|
40
|
+
import java.nio.charset.Charset;
|
41
|
+
import java.nio.charset.CharsetEncoder;
|
40
42
|
import java.util.ArrayDeque;
|
41
43
|
import java.util.ArrayList;
|
42
44
|
import java.util.Arrays;
|
@@ -193,7 +195,7 @@ public class SaveContextVisitor {
|
|
193
195
|
return enter((Entity)node);
|
194
196
|
}
|
195
197
|
if (node instanceof EntityReference) {
|
196
|
-
return enter(node);
|
198
|
+
return enter((EntityReference) node);
|
197
199
|
}
|
198
200
|
if (node instanceof Notation) {
|
199
201
|
return enter((Notation)node);
|
@@ -237,7 +239,7 @@ public class SaveContextVisitor {
|
|
237
239
|
return;
|
238
240
|
}
|
239
241
|
if (node instanceof EntityReference) {
|
240
|
-
leave(node);
|
242
|
+
leave((EntityReference) node);
|
241
243
|
return;
|
242
244
|
}
|
243
245
|
if (node instanceof Notation) {
|
@@ -433,11 +435,13 @@ public class SaveContextVisitor {
|
|
433
435
|
return true;
|
434
436
|
}
|
435
437
|
// no child
|
436
|
-
if (asHtml
|
438
|
+
if (asHtml) {
|
437
439
|
buffer.append(">");
|
438
440
|
} else if (asXml && noEmpty) {
|
439
441
|
buffer.append(">");
|
440
442
|
} else {
|
443
|
+
if (asXhtml)
|
444
|
+
buffer.append(" "); // see http://www.w3.org/TR/xhtml1/#C_2
|
441
445
|
buffer.append("/>");
|
442
446
|
}
|
443
447
|
if (needBreakInOpening(element)) {
|
@@ -677,13 +681,11 @@ public class SaveContextVisitor {
|
|
677
681
|
// no-op
|
678
682
|
}
|
679
683
|
|
680
|
-
public boolean
|
681
|
-
|
682
|
-
buffer.append("&" + name + ";");
|
684
|
+
public boolean enter(EntityReference entityRef) {
|
685
|
+
buffer.append("&" + entityRef.getNodeName() + ";");
|
683
686
|
return true;
|
684
687
|
}
|
685
|
-
|
686
|
-
public void leaveEntityReference(Text entityRef) {
|
688
|
+
public void leave(EntityReference entityRef) {
|
687
689
|
// no-op
|
688
690
|
}
|
689
691
|
|
@@ -731,6 +733,10 @@ public class SaveContextVisitor {
|
|
731
733
|
// no-op
|
732
734
|
}
|
733
735
|
|
736
|
+
private boolean isHtmlScript(Text text) {
|
737
|
+
return htmlDoc && text.getParentNode().getNodeName().equals("script");
|
738
|
+
}
|
739
|
+
|
734
740
|
private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
|
735
741
|
public boolean enter(Text text) {
|
736
742
|
String textContent = text.getNodeValue();
|
@@ -742,13 +748,11 @@ public class SaveContextVisitor {
|
|
742
748
|
}
|
743
749
|
}
|
744
750
|
|
745
|
-
if (NokogiriHelpers.shouldEncode(text)) {
|
751
|
+
if (NokogiriHelpers.shouldEncode(text) && !isHtmlScript(text)) {
|
746
752
|
textContent = encodeJavaString(textContent);
|
747
753
|
}
|
748
754
|
|
749
|
-
|
750
|
-
textContent = encodeStringToHtmlEntity(textContent);
|
751
|
-
}
|
755
|
+
textContent = encodeStringToHtmlEntity(textContent);
|
752
756
|
buffer.append(textContent);
|
753
757
|
return true;
|
754
758
|
}
|
@@ -760,12 +764,15 @@ public class SaveContextVisitor {
|
|
760
764
|
}
|
761
765
|
|
762
766
|
private String encodeStringToHtmlEntity(String text) {
|
767
|
+
if (encoding == null)
|
768
|
+
return text;
|
769
|
+
CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
|
763
770
|
int last = 126; // = U+007E. No need to encode under U+007E.
|
764
771
|
StringBuffer sb = new StringBuffer();
|
765
|
-
for (int i=0; i<text.length(); i++) {
|
766
|
-
|
767
|
-
if (
|
768
|
-
else sb.append(
|
772
|
+
for (int i = 0; i < text.length(); i++) {
|
773
|
+
char ch = text.charAt(i);
|
774
|
+
if (encoder.canEncode(ch)) sb.append(ch);
|
775
|
+
else sb.append("&#x" + Integer.toHexString(ch) + ";");
|
769
776
|
}
|
770
777
|
return new String(sb);
|
771
778
|
}
|