nokogiri 1.5.5.rc3-java → 1.5.6-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +42 -1
- data/CHANGELOG.rdoc +41 -1
- data/Manifest.txt +8 -1
- data/README.ja.rdoc +1 -1
- data/README.rdoc +5 -8
- data/ROADMAP.md +6 -2
- data/Rakefile +29 -7
- data/bin/nokogiri +19 -4
- data/build_all +56 -17
- data/ext/java/nokogiri/HtmlDocument.java +26 -0
- data/ext/java/nokogiri/NokogiriService.java +7 -1
- data/ext/java/nokogiri/XmlDocument.java +24 -6
- data/ext/java/nokogiri/XmlDocumentFragment.java +2 -26
- data/ext/java/nokogiri/XmlDtd.java +13 -2
- data/ext/java/nokogiri/XmlElement.java +3 -12
- data/ext/java/nokogiri/XmlEntityReference.java +32 -8
- data/ext/java/nokogiri/XmlNamespace.java +2 -1
- data/ext/java/nokogiri/XmlNode.java +83 -31
- data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
- data/ext/java/nokogiri/XmlText.java +2 -14
- data/ext/java/nokogiri/XsltStylesheet.java +4 -2
- data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
- data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
- data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +22 -14
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +25 -7
- data/ext/java/nokogiri/internals/ParserContext.java +2 -1
- data/ext/java/nokogiri/internals/ReaderNode.java +2 -1
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +100 -102
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +10 -4
- data/ext/nokogiri/extconf.rb +1 -0
- data/ext/nokogiri/xml_document.c +2 -2
- data/ext/nokogiri/xml_node.c +31 -14
- data/ext/nokogiri/xml_sax_parser.c +16 -0
- data/ext/nokogiri/xslt_stylesheet.c +19 -2
- data/lib/nekodtd.jar +0 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +4 -1
- data/lib/nokogiri/xml/document.rb +8 -6
- data/lib/nokogiri/xml/document_fragment.rb +10 -1
- data/lib/nokogiri/xml/node.rb +58 -61
- data/lib/nokogiri/xml/sax/document.rb +7 -0
- data/lib/nokogiri/xml/sax/parser.rb +7 -0
- data/lib/nokogiri/xml/xpath_context.rb +1 -1
- data/lib/nokogiri/xslt.rb +1 -1
- data/tasks/cross_compile.rb +5 -8
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/helper.rb +6 -0
- data/test/html/test_document.rb +23 -0
- data/test/html/test_document_fragment.rb +5 -0
- data/test/test_xslt_transforms.rb +30 -0
- data/test/xml/sax/test_parser.rb +20 -1
- data/test/xml/test_builder.rb +42 -0
- data/test/xml/test_document.rb +64 -9
- data/test/xml/test_document_fragment.rb +7 -0
- data/test/xml/test_entity_reference.rb +12 -0
- data/test/xml/test_namespace.rb +20 -0
- data/test/xml/test_node.rb +79 -0
- data/test/xml/test_node_attributes.rb +29 -0
- data/test/xml/test_unparented_node.rb +9 -0
- data/test_all +11 -14
- metadata +744 -560
@@ -0,0 +1,109 @@
|
|
1
|
+
package nokogiri.internals;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.io.InputStream;
|
5
|
+
|
6
|
+
import org.jruby.Ruby;
|
7
|
+
import org.jruby.RubyObject;
|
8
|
+
import org.jruby.RubyProcess.Sys;
|
9
|
+
import org.jruby.exceptions.RaiseException;
|
10
|
+
import org.jruby.javasupport.util.RuntimeHelpers;
|
11
|
+
import org.jruby.runtime.ThreadContext;
|
12
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
13
|
+
import org.jruby.util.ByteList;
|
14
|
+
|
15
|
+
/**
|
16
|
+
* This class wraps the EncodingReader which act like a rewinding input stream,
|
17
|
+
* it tries to read the first 1K of data to detect the encoding, but save
|
18
|
+
* this data in a buffer for the subsequent read. Unfortunately, the EncodingReader
|
19
|
+
* will behave as expected only if encoding was detected, otherwise, the read data
|
20
|
+
* won't be stored and EncodingReader will fallback to read directory from the io stream.
|
21
|
+
* this is kind of lame, since we need to have similar logic in both layers. The alternative
|
22
|
+
* is to implement the encoding detection similar to the way C-Nokogiri does it; it starts
|
23
|
+
* parsing assuming encoding is unknown and if encoding is detected it will throw an exception
|
24
|
+
* causing parsing to stop, in which case we have to intercept the exception and set the encoding.
|
25
|
+
* Also in this case we don't have to restart the parsing since html/document.rb does that for us.
|
26
|
+
*
|
27
|
+
* @author John Shahid <jvshahid@gmail.com>
|
28
|
+
*
|
29
|
+
*/
|
30
|
+
public class NokogiriEncodingReaderWrapper extends InputStream {
|
31
|
+
private final ThreadContext context;
|
32
|
+
private final IRubyObject encodingReader;
|
33
|
+
private final Ruby ruby;
|
34
|
+
private IRubyObject detectedEncoding;
|
35
|
+
private final byte[] firstChunk = new byte[1024];
|
36
|
+
private int firstChunkOff = 0;
|
37
|
+
private int firstChunkLength = 0;
|
38
|
+
|
39
|
+
public NokogiriEncodingReaderWrapper(ThreadContext context, RubyObject encodingReader) {
|
40
|
+
this.context = context;
|
41
|
+
this.encodingReader = encodingReader;
|
42
|
+
this.ruby = context.getRuntime();
|
43
|
+
|
44
|
+
if (!RuntimeHelpers.invoke(context, encodingReader, "respond_to?", ruby.newSymbol("read").to_sym()).isTrue()
|
45
|
+
|| encodingReader.getInstanceVariable("@io") == null) {
|
46
|
+
throw ruby.newArgumentError("Argument doesn't respond to read or doesn't have instance variable @io");
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
public boolean detectEncoding() {
|
51
|
+
try {
|
52
|
+
firstChunkLength = read(firstChunk);
|
53
|
+
} catch (RaiseException e) {
|
54
|
+
detectedEncoding = e.getException().getInstanceVariable("@found_encoding");
|
55
|
+
return true;
|
56
|
+
}
|
57
|
+
detectedEncoding = context.nil;
|
58
|
+
return false;
|
59
|
+
}
|
60
|
+
|
61
|
+
public IRubyObject getEncoding() {
|
62
|
+
return detectedEncoding;
|
63
|
+
}
|
64
|
+
|
65
|
+
@Override
|
66
|
+
public int read(byte b[]) {
|
67
|
+
return read(b, 0, b.length);
|
68
|
+
}
|
69
|
+
|
70
|
+
@Override
|
71
|
+
public int read(byte b[], int off, int len) {
|
72
|
+
if (b == null) {
|
73
|
+
throw new NullPointerException();
|
74
|
+
} else if (off < 0 || len < 0 || len > b.length - off) {
|
75
|
+
throw new IndexOutOfBoundsException();
|
76
|
+
} else if (len == 0) {
|
77
|
+
return 0;
|
78
|
+
}
|
79
|
+
|
80
|
+
int copyLength = Math.min(firstChunkLength - firstChunkOff, len);
|
81
|
+
if (copyLength > 0) {
|
82
|
+
System.arraycopy(firstChunk, firstChunkOff, b, off, copyLength);
|
83
|
+
len -= copyLength;
|
84
|
+
firstChunkOff += copyLength;
|
85
|
+
}
|
86
|
+
|
87
|
+
if (len <= 0)
|
88
|
+
return copyLength;
|
89
|
+
|
90
|
+
IRubyObject returnValue = encodingReader.callMethod(context, "read", ruby.newFixnum(len));
|
91
|
+
if (returnValue.isNil())
|
92
|
+
return -1;
|
93
|
+
|
94
|
+
ByteList bytes = returnValue.asString().getByteList();
|
95
|
+
int length = bytes.length();
|
96
|
+
System.arraycopy(bytes.unsafeBytes(), bytes.getBegin(), b, off + copyLength, length);
|
97
|
+
return length + copyLength;
|
98
|
+
}
|
99
|
+
|
100
|
+
@Override
|
101
|
+
public int read() {
|
102
|
+
byte[] bytes = new byte[1];
|
103
|
+
int count = read(bytes, 0, 1);
|
104
|
+
if (count < 1)
|
105
|
+
return count;
|
106
|
+
return bytes[0];
|
107
|
+
}
|
108
|
+
|
109
|
+
}
|
@@ -0,0 +1,123 @@
|
|
1
|
+
package nokogiri.internals;
|
2
|
+
|
3
|
+
import java.io.ByteArrayInputStream;
|
4
|
+
import java.io.File;
|
5
|
+
import java.io.IOException;
|
6
|
+
import java.net.URI;
|
7
|
+
|
8
|
+
import nokogiri.internals.ParserContext.Options;
|
9
|
+
|
10
|
+
import org.jruby.Ruby;
|
11
|
+
import org.xml.sax.InputSource;
|
12
|
+
import org.xml.sax.SAXException;
|
13
|
+
import org.xml.sax.ext.EntityResolver2;
|
14
|
+
|
15
|
+
/**
|
16
|
+
* An entity resolver aware of the fact that the Ruby runtime can
|
17
|
+
* change directory but the JVM cannot. Thus any file based
|
18
|
+
* entity resolution that uses relative paths must be translated
|
19
|
+
* to be relative to the current directory of the Ruby runtime.
|
20
|
+
*/
|
21
|
+
public class NokogiriEntityResolver implements EntityResolver2 {
|
22
|
+
protected Ruby runtime;
|
23
|
+
private final NokogiriErrorHandler handler;
|
24
|
+
private final Options options;
|
25
|
+
|
26
|
+
public NokogiriEntityResolver(Ruby runtime, NokogiriErrorHandler handler, Options options) {
|
27
|
+
super();
|
28
|
+
this.runtime = runtime;
|
29
|
+
this.handler = handler;
|
30
|
+
this.options = options;
|
31
|
+
}
|
32
|
+
|
33
|
+
@Override
|
34
|
+
public InputSource getExternalSubset(String name, String baseURI)
|
35
|
+
throws SAXException, IOException {
|
36
|
+
return null;
|
37
|
+
}
|
38
|
+
|
39
|
+
@Override
|
40
|
+
public InputSource resolveEntity(String publicId, String systemId)
|
41
|
+
throws SAXException, IOException {
|
42
|
+
return resolveEntity(runtime, null, publicId, null, systemId);
|
43
|
+
}
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public InputSource resolveEntity(String name,
|
47
|
+
String publicId,
|
48
|
+
String baseURI,
|
49
|
+
String systemId)
|
50
|
+
throws SAXException, IOException {
|
51
|
+
return resolveEntity(runtime, name, publicId, baseURI, systemId);
|
52
|
+
}
|
53
|
+
|
54
|
+
private File join(String parent, String child) {
|
55
|
+
if (new File(parent).isFile()) {
|
56
|
+
parent = new File(parent).getParent();
|
57
|
+
}
|
58
|
+
|
59
|
+
return new File(parent, child);
|
60
|
+
}
|
61
|
+
|
62
|
+
private InputSource emptyInputSource(InputSource source) {
|
63
|
+
source.setByteStream(new ByteArrayInputStream(new byte[0]));
|
64
|
+
return source;
|
65
|
+
}
|
66
|
+
|
67
|
+
private boolean shouldLoadDtd() {
|
68
|
+
return options.dtdLoad || options.dtdValid;
|
69
|
+
}
|
70
|
+
|
71
|
+
private void addError(String errorMessage) {
|
72
|
+
if (handler != null)
|
73
|
+
handler.errors.add(new Exception(errorMessage));
|
74
|
+
}
|
75
|
+
|
76
|
+
/**
|
77
|
+
* Create a file base input source taking into account the current
|
78
|
+
* directory of <code>runtime</code>.
|
79
|
+
* @throws SAXException
|
80
|
+
*/
|
81
|
+
protected InputSource resolveEntity(Ruby runtime, String name, String publicId, String baseURI, String systemId)
|
82
|
+
throws IOException, SAXException {
|
83
|
+
InputSource s = new InputSource();
|
84
|
+
if (name.equals("[dtd]") && !shouldLoadDtd()) {
|
85
|
+
return emptyInputSource(s);
|
86
|
+
} else if (!name.equals("[dtd]") && !options.noEnt) {
|
87
|
+
return emptyInputSource(s);
|
88
|
+
}
|
89
|
+
String adjustedSystemId;
|
90
|
+
URI uri = URI.create(systemId);
|
91
|
+
if (options.noNet && uri.getHost() != null) {
|
92
|
+
addError("Attempt to load network entity " + systemId);
|
93
|
+
return emptyInputSource(s);
|
94
|
+
}
|
95
|
+
// if this is a url or absolute file name then use it
|
96
|
+
if (uri.isAbsolute() && !uri.isOpaque()) {
|
97
|
+
adjustedSystemId = uri.toURL().toString();
|
98
|
+
} else if (new File(uri.getPath()).isAbsolute()) {
|
99
|
+
adjustedSystemId = uri.getPath();
|
100
|
+
} else if (baseURI != null) {
|
101
|
+
URI baseuri = URI.create(baseURI);
|
102
|
+
if (options.noNet && baseuri.getHost() != null) {
|
103
|
+
addError("Attempt to load network entity " + systemId);
|
104
|
+
return emptyInputSource(s);
|
105
|
+
}
|
106
|
+
if (baseuri.getHost() == null) {
|
107
|
+
// this is a local file
|
108
|
+
adjustedSystemId = join(baseuri.getPath(), uri.getPath()).getCanonicalPath();
|
109
|
+
} else {
|
110
|
+
// this is a url, then resolve uri using baseuri
|
111
|
+
adjustedSystemId = baseuri.resolve(systemId).toURL().toString();
|
112
|
+
}
|
113
|
+
} else {
|
114
|
+
// baseURI is null we have to use the current working directory to resolve the entity
|
115
|
+
String pwd = runtime.getCurrentDirectory();
|
116
|
+
adjustedSystemId = join(pwd, uri.getPath()).getCanonicalPath();
|
117
|
+
}
|
118
|
+
s.setSystemId(adjustedSystemId);
|
119
|
+
s.setPublicId(publicId);
|
120
|
+
return s;
|
121
|
+
}
|
122
|
+
|
123
|
+
}
|
@@ -62,12 +62,10 @@ import org.xml.sax.ext.DefaultHandler2;
|
|
62
62
|
* @author Yoko Harada <yokolet@gmail.com>
|
63
63
|
*/
|
64
64
|
public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
private
|
69
|
-
private RubyClass attrClass;
|
70
|
-
private IRubyObject object;
|
65
|
+
private StringBuffer buffer;
|
66
|
+
private final Ruby ruby;
|
67
|
+
private final RubyClass attrClass;
|
68
|
+
private final IRubyObject object;
|
71
69
|
|
72
70
|
/**
|
73
71
|
* Stores parse errors with the most-recent error last.
|
@@ -75,11 +73,11 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
|
75
73
|
* TODO: should these be stored in the document 'errors' array?
|
76
74
|
* Currently only string messages are stored there.
|
77
75
|
*/
|
78
|
-
private LinkedList<XmlSyntaxError> errors = new LinkedList<XmlSyntaxError>();
|
79
|
-
|
76
|
+
private final LinkedList<XmlSyntaxError> errors = new LinkedList<XmlSyntaxError>();
|
77
|
+
|
80
78
|
private Locator locator;
|
81
|
-
private ArrayDeque<Integer> lines;
|
82
|
-
private ArrayDeque<Integer> columns;
|
79
|
+
private final ArrayDeque<Integer> lines;
|
80
|
+
private final ArrayDeque<Integer> columns;
|
83
81
|
private static String htmlParserName = "Nokogiri::HTML::SAX::Parser";
|
84
82
|
private boolean needEmptyAttrCheck = false;
|
85
83
|
|
@@ -108,6 +106,7 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
|
108
106
|
call("start_document");
|
109
107
|
}
|
110
108
|
|
109
|
+
@Override
|
111
110
|
public void xmlDecl(String version, String encoding, String standalone) {
|
112
111
|
call("xmldecl", stringOrNil(ruby, version),
|
113
112
|
stringOrNil(ruby, encoding),
|
@@ -119,6 +118,11 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
|
119
118
|
call("end_document");
|
120
119
|
}
|
121
120
|
|
121
|
+
@Override
|
122
|
+
public void processingInstruction(String target, String data) {
|
123
|
+
call("processing_instruction", ruby.newString(target), ruby.newString(data));
|
124
|
+
}
|
125
|
+
|
122
126
|
/*
|
123
127
|
* This has to call either "start_element" or
|
124
128
|
* "start_element_namespace" depending on whether there are any
|
@@ -235,8 +239,11 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
|
235
239
|
|
236
240
|
@Override
|
237
241
|
public void characters(char[] ch, int start, int length) throws SAXException {
|
238
|
-
|
239
|
-
|
242
|
+
if (buffer != null) {
|
243
|
+
buffer.append(new String(ch, start, length));
|
244
|
+
} else {
|
245
|
+
call("characters", ruby.newString(new String(ch, start, length)));
|
246
|
+
}
|
240
247
|
}
|
241
248
|
|
242
249
|
@Override
|
@@ -246,12 +253,13 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {
|
|
246
253
|
|
247
254
|
@Override
|
248
255
|
public void startCDATA() throws SAXException {
|
249
|
-
|
256
|
+
buffer = new StringBuffer();
|
250
257
|
}
|
251
258
|
|
252
259
|
@Override
|
253
260
|
public void endCDATA() throws SAXException {
|
254
|
-
|
261
|
+
call("cdata_block", ruby.newString(buffer.toString()));
|
262
|
+
buffer = null;
|
255
263
|
}
|
256
264
|
|
257
265
|
@Override
|
@@ -45,7 +45,6 @@ import java.nio.charset.CharsetEncoder;
|
|
45
45
|
import java.util.ArrayList;
|
46
46
|
import java.util.List;
|
47
47
|
import java.util.Set;
|
48
|
-
import java.util.SortedMap;
|
49
48
|
import java.util.regex.Matcher;
|
50
49
|
import java.util.regex.Pattern;
|
51
50
|
|
@@ -63,6 +62,7 @@ import nokogiri.XmlNode;
|
|
63
62
|
import nokogiri.XmlProcessingInstruction;
|
64
63
|
import nokogiri.XmlText;
|
65
64
|
|
65
|
+
import org.jcodings.specific.UTF8Encoding;
|
66
66
|
import org.jruby.Ruby;
|
67
67
|
import org.jruby.RubyArray;
|
68
68
|
import org.jruby.RubyClass;
|
@@ -179,7 +179,7 @@ public class NokogiriHelpers {
|
|
179
179
|
|
180
180
|
public static IRubyObject stringOrNil(Ruby runtime, String s) {
|
181
181
|
if (s == null) return runtime.getNil();
|
182
|
-
return
|
182
|
+
return convertJavaStringToRuby(runtime, s);
|
183
183
|
}
|
184
184
|
|
185
185
|
public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) {
|
@@ -189,7 +189,16 @@ public class NokogiriHelpers {
|
|
189
189
|
|
190
190
|
public static IRubyObject stringOrBlank(Ruby runtime, String s) {
|
191
191
|
if (s == null) return runtime.newString();
|
192
|
-
return
|
192
|
+
return convertJavaStringToRuby(runtime, s);
|
193
|
+
}
|
194
|
+
|
195
|
+
private static IRubyObject convertJavaStringToRuby(Ruby runtime, String str) {
|
196
|
+
if (runtime.is1_9()) {
|
197
|
+
ByteList bytes = new ByteList(str.getBytes(RubyEncoding.UTF8), UTF8Encoding.INSTANCE);
|
198
|
+
return RubyString.newString(runtime, bytes);
|
199
|
+
} else {
|
200
|
+
return RubyString.newString(runtime, str);
|
201
|
+
}
|
193
202
|
}
|
194
203
|
|
195
204
|
/**
|
@@ -582,7 +591,7 @@ public class NokogiriHelpers {
|
|
582
591
|
}
|
583
592
|
|
584
593
|
public static boolean isNamespace(String nodeName) {
|
585
|
-
return (nodeName.
|
594
|
+
return (nodeName.startsWith("xmlns"));
|
586
595
|
}
|
587
596
|
|
588
597
|
public static boolean isNonDefaultNamespace(Node node) {
|
@@ -675,9 +684,9 @@ public class NokogiriHelpers {
|
|
675
684
|
if (name == null) name = "UTF-8";
|
676
685
|
return name;
|
677
686
|
}
|
678
|
-
|
679
|
-
private static Set<String> charsetNames =
|
680
|
-
|
687
|
+
|
688
|
+
private static Set<String> charsetNames = Charset.availableCharsets().keySet();
|
689
|
+
|
681
690
|
private static String ignoreInvalidEncoding(Ruby runtime, IRubyObject encoding) {
|
682
691
|
String givenEncoding = rubyStringToString(encoding);
|
683
692
|
if (charsetNames.contains(givenEncoding)) return givenEncoding;
|
@@ -797,4 +806,13 @@ public class NokogiriHelpers {
|
|
797
806
|
private static Charset shift_jis = Charset.forName("Shift_JIS");
|
798
807
|
private static Charset jis = Charset.forName("ISO-2022-JP");
|
799
808
|
private static Charset euc_jp = Charset.forName("EUC-JP");
|
809
|
+
|
810
|
+
public static boolean shouldEncode(Node text) {
|
811
|
+
return text.getUserData(NokogiriHelpers.ENCODED_STRING) == null ||
|
812
|
+
!((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING));
|
813
|
+
}
|
814
|
+
|
815
|
+
public static boolean shouldDecode(Node text) {
|
816
|
+
return !shouldEncode(text);
|
817
|
+
}
|
800
818
|
}
|
@@ -108,7 +108,8 @@ public class ParserContext extends RubyObject {
|
|
108
108
|
(RubyIO) TypeConverter.convertToType(data,
|
109
109
|
ruby.getIO(),
|
110
110
|
"to_io");
|
111
|
-
|
111
|
+
// use an uncloseable input stream to fix #495
|
112
|
+
source.setByteStream(new UncloseableInputStream(io.getInStream()));
|
112
113
|
} else {
|
113
114
|
if (invoke(context, data, "respond_to?",
|
114
115
|
ruby.newSymbol("string").to_sym()).isTrue()) {
|
@@ -117,7 +117,8 @@ public abstract class ReaderNode {
|
|
117
117
|
RubyArray array = RubyArray.newArray(ruby);
|
118
118
|
if (attributeList != null && attributeList.length > 0) {
|
119
119
|
if (document == null) {
|
120
|
-
|
120
|
+
XmlDocument doc = (XmlDocument) XmlDocument.rbNew(ruby.getCurrentContext(), getNokogiriClass(ruby, "Nokogiri::XML::Document"), new IRubyObject[0]);
|
121
|
+
document = doc.getDocument();
|
121
122
|
}
|
122
123
|
for (int i=0; i<attributeList.length; i++) {
|
123
124
|
if (!isNamespace(attributeList.names.get(i))) {
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -65,21 +65,33 @@ import org.w3c.dom.Text;
|
|
65
65
|
|
66
66
|
/**
|
67
67
|
* A class for serializing a document.
|
68
|
-
*
|
68
|
+
*
|
69
69
|
* @author sergio
|
70
70
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
71
71
|
* @author Yoko Harada <yokolet@gmail.com>
|
72
72
|
*/
|
73
73
|
public class SaveContextVisitor {
|
74
74
|
|
75
|
-
private StringBuffer buffer;
|
76
|
-
private Stack<String> indentation;
|
77
|
-
private String encoding
|
78
|
-
private
|
79
|
-
private boolean
|
80
|
-
private
|
81
|
-
private
|
82
|
-
private
|
75
|
+
private final StringBuffer buffer;
|
76
|
+
private final Stack<String> indentation;
|
77
|
+
private String encoding;
|
78
|
+
private final String indentString;
|
79
|
+
private boolean format;
|
80
|
+
private final boolean noDecl;
|
81
|
+
private final boolean noEmpty;
|
82
|
+
private final boolean noXhtml;
|
83
|
+
private final boolean asXhtml;
|
84
|
+
private boolean asXml;
|
85
|
+
private final boolean asHtml;
|
86
|
+
private final boolean asBuilder;
|
87
|
+
private boolean htmlDoc;
|
88
|
+
private final boolean fragment;
|
89
|
+
private final boolean canonical, incl_ns, with_comments;
|
90
|
+
private boolean subsets;
|
91
|
+
private boolean exclusive;
|
92
|
+
private final List<Node> c14nNodeList;
|
93
|
+
private final Deque<Attr[]> c14nNamespaceStack;
|
94
|
+
private final Deque<Attr[]> c14nAttrStack;
|
83
95
|
private List<String> c14nExclusiveInclusivePrefixes = null;
|
84
96
|
/*
|
85
97
|
* U can't touch this.
|
@@ -96,7 +108,7 @@ public class SaveContextVisitor {
|
|
96
108
|
public static final int AS_XML = 32;
|
97
109
|
public static final int AS_HTML = 64;
|
98
110
|
public static final int AS_BUILDER = 128;
|
99
|
-
|
111
|
+
|
100
112
|
public static final int CANONICAL = 1;
|
101
113
|
public static final int INCL_NS = 2;
|
102
114
|
public static final int WITH_COMMENTS = 4;
|
@@ -113,7 +125,7 @@ public class SaveContextVisitor {
|
|
113
125
|
c14nNamespaceStack = new ArrayDeque<Attr[]>();
|
114
126
|
c14nAttrStack = new ArrayDeque<Attr[]>();
|
115
127
|
format = (options & FORMAT) == FORMAT;
|
116
|
-
|
128
|
+
|
117
129
|
noDecl = (options & NO_DECL) == NO_DECL;
|
118
130
|
noEmpty = (options & NO_EMPTY) == NO_EMPTY;
|
119
131
|
noXhtml = (options & NO_XHTML) == NO_XHTML;
|
@@ -121,40 +133,40 @@ public class SaveContextVisitor {
|
|
121
133
|
asXml = (options & AS_XML) == AS_XML;
|
122
134
|
asHtml = (options & AS_HTML) == AS_HTML;
|
123
135
|
asBuilder = (options & AS_BUILDER) == AS_BUILDER;
|
124
|
-
|
136
|
+
|
125
137
|
canonical = (canonicalOpts & CANONICAL) == CANONICAL;
|
126
138
|
incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
|
127
139
|
with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
|
128
140
|
subsets = (canonicalOpts & SUBSETS) == SUBSETS;
|
129
|
-
|
141
|
+
|
130
142
|
if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
|
131
143
|
if ((!format && indent != null) && indent.length() > 0) format = true;
|
132
144
|
if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
|
133
145
|
indentString = indent;
|
134
146
|
if (!asXml && !asHtml && !asXhtml && !asBuilder) asXml = true;
|
135
147
|
}
|
136
|
-
|
148
|
+
|
137
149
|
@Override
|
138
150
|
public String toString() {
|
139
151
|
return (new String(buffer));
|
140
152
|
}
|
141
|
-
|
153
|
+
|
142
154
|
public void setHtmlDoc(boolean htmlDoc) {
|
143
155
|
this.htmlDoc = htmlDoc;
|
144
156
|
}
|
145
|
-
|
157
|
+
|
146
158
|
public void setEncoding(String encoding) {
|
147
159
|
this.encoding = encoding;
|
148
160
|
}
|
149
|
-
|
161
|
+
|
150
162
|
public List<Node> getC14nNodeList() {
|
151
163
|
return c14nNodeList;
|
152
164
|
}
|
153
|
-
|
165
|
+
|
154
166
|
public void setC14nExclusiveInclusivePrefixes(List<String> prefixes) {
|
155
167
|
c14nExclusiveInclusivePrefixes = prefixes;
|
156
168
|
}
|
157
|
-
|
169
|
+
|
158
170
|
public boolean enter(Node node) {
|
159
171
|
if (node instanceof Document) {
|
160
172
|
return enter((Document)node);
|
@@ -181,7 +193,7 @@ public class SaveContextVisitor {
|
|
181
193
|
return enter((Entity)node);
|
182
194
|
}
|
183
195
|
if (node instanceof EntityReference) {
|
184
|
-
return enter(
|
196
|
+
return enter(node);
|
185
197
|
}
|
186
198
|
if (node instanceof Notation) {
|
187
199
|
return enter((Notation)node);
|
@@ -191,7 +203,7 @@ public class SaveContextVisitor {
|
|
191
203
|
}
|
192
204
|
return false;
|
193
205
|
}
|
194
|
-
|
206
|
+
|
195
207
|
public void leave(Node node) {
|
196
208
|
if (node instanceof Document) {
|
197
209
|
leave((Document)node);
|
@@ -206,7 +218,6 @@ public class SaveContextVisitor {
|
|
206
218
|
return;
|
207
219
|
}
|
208
220
|
if (node instanceof Text) {
|
209
|
-
leave((Text)node);
|
210
221
|
return;
|
211
222
|
}
|
212
223
|
if (node instanceof CDATASection) {
|
@@ -226,7 +237,7 @@ public class SaveContextVisitor {
|
|
226
237
|
return;
|
227
238
|
}
|
228
239
|
if (node instanceof EntityReference) {
|
229
|
-
leave(
|
240
|
+
leave(node);
|
230
241
|
return;
|
231
242
|
}
|
232
243
|
if (node instanceof Notation) {
|
@@ -238,16 +249,16 @@ public class SaveContextVisitor {
|
|
238
249
|
return;
|
239
250
|
}
|
240
251
|
}
|
241
|
-
|
252
|
+
|
242
253
|
public boolean enter(String string) {
|
243
254
|
buffer.append(string);
|
244
255
|
return true;
|
245
256
|
}
|
246
|
-
|
257
|
+
|
247
258
|
public void leave(String string) {
|
248
259
|
// no-op
|
249
260
|
}
|
250
|
-
|
261
|
+
|
251
262
|
public boolean enter(Attr attr) {
|
252
263
|
String name = attr.getName();
|
253
264
|
buffer.append(name);
|
@@ -260,34 +271,34 @@ public class SaveContextVisitor {
|
|
260
271
|
}
|
261
272
|
return true;
|
262
273
|
}
|
263
|
-
|
264
|
-
private static Pattern p =
|
274
|
+
|
275
|
+
private static Pattern p =
|
265
276
|
Pattern.compile("charset(()|\\s+)=(()|\\s+)(\\w|\\_|\\.|\\-)+", Pattern.CASE_INSENSITIVE);
|
266
|
-
|
277
|
+
|
267
278
|
private String replaceCharsetIfNecessary(Attr attr) {
|
268
279
|
String value = attr.getValue();
|
269
280
|
if (encoding == null) return value; // unable to replace in any case
|
270
281
|
if (!"content".equals(attr.getName().toLowerCase())) return value; // must be content attr
|
271
|
-
if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value;
|
282
|
+
if (!"meta".equals(attr.getOwnerElement().getNodeName().toLowerCase())) return value;
|
272
283
|
Matcher m = p.matcher(value);
|
273
284
|
if (!m.find()) return value;
|
274
285
|
if (value.contains(encoding)) return value; // no need to replace
|
275
286
|
return value.replace(m.group(), "charset=" + encoding);
|
276
287
|
}
|
277
|
-
|
288
|
+
|
278
289
|
public static final String[] HTML_BOOLEAN_ATTRS = {
|
279
290
|
"checked", "compact", "declare", "defer", "disabled", "ismap",
|
280
291
|
"multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
|
281
292
|
"selected"
|
282
293
|
};
|
283
|
-
|
294
|
+
|
284
295
|
private boolean isHtmlBooleanAttr(String name) {
|
285
296
|
for (String s : HTML_BOOLEAN_ATTRS) {
|
286
297
|
if (s.equals(name)) return true;
|
287
298
|
}
|
288
299
|
return false;
|
289
300
|
}
|
290
|
-
|
301
|
+
|
291
302
|
private String serializeAttrTextContent(String s, boolean htmlDoc) {
|
292
303
|
if (s == null) return "";
|
293
304
|
|
@@ -299,7 +310,7 @@ public class SaveContextVisitor {
|
|
299
310
|
case '\n': buffer.append(" "); break;
|
300
311
|
case '\r': buffer.append(" "); break;
|
301
312
|
case '\t': buffer.append("	"); break;
|
302
|
-
case '"': if (htmlDoc) buffer.append("%22");
|
313
|
+
case '"': if (htmlDoc) buffer.append("%22");
|
303
314
|
else buffer.append(""");
|
304
315
|
break;
|
305
316
|
case '<': buffer.append("<"); break;
|
@@ -315,14 +326,14 @@ public class SaveContextVisitor {
|
|
315
326
|
public void leave(Attr attr) {
|
316
327
|
// no-op
|
317
328
|
}
|
318
|
-
|
329
|
+
|
319
330
|
public boolean enter(CDATASection cdata) {
|
320
331
|
buffer.append("<![CDATA[");
|
321
332
|
buffer.append(cdata.getData());
|
322
333
|
buffer.append("]]>");
|
323
334
|
return true;
|
324
335
|
}
|
325
|
-
|
336
|
+
|
326
337
|
public void leave(CDATASection cdata) {
|
327
338
|
// no-op
|
328
339
|
}
|
@@ -337,11 +348,11 @@ public class SaveContextVisitor {
|
|
337
348
|
buffer.append("-->");
|
338
349
|
return true;
|
339
350
|
}
|
340
|
-
|
351
|
+
|
341
352
|
public void leave(Comment comment) {
|
342
353
|
// no-op
|
343
354
|
}
|
344
|
-
|
355
|
+
|
345
356
|
public boolean enter(Document document) {
|
346
357
|
if (!noDecl) {
|
347
358
|
buffer.append("<?xml version=\"");
|
@@ -357,11 +368,11 @@ public class SaveContextVisitor {
|
|
357
368
|
}
|
358
369
|
return true;
|
359
370
|
}
|
360
|
-
|
371
|
+
|
361
372
|
public void leave(Document document) {
|
362
373
|
// no-op
|
363
374
|
}
|
364
|
-
|
375
|
+
|
365
376
|
public boolean enter(DocumentType docType) {
|
366
377
|
if (canonical) {
|
367
378
|
c14nNodeList.add(docType);
|
@@ -389,7 +400,7 @@ public class SaveContextVisitor {
|
|
389
400
|
buffer.append(">\n");
|
390
401
|
return true;
|
391
402
|
}
|
392
|
-
|
403
|
+
|
393
404
|
public void leave(DocumentType docType) {
|
394
405
|
// no-op
|
395
406
|
}
|
@@ -403,12 +414,12 @@ public class SaveContextVisitor {
|
|
403
414
|
}
|
404
415
|
String current = indentation.peek();
|
405
416
|
buffer.append(current);
|
406
|
-
if (needIndent()) {
|
417
|
+
if (needIndent(element)) {
|
407
418
|
indentation.push(current + indentString);
|
408
419
|
}
|
409
420
|
String name = element.getTagName();
|
410
421
|
buffer.append("<" + name);
|
411
|
-
Attr[] attrs = getAttrsAndNamespaces(element);
|
422
|
+
Attr[] attrs = getAttrsAndNamespaces(element);
|
412
423
|
for (Attr attr : attrs) {
|
413
424
|
if (attr.getSpecified()) {
|
414
425
|
buffer.append(" ");
|
@@ -423,7 +434,7 @@ public class SaveContextVisitor {
|
|
423
434
|
}
|
424
435
|
// no child
|
425
436
|
if (asHtml || asXhtml) {
|
426
|
-
buffer.append(">");
|
437
|
+
buffer.append(">");
|
427
438
|
} else if (asXml && noEmpty) {
|
428
439
|
buffer.append(">");
|
429
440
|
} else {
|
@@ -434,26 +445,28 @@ public class SaveContextVisitor {
|
|
434
445
|
}
|
435
446
|
return true;
|
436
447
|
}
|
437
|
-
|
438
|
-
private boolean needIndent() {
|
448
|
+
|
449
|
+
private boolean needIndent(Element element) {
|
450
|
+
if (containsText(element)) return false;
|
439
451
|
if (fragment) return false; // a given option might be fragment and format. fragment matters
|
440
452
|
if (format || asBuilder) return true;
|
441
453
|
return false;
|
442
454
|
}
|
443
|
-
|
455
|
+
|
444
456
|
private boolean needBreakInOpening(Element element) {
|
457
|
+
if (containsText(element)) return false;
|
445
458
|
if (fragment) return false;
|
446
459
|
if (format) return true;
|
447
460
|
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
|
448
461
|
if (format && element.getNextSibling() == null && element.hasChildNodes()) return true;
|
449
462
|
return false;
|
450
463
|
}
|
451
|
-
|
464
|
+
|
452
465
|
private boolean isEmpty(String name) {
|
453
466
|
HTMLElements.Element element = HTMLElements.getElement(name);
|
454
467
|
return element.isEmpty();
|
455
468
|
}
|
456
|
-
|
469
|
+
|
457
470
|
private Attr[] getAttrsAndNamespaces(Element element) {
|
458
471
|
NamedNodeMap attrs = element.getAttributes();
|
459
472
|
if (!canonical) {
|
@@ -474,7 +487,7 @@ public class SaveContextVisitor {
|
|
474
487
|
c14nAttrStack.push(attributeOfAncestors);
|
475
488
|
subsets = false; // namespace propagation should be done only once on top level node.
|
476
489
|
}
|
477
|
-
|
490
|
+
|
478
491
|
getNamespacesAndAttrs(element, namespaces, attributes);
|
479
492
|
|
480
493
|
Attr[] namespaceArray = getSortedArray(namespaces);
|
@@ -491,9 +504,9 @@ public class SaveContextVisitor {
|
|
491
504
|
c14nAttrStack.push(attributeArray);
|
492
505
|
return allAttrs;
|
493
506
|
}
|
494
|
-
|
507
|
+
|
495
508
|
}
|
496
|
-
|
509
|
+
|
497
510
|
private void getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes) {
|
498
511
|
if (parent == null) return;
|
499
512
|
NamedNodeMap attrs = parent.getAttributes();
|
@@ -505,7 +518,7 @@ public class SaveContextVisitor {
|
|
505
518
|
}
|
506
519
|
getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
|
507
520
|
}
|
508
|
-
|
521
|
+
|
509
522
|
private void getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes) {
|
510
523
|
NamedNodeMap attrs = current.getAttributes();
|
511
524
|
for (int i=0; i<attrs.getLength(); i++) {
|
@@ -531,7 +544,7 @@ public class SaveContextVisitor {
|
|
531
544
|
if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
|
532
545
|
// exactly the same namespace should not be added
|
533
546
|
newNamespace = false;
|
534
|
-
} else {
|
547
|
+
} else {
|
535
548
|
// in case of namespace url change, propagated namespace will be override
|
536
549
|
namespaces.remove(parentNamespaces[n]);
|
537
550
|
}
|
@@ -540,7 +553,7 @@ public class SaveContextVisitor {
|
|
540
553
|
if (newNamespace && !namespaces.contains(attr)) namespaces.add(attr);
|
541
554
|
}
|
542
555
|
}
|
543
|
-
|
556
|
+
|
544
557
|
private void getAttributesWithPropagated(List<Attr> attributes, Attr attr) {
|
545
558
|
boolean newAttribute = true;
|
546
559
|
Iterator<Attr[]> iter = c14nAttrStack.iterator();
|
@@ -552,7 +565,7 @@ public class SaveContextVisitor {
|
|
552
565
|
if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
|
553
566
|
// exactly the same attribute should not be added
|
554
567
|
newAttribute = false;
|
555
|
-
} else {
|
568
|
+
} else {
|
556
569
|
// in case of attribute value change, propagated attribute will be override
|
557
570
|
attributes.remove(parentAttr[n]);
|
558
571
|
}
|
@@ -561,7 +574,7 @@ public class SaveContextVisitor {
|
|
561
574
|
if (newAttribute) attributes.add(attr);
|
562
575
|
}
|
563
576
|
}
|
564
|
-
|
577
|
+
|
565
578
|
private void verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs) {
|
566
579
|
Attr attr = (Attr) attrs.getNamedItem("xml:space");
|
567
580
|
if (attr == null) {
|
@@ -573,7 +586,7 @@ public class SaveContextVisitor {
|
|
573
586
|
}
|
574
587
|
}
|
575
588
|
}
|
576
|
-
|
589
|
+
|
577
590
|
private Attr[] getSortedArray(List<Attr> attrList) {
|
578
591
|
Attr[] attrArray = attrList.toArray(new Attr[0]);
|
579
592
|
Arrays.sort(attrArray, new Comparator<Attr>() {
|
@@ -584,7 +597,7 @@ public class SaveContextVisitor {
|
|
584
597
|
});
|
585
598
|
return attrArray;
|
586
599
|
}
|
587
|
-
|
600
|
+
|
588
601
|
public void leave(Element element) {
|
589
602
|
if (canonical) {
|
590
603
|
c14nNamespaceStack.poll();
|
@@ -596,10 +609,10 @@ public class SaveContextVisitor {
|
|
596
609
|
indentation.pop();
|
597
610
|
buffer.append(indentation.peek());
|
598
611
|
} else if (asBuilder) {
|
599
|
-
indentation.pop();
|
612
|
+
if (!containsText(element)) indentation.pop();
|
600
613
|
}
|
601
614
|
buffer.append("</" + name + ">");
|
602
|
-
if (needBreakInClosing()) {
|
615
|
+
if (needBreakInClosing(element)) {
|
603
616
|
buffer.append("\n");
|
604
617
|
}
|
605
618
|
return;
|
@@ -610,25 +623,31 @@ public class SaveContextVisitor {
|
|
610
623
|
buffer.append("</" + name + ">");
|
611
624
|
}
|
612
625
|
}
|
613
|
-
if (needBreakInClosing()) {
|
614
|
-
indentation.pop();
|
626
|
+
if (needBreakInClosing(element)) {
|
627
|
+
if (!containsText(element)) indentation.pop();
|
615
628
|
buffer.append("\n");
|
616
629
|
}
|
617
630
|
}
|
618
|
-
|
631
|
+
|
619
632
|
private boolean needIndentInClosing(Element element) {
|
633
|
+
if (containsText(element)) return false;
|
634
|
+
|
620
635
|
if (fragment) return false; // a given option might be fragment and format. fragment matters
|
621
636
|
if (format) return true;
|
622
637
|
if (asBuilder && element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.ELEMENT_NODE) return true;
|
623
638
|
return false;
|
624
639
|
}
|
625
|
-
|
626
|
-
private boolean needBreakInClosing() {
|
640
|
+
|
641
|
+
private boolean needBreakInClosing(Element element) {
|
627
642
|
if (fragment) return false;
|
628
643
|
if (format || asBuilder) return true;
|
629
644
|
return false;
|
630
645
|
}
|
631
646
|
|
647
|
+
private boolean containsText(Element element) {
|
648
|
+
return (element.getFirstChild() != null && element.getFirstChild().getNodeType() == Node.TEXT_NODE);
|
649
|
+
}
|
650
|
+
|
632
651
|
public boolean enter(Entity entity) {
|
633
652
|
String name = entity.getNodeName();
|
634
653
|
String pubId = entity.getPublicId();
|
@@ -653,20 +672,21 @@ public class SaveContextVisitor {
|
|
653
672
|
buffer.append(">");
|
654
673
|
return true;
|
655
674
|
}
|
656
|
-
|
675
|
+
|
657
676
|
public void leave(Entity entity) {
|
658
677
|
// no-op
|
659
678
|
}
|
660
679
|
|
661
|
-
public boolean enter(EntityReference entityRef) {
|
662
|
-
|
680
|
+
public boolean enterEntityReference(Text entityRef) {
|
681
|
+
String name = entityRef.getNodeName();
|
682
|
+
buffer.append("&" + name + ";");
|
663
683
|
return true;
|
664
684
|
}
|
665
|
-
|
666
|
-
public void leave(EntityReference entityRef) {
|
685
|
+
|
686
|
+
public void leaveEntityReference(Text entityRef) {
|
667
687
|
// no-op
|
668
688
|
}
|
669
|
-
|
689
|
+
|
670
690
|
public boolean enter(Notation notation) {
|
671
691
|
String name = notation.getNodeName();
|
672
692
|
String pubId = notation.getPublicId();
|
@@ -690,7 +710,7 @@ public class SaveContextVisitor {
|
|
690
710
|
buffer.append(">");
|
691
711
|
return true;
|
692
712
|
}
|
693
|
-
|
713
|
+
|
694
714
|
public void leave(Notation notation) {
|
695
715
|
// no-op
|
696
716
|
}
|
@@ -706,7 +726,7 @@ public class SaveContextVisitor {
|
|
706
726
|
if (canonical) c14nNodeList.add(pi);
|
707
727
|
return true;
|
708
728
|
}
|
709
|
-
|
729
|
+
|
710
730
|
public void leave(ProcessingInstruction pi) {
|
711
731
|
// no-op
|
712
732
|
}
|
@@ -721,14 +741,8 @@ public class SaveContextVisitor {
|
|
721
741
|
return true;
|
722
742
|
}
|
723
743
|
}
|
724
|
-
|
725
|
-
if (needIndentText() && !"".equals(textContent.trim())) {
|
726
|
-
String current = indentation.peek();
|
727
|
-
buffer.append(current);
|
728
|
-
indentation.push(current + indentString);
|
729
|
-
if (textContent.charAt(0) == lineSeparator) textContent = textContent.substring(1);
|
730
|
-
}
|
731
|
-
if (text.getUserData(NokogiriHelpers.ENCODED_STRING) == null || !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING))) {
|
744
|
+
|
745
|
+
if (NokogiriHelpers.shouldEncode(text)) {
|
732
746
|
textContent = encodeJavaString(textContent);
|
733
747
|
}
|
734
748
|
|
@@ -738,29 +752,13 @@ public class SaveContextVisitor {
|
|
738
752
|
buffer.append(textContent);
|
739
753
|
return true;
|
740
754
|
}
|
741
|
-
|
742
|
-
private boolean needIndentText() {
|
743
|
-
if (fragment) return false;
|
744
|
-
if (format) return true;
|
745
|
-
return false;
|
746
|
-
}
|
747
|
-
|
748
|
-
public void leave(Text text) {
|
749
|
-
String textContent = text.getNodeValue();
|
750
|
-
if (needIndentText() && !"".equals(textContent.trim())) {
|
751
|
-
indentation.pop();
|
752
|
-
if (textContent.charAt(textContent.length()-1) != lineSeparator) {
|
753
|
-
buffer.append("\n");
|
754
|
-
}
|
755
|
-
}
|
756
|
-
}
|
757
|
-
|
755
|
+
|
758
756
|
private String getEncoding(Text text) {
|
759
757
|
if (encoding != null) return encoding;
|
760
758
|
encoding = text.getOwnerDocument().getInputEncoding();
|
761
759
|
return encoding;
|
762
760
|
}
|
763
|
-
|
761
|
+
|
764
762
|
private String encodeStringToHtmlEntity(String text) {
|
765
763
|
int last = 126; // = U+007E. No need to encode under U+007E.
|
766
764
|
StringBuffer sb = new StringBuffer();
|