nokogiri 1.5.0-java → 1.5.1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +56 -12
- data/CHANGELOG.rdoc +45 -0
- data/C_CODING_STYLE.rdoc +27 -0
- data/Manifest.txt +4 -0
- data/README.rdoc +11 -7
- data/Rakefile +44 -26
- data/bin/nokogiri +10 -2
- data/ext/java/nokogiri/HtmlDocument.java +37 -2
- data/ext/java/nokogiri/NokogiriService.java +10 -2
- data/ext/java/nokogiri/XmlAttr.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +68 -11
- data/ext/java/nokogiri/XmlDocumentFragment.java +16 -5
- data/ext/java/nokogiri/XmlElement.java +0 -40
- data/ext/java/nokogiri/XmlNamespace.java +8 -1
- data/ext/java/nokogiri/XmlNode.java +131 -27
- data/ext/java/nokogiri/XmlNodeSet.java +4 -1
- data/ext/java/nokogiri/XmlSaxParserContext.java +2 -13
- data/ext/java/nokogiri/XmlXpathContext.java +4 -1
- data/ext/java/nokogiri/XsltStylesheet.java +198 -37
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +40 -2
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +82 -9
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +4 -3
- data/ext/java/nokogiri/internals/ParserContext.java +33 -3
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +203 -12
- data/ext/java/nokogiri/internals/XmlDomParser.java +33 -2
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +32 -12
- data/ext/nokogiri/extconf.rb +11 -3
- data/ext/nokogiri/html_document.c +16 -0
- data/ext/nokogiri/html_sax_parser_context.c +59 -37
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +6 -8
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +101 -3
- data/ext/nokogiri/xml_document.h +3 -3
- data/ext/nokogiri/xml_node.c +150 -58
- data/ext/nokogiri/xml_node_set.c +169 -120
- data/ext/nokogiri/xml_node_set.h +5 -0
- data/ext/nokogiri/xml_sax_parser_context.c +64 -41
- data/ext/nokogiri/xml_text.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +30 -24
- data/ext/nokogiri/xslt_stylesheet.c +62 -16
- data/ext/nokogiri/xslt_stylesheet.h +5 -0
- data/lib/nokogiri/css/parser.rb +163 -157
- data/lib/nokogiri/css/parser.y +6 -3
- data/lib/nokogiri/css/tokenizer.rb +1 -1
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/document.rb +82 -42
- data/lib/nokogiri/html/sax/push_parser.rb +16 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +6 -0
- data/lib/nokogiri/xml/builder.rb +7 -1
- data/lib/nokogiri/xml/document.rb +32 -17
- data/lib/nokogiri/xml/document_fragment.rb +6 -1
- data/lib/nokogiri/xml/node.rb +40 -9
- data/lib/nokogiri/xslt.rb +5 -1
- data/tasks/cross_compile.rb +1 -0
- data/tasks/nokogiri.org.rb +6 -0
- data/tasks/test.rb +1 -0
- data/test/css/test_xpath_visitor.rb +6 -0
- data/test/helper.rb +1 -0
- data/test/html/test_document.rb +26 -0
- data/test/html/test_document_fragment.rb +1 -2
- data/test/test_memory_leak.rb +81 -1
- data/test/test_xslt_transforms.rb +152 -123
- data/test/xml/test_builder.rb +24 -2
- data/test/xml/test_c14n.rb +151 -0
- data/test/xml/test_document.rb +48 -0
- data/test/xml/test_namespace.rb +5 -0
- data/test/xml/test_node.rb +82 -1
- data/test/xml/test_node_attributes.rb +19 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +32 -0
- data/test/xml/test_node_set.rb +16 -8
- data/test/xml/test_reader_encoding.rb +16 -0
- data/test/xml/test_unparented_node.rb +32 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +22 -0
- metadata +147 -123
@@ -1,7 +1,7 @@
|
|
1
1
|
/**
|
2
2
|
* (The MIT License)
|
3
3
|
*
|
4
|
-
* Copyright (c) 2008 -
|
4
|
+
* Copyright (c) 2008 - 2012:
|
5
5
|
*
|
6
6
|
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
7
|
* * {Mike Dalessio}[http://mike.daless.io]
|
@@ -35,10 +35,17 @@ package nokogiri.internals;
|
|
35
35
|
import java.io.File;
|
36
36
|
import java.io.UnsupportedEncodingException;
|
37
37
|
import java.nio.ByteBuffer;
|
38
|
+
import java.nio.CharBuffer;
|
39
|
+
import java.nio.charset.CharacterCodingException;
|
38
40
|
import java.nio.charset.Charset;
|
41
|
+
import java.nio.charset.CharsetDecoder;
|
42
|
+
import java.nio.charset.CharsetEncoder;
|
43
|
+
import java.util.ArrayList;
|
44
|
+
import java.util.List;
|
39
45
|
import java.util.regex.Matcher;
|
40
46
|
import java.util.regex.Pattern;
|
41
47
|
|
48
|
+
import nokogiri.HtmlDocument;
|
42
49
|
import nokogiri.NokogiriService;
|
43
50
|
import nokogiri.XmlAttr;
|
44
51
|
import nokogiri.XmlCdata;
|
@@ -57,6 +64,7 @@ import org.jruby.RubyArray;
|
|
57
64
|
import org.jruby.RubyClass;
|
58
65
|
import org.jruby.RubyEncoding;
|
59
66
|
import org.jruby.RubyString;
|
67
|
+
import org.jruby.runtime.ThreadContext;
|
60
68
|
import org.jruby.runtime.builtin.IRubyObject;
|
61
69
|
import org.jruby.util.ByteList;
|
62
70
|
import org.w3c.dom.Attr;
|
@@ -74,6 +82,7 @@ import org.w3c.dom.NodeList;
|
|
74
82
|
public class NokogiriHelpers {
|
75
83
|
public static final String CACHED_NODE = "NOKOGIRI_CACHED_NODE";
|
76
84
|
public static final String VALID_ROOT_NODE = "NOKOGIRI_VALIDE_ROOT_NODE";
|
85
|
+
public static final String ENCODED_STRING = "NOKOGIRI_ENCODED_STRING";
|
77
86
|
|
78
87
|
public static XmlNode getCachedNode(Node node) {
|
79
88
|
return (XmlNode) node.getUserData(CACHED_NODE);
|
@@ -89,12 +98,14 @@ public class NokogiriHelpers {
|
|
89
98
|
if(node == null) return ruby.getNil();
|
90
99
|
if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
|
91
100
|
XmlDocument xmlDocument = (XmlDocument)node.getOwnerDocument().getUserData(CACHED_NODE);
|
101
|
+
if (!(xmlDocument instanceof HtmlDocument)) {
|
92
102
|
String prefix = getLocalNameForNamespace(((Attr)node).getName());
|
93
103
|
prefix = prefix != null ? prefix : "";
|
94
104
|
String href = ((Attr)node).getValue();
|
95
105
|
XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
|
96
106
|
if (xmlNamespace != null) return xmlNamespace;
|
97
107
|
else return XmlNamespace.createFromAttr(ruby, (Attr)node);
|
108
|
+
}
|
98
109
|
}
|
99
110
|
XmlNode xmlNode = getCachedNode(node);
|
100
111
|
if(xmlNode == null) {
|
@@ -145,7 +156,7 @@ public class NokogiriHelpers {
|
|
145
156
|
return xmlCdata;
|
146
157
|
case Node.DOCUMENT_NODE:
|
147
158
|
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"));
|
148
|
-
xmlDocument.
|
159
|
+
xmlDocument.setDocumentNode(runtime.getCurrentContext(), node);
|
149
160
|
return xmlDocument;
|
150
161
|
case Node.DOCUMENT_TYPE_NODE:
|
151
162
|
XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
|
@@ -167,6 +178,11 @@ public class NokogiriHelpers {
|
|
167
178
|
return RubyString.newString(runtime, s);
|
168
179
|
}
|
169
180
|
|
181
|
+
public static IRubyObject stringOrNil(Ruby runtime, byte[] bytes) {
|
182
|
+
if (bytes == null) return runtime.getNil();
|
183
|
+
return RubyString.newString(runtime, bytes);
|
184
|
+
}
|
185
|
+
|
170
186
|
public static IRubyObject stringOrBlank(Ruby runtime, String s) {
|
171
187
|
if (s == null) return runtime.newString();
|
172
188
|
return RubyString.newString(runtime, s);
|
@@ -263,6 +279,15 @@ public class NokogiriHelpers {
|
|
263
279
|
ByteBuffer buf = ByteBuffer.wrap(data, offset, len);
|
264
280
|
return getCharsetUTF8().decode(buf).toString();
|
265
281
|
}
|
282
|
+
|
283
|
+
public static List<String> rubyStringArrayToJavaList(RubyArray ary) {
|
284
|
+
List<String> list = new ArrayList<String>();
|
285
|
+
for (int i=0; i < ary.getLength(); i++) {
|
286
|
+
Object obj = ary.get(i);
|
287
|
+
if (obj != null) list.add(obj.toString());
|
288
|
+
}
|
289
|
+
return list;
|
290
|
+
}
|
266
291
|
|
267
292
|
public static String getNodeCompletePath(Node node) {
|
268
293
|
|
@@ -531,13 +556,6 @@ public class NokogiriHelpers {
|
|
531
556
|
return convert(encoded_pattern, s, encoded, decoded);
|
532
557
|
}
|
533
558
|
|
534
|
-
private static Pattern not_escaped_pattern = Pattern.compile("\\&(?!(amp;|gt;|lt;))|<|>");
|
535
|
-
public static boolean isNotXmlEscaped(String s) {
|
536
|
-
if (s == null) return false;
|
537
|
-
Matcher matcher = not_escaped_pattern.matcher(s);
|
538
|
-
return (matcher.find());
|
539
|
-
}
|
540
|
-
|
541
559
|
public static String getNodeName(Node node) {
|
542
560
|
if(node == null) { System.out.println("node is null"); return ""; }
|
543
561
|
String name = node.getNodeName();
|
@@ -569,6 +587,38 @@ public class NokogiriHelpers {
|
|
569
587
|
public static boolean isXmlBase(String attrName) {
|
570
588
|
return "xml:base".equals(attrName) || "xlink:href".equals(attrName);
|
571
589
|
}
|
590
|
+
|
591
|
+
public static boolean isWhitespaceText(ThreadContext context, IRubyObject obj) {
|
592
|
+
if (obj == null || obj.isNil()) return false;
|
593
|
+
|
594
|
+
XmlNode node = (XmlNode) obj;
|
595
|
+
if (!(node instanceof XmlText))
|
596
|
+
return false;
|
597
|
+
|
598
|
+
String content = rubyStringToString(node.content(context));
|
599
|
+
return content.trim().length() == 0;
|
600
|
+
}
|
601
|
+
|
602
|
+
public static boolean isWhitespaceText(String s) {
|
603
|
+
return s.trim().length() == 0;
|
604
|
+
}
|
605
|
+
|
606
|
+
public static String canonicalizeWhitespce(String s) {
|
607
|
+
StringBuilder sb = new StringBuilder();
|
608
|
+
char[] chars = s.toCharArray();
|
609
|
+
boolean newline_added = false;
|
610
|
+
for (int i=0; i<chars.length; i++) {
|
611
|
+
if (chars[i] == '\n') {
|
612
|
+
if (!newline_added) {
|
613
|
+
sb.append(chars[i]);
|
614
|
+
newline_added = true;
|
615
|
+
}
|
616
|
+
} else {
|
617
|
+
sb.append(chars[i]);
|
618
|
+
}
|
619
|
+
}
|
620
|
+
return sb.toString();
|
621
|
+
}
|
572
622
|
|
573
623
|
public static String newQName(String newPrefix, Node node) {
|
574
624
|
if(newPrefix == null) {
|
@@ -636,4 +686,27 @@ public class NokogiriHelpers {
|
|
636
686
|
if (dtdFile.exists()) return dtdFile.getPath();
|
637
687
|
return null;
|
638
688
|
}
|
689
|
+
|
690
|
+
public static boolean isUTF8(String encoding) {
|
691
|
+
if (encoding == null) return true; // no need to convert encoding
|
692
|
+
int ret = Charset.forName(encoding).compareTo(Charset.forName("UTF-8"));
|
693
|
+
return ret == 0;
|
694
|
+
}
|
695
|
+
|
696
|
+
public static byte[] convertEncoding(Charset output_charset, String input_string) throws CharacterCodingException {
|
697
|
+
Charset input = Charset.forName("UTF-8");
|
698
|
+
CharsetDecoder decoder = input.newDecoder();
|
699
|
+
CharsetEncoder encoder = output_charset.newEncoder();
|
700
|
+
decoder.reset();
|
701
|
+
encoder.reset();
|
702
|
+
ByteBuffer bbuf = ByteBuffer.wrap(input_string.getBytes());
|
703
|
+
CharBuffer cbuf = decoder.decode(bbuf);
|
704
|
+
bbuf.clear();
|
705
|
+
encoder.encode(cbuf, bbuf, true);
|
706
|
+
int length = bbuf.position();
|
707
|
+
byte[] bytes = new byte[length];
|
708
|
+
System.arraycopy(bbuf.array(), 0, bytes, 0, length);
|
709
|
+
return bytes;
|
710
|
+
}
|
711
|
+
|
639
712
|
}
|
@@ -42,14 +42,15 @@ import javax.xml.transform.TransformerException;
|
|
42
42
|
*/
|
43
43
|
public class NokogiriXsltErrorListener implements ErrorListener {
|
44
44
|
public enum ErrorType {
|
45
|
+
SUCCESS,
|
45
46
|
WARNING,
|
46
47
|
ERROR,
|
47
48
|
FATAL
|
48
49
|
}
|
49
50
|
|
50
|
-
private ErrorType type;
|
51
|
-
private String errorMessage;
|
52
|
-
private Exception exception;
|
51
|
+
private ErrorType type = ErrorType.SUCCESS;
|
52
|
+
private String errorMessage = null;
|
53
|
+
private Exception exception = null;
|
53
54
|
|
54
55
|
public void warning(TransformerException ex) throws TransformerException {
|
55
56
|
type = ErrorType.WARNING;
|
@@ -1,7 +1,7 @@
|
|
1
1
|
/**
|
2
2
|
* (The MIT License)
|
3
3
|
*
|
4
|
-
* Copyright (c) 2008 -
|
4
|
+
* Copyright (c) 2008 - 2012:
|
5
5
|
*
|
6
6
|
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
7
|
* * {Mike Dalessio}[http://mike.daless.io]
|
@@ -43,6 +43,7 @@ import java.io.InputStream;
|
|
43
43
|
|
44
44
|
import org.jruby.Ruby;
|
45
45
|
import org.jruby.RubyClass;
|
46
|
+
import org.jruby.RubyFixnum;
|
46
47
|
import org.jruby.RubyIO;
|
47
48
|
import org.jruby.RubyObject;
|
48
49
|
import org.jruby.RubyString;
|
@@ -65,6 +66,8 @@ import org.xml.sax.ext.EntityResolver2;
|
|
65
66
|
*/
|
66
67
|
public class ParserContext extends RubyObject {
|
67
68
|
protected InputSource source = null;
|
69
|
+
protected IRubyObject detected_encoding = null;
|
70
|
+
protected int stringDataSize = -1;
|
68
71
|
|
69
72
|
/**
|
70
73
|
* Create a file base input source taking into account the current
|
@@ -102,11 +105,28 @@ public class ParserContext extends RubyObject {
|
|
102
105
|
public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
|
103
106
|
Ruby ruby = context.getRuntime();
|
104
107
|
String path = (String) url.toJava(String.class);
|
108
|
+
if (data.getType().respondsTo("detect_encoding")) {
|
109
|
+
// data is EnocodingReader
|
110
|
+
try {
|
111
|
+
data.callMethod(context, "read", RubyFixnum.newFixnum(context.getRuntime(), 1024));
|
112
|
+
} catch (RaiseException e) {
|
113
|
+
detected_encoding = e.getException().getInstanceVariable("@found_encoding");
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
105
117
|
if (isAbsolutePath(path)) {
|
106
|
-
|
107
|
-
source.setSystemId(path);
|
118
|
+
returnWithSystemId(path);
|
108
119
|
return;
|
109
120
|
}
|
121
|
+
// Dir.chdir might be called at some point before this.
|
122
|
+
String currentDir = context.getRuntime().getCurrentDirectory();
|
123
|
+
if (path != null && currentDir != null && currentDir.length() != 0) {
|
124
|
+
String absPath = currentDir + "/" + path;
|
125
|
+
if (isAbsolutePath(absPath)) {
|
126
|
+
returnWithSystemId(absPath);
|
127
|
+
return;
|
128
|
+
}
|
129
|
+
}
|
110
130
|
RubyString stringData = null;
|
111
131
|
if (invoke(context, data, "respond_to?",
|
112
132
|
ruby.newSymbol("to_io").to_sym()).isTrue()) {
|
@@ -140,6 +160,7 @@ public class ParserContext extends RubyObject {
|
|
140
160
|
}
|
141
161
|
if (stringData != null) {
|
142
162
|
ByteList bytes = stringData.getByteList();
|
163
|
+
stringDataSize = bytes.length() - bytes.begin();
|
143
164
|
source = new InputSource(new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()));
|
144
165
|
}
|
145
166
|
}
|
@@ -148,6 +169,15 @@ public class ParserContext extends RubyObject {
|
|
148
169
|
if (url == null) return false;
|
149
170
|
return (new File(url)).isAbsolute();
|
150
171
|
}
|
172
|
+
|
173
|
+
private void returnWithSystemId(String url) {
|
174
|
+
source = new InputSource();
|
175
|
+
if (detected_encoding != null) {
|
176
|
+
source.setEncoding((String) detected_encoding.toJava(String.class));
|
177
|
+
}
|
178
|
+
source.setSystemId(url);
|
179
|
+
return;
|
180
|
+
}
|
151
181
|
|
152
182
|
/**
|
153
183
|
* Set the InputSource to read from <code>file</code>, a String filename.
|
@@ -1,7 +1,7 @@
|
|
1
1
|
/**
|
2
2
|
* (The MIT License)
|
3
3
|
*
|
4
|
-
* Copyright (c) 2008 -
|
4
|
+
* Copyright (c) 2008 - 2012:
|
5
5
|
*
|
6
6
|
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
7
|
* * {Mike Dalessio}[http://mike.daless.io]
|
@@ -32,9 +32,18 @@
|
|
32
32
|
|
33
33
|
package nokogiri.internals;
|
34
34
|
|
35
|
+
import static nokogiri.internals.NokogiriHelpers.canonicalizeWhitespce;
|
35
36
|
import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
|
36
|
-
import static nokogiri.internals.NokogiriHelpers.
|
37
|
+
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
38
|
+
import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;
|
37
39
|
|
40
|
+
import java.util.ArrayDeque;
|
41
|
+
import java.util.ArrayList;
|
42
|
+
import java.util.Arrays;
|
43
|
+
import java.util.Comparator;
|
44
|
+
import java.util.Deque;
|
45
|
+
import java.util.Iterator;
|
46
|
+
import java.util.List;
|
38
47
|
import java.util.Stack;
|
39
48
|
|
40
49
|
import org.cyberneko.html.HTMLElements;
|
@@ -65,7 +74,11 @@ public class SaveContextVisitor {
|
|
65
74
|
private Stack<String> indentation;
|
66
75
|
private String encoding, indentString;
|
67
76
|
private boolean format, noDecl, noEmpty, noXhtml, asXhtml, asXml, asHtml, asBuilder, htmlDoc, fragment;
|
68
|
-
|
77
|
+
private boolean canonical, incl_ns, with_comments, subsets, exclusive;
|
78
|
+
private List<Node> c14nNodeList;
|
79
|
+
private Deque<Attr[]> c14nNamespaceStack;
|
80
|
+
private Deque<Attr[]> c14nAttrStack;
|
81
|
+
private List<String> c14nExclusiveInclusivePrefixes = null;
|
69
82
|
/*
|
70
83
|
* U can't touch this.
|
71
84
|
* http://www.youtube.com/watch?v=WJ2ZFVx6A4Q
|
@@ -81,13 +94,22 @@ public class SaveContextVisitor {
|
|
81
94
|
public static final int AS_XML = 32;
|
82
95
|
public static final int AS_HTML = 64;
|
83
96
|
public static final int AS_BUILDER = 128;
|
97
|
+
|
98
|
+
public static final int CANONICAL = 1;
|
99
|
+
public static final int INCL_NS = 2;
|
100
|
+
public static final int WITH_COMMENTS = 4;
|
101
|
+
public static final int SUBSETS = 8;
|
102
|
+
public static final int EXCLUSIVE = 16;
|
84
103
|
|
85
|
-
public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment) {
|
104
|
+
public SaveContextVisitor(int options, String indent, String encoding, boolean htmlDoc, boolean fragment, int canonicalOpts) {
|
86
105
|
buffer = new StringBuffer();
|
87
106
|
this.encoding = encoding;
|
88
107
|
indentation = new Stack<String>(); indentation.push("");
|
89
108
|
this.htmlDoc = htmlDoc;
|
90
109
|
this.fragment = fragment;
|
110
|
+
c14nNodeList = new ArrayList<Node>();
|
111
|
+
c14nNamespaceStack = new ArrayDeque<Attr[]>();
|
112
|
+
c14nAttrStack = new ArrayDeque<Attr[]>();
|
91
113
|
format = (options & FORMAT) == FORMAT;
|
92
114
|
|
93
115
|
noDecl = (options & NO_DECL) == NO_DECL;
|
@@ -97,6 +119,12 @@ public class SaveContextVisitor {
|
|
97
119
|
asXml = (options & AS_XML) == AS_XML;
|
98
120
|
asHtml = (options & AS_HTML) == AS_HTML;
|
99
121
|
asBuilder = (options & AS_BUILDER) == AS_BUILDER;
|
122
|
+
|
123
|
+
canonical = (canonicalOpts & CANONICAL) == CANONICAL;
|
124
|
+
incl_ns = (canonicalOpts & INCL_NS) == INCL_NS;
|
125
|
+
with_comments = (canonicalOpts & WITH_COMMENTS) == WITH_COMMENTS;
|
126
|
+
subsets = (canonicalOpts & SUBSETS) == SUBSETS;
|
127
|
+
|
100
128
|
if ((format && indent == null) || (format && indent.length() == 0)) indent = " "; // default, two spaces
|
101
129
|
if ((!format && indent != null) && indent.length() > 0) format = true;
|
102
130
|
if ((asBuilder && indent == null) || (asBuilder && indent.length() == 0)) indent = " "; // default, two spaces
|
@@ -117,6 +145,14 @@ public class SaveContextVisitor {
|
|
117
145
|
this.encoding = encoding;
|
118
146
|
}
|
119
147
|
|
148
|
+
public List<Node> getC14nNodeList() {
|
149
|
+
return c14nNodeList;
|
150
|
+
}
|
151
|
+
|
152
|
+
public void setC14nExclusiveInclusivePrefixes(List<String> prefixes) {
|
153
|
+
c14nExclusiveInclusivePrefixes = prefixes;
|
154
|
+
}
|
155
|
+
|
120
156
|
public boolean enter(Node node) {
|
121
157
|
if (node instanceof Document) {
|
122
158
|
return enter((Document)node);
|
@@ -275,6 +311,10 @@ public class SaveContextVisitor {
|
|
275
311
|
}
|
276
312
|
|
277
313
|
public boolean enter(Comment comment) {
|
314
|
+
if (canonical) {
|
315
|
+
c14nNodeList.add(comment);
|
316
|
+
if (!with_comments) return true;
|
317
|
+
}
|
278
318
|
buffer.append("<!--");
|
279
319
|
buffer.append(comment.getData());
|
280
320
|
buffer.append("-->");
|
@@ -293,7 +333,7 @@ public class SaveContextVisitor {
|
|
293
333
|
|
294
334
|
if (encoding != null) {
|
295
335
|
buffer.append(" encoding=\"");
|
296
|
-
buffer.append(encoding
|
336
|
+
buffer.append(encoding);
|
297
337
|
buffer.append("\"");
|
298
338
|
}
|
299
339
|
buffer.append("?>\n");
|
@@ -306,6 +346,10 @@ public class SaveContextVisitor {
|
|
306
346
|
}
|
307
347
|
|
308
348
|
public boolean enter(DocumentType docType) {
|
349
|
+
if (canonical) {
|
350
|
+
c14nNodeList.add(docType);
|
351
|
+
return true;
|
352
|
+
}
|
309
353
|
String name = docType.getName();
|
310
354
|
String pubId = docType.getPublicId();
|
311
355
|
String sysId = docType.getSystemId();
|
@@ -334,6 +378,12 @@ public class SaveContextVisitor {
|
|
334
378
|
}
|
335
379
|
|
336
380
|
public boolean enter(Element element) {
|
381
|
+
if (canonical) {
|
382
|
+
c14nNodeList.add(element);
|
383
|
+
if (element == element.getOwnerDocument().getDocumentElement()) {
|
384
|
+
c14nNodeList.add(element.getOwnerDocument());
|
385
|
+
}
|
386
|
+
}
|
337
387
|
String current = indentation.peek();
|
338
388
|
buffer.append(current);
|
339
389
|
if (needIndent()) {
|
@@ -341,9 +391,8 @@ public class SaveContextVisitor {
|
|
341
391
|
}
|
342
392
|
String name = element.getTagName();
|
343
393
|
buffer.append("<" + name);
|
344
|
-
|
345
|
-
for (
|
346
|
-
Attr attr = (Attr) attrs.item(i);
|
394
|
+
Attr[] attrs = getAttrsAndNamespaces(element);
|
395
|
+
for (Attr attr : attrs) {
|
347
396
|
if (attr.getSpecified()) {
|
348
397
|
buffer.append(" ");
|
349
398
|
enter(attr);
|
@@ -356,10 +405,8 @@ public class SaveContextVisitor {
|
|
356
405
|
return true;
|
357
406
|
}
|
358
407
|
// no child
|
359
|
-
if (asHtml) {
|
408
|
+
if (asHtml || asXhtml) {
|
360
409
|
buffer.append(">");
|
361
|
-
} else if (asXhtml) {
|
362
|
-
buffer.append(" />");
|
363
410
|
} else if (asXml && noEmpty) {
|
364
411
|
buffer.append(">");
|
365
412
|
} else {
|
@@ -390,7 +437,142 @@ public class SaveContextVisitor {
|
|
390
437
|
return element.isEmpty();
|
391
438
|
}
|
392
439
|
|
440
|
+
private Attr[] getAttrsAndNamespaces(Element element) {
|
441
|
+
NamedNodeMap attrs = element.getAttributes();
|
442
|
+
if (!canonical) {
|
443
|
+
if (attrs == null || attrs.getLength() == 0) return new Attr[0];
|
444
|
+
Attr[] attrsAndNamespaces = new Attr[attrs.getLength()];
|
445
|
+
for (int i=0; i<attrs.getLength(); i++) {
|
446
|
+
attrsAndNamespaces[i] = (Attr) attrs.item(i);
|
447
|
+
}
|
448
|
+
return attrsAndNamespaces;
|
449
|
+
} else {
|
450
|
+
List<Attr> namespaces = new ArrayList<Attr>();
|
451
|
+
List<Attr> attributes = new ArrayList<Attr>();
|
452
|
+
if (subsets) {
|
453
|
+
getAttrsOfAncestors(element.getParentNode(), namespaces, attributes);
|
454
|
+
Attr[] namespaceOfAncestors = getSortedArray(namespaces);
|
455
|
+
Attr[] attributeOfAncestors = getSortedArray(attributes);
|
456
|
+
c14nNamespaceStack.push(namespaceOfAncestors);
|
457
|
+
c14nAttrStack.push(attributeOfAncestors);
|
458
|
+
subsets = false; // namespace propagation should be done only once on top level node.
|
459
|
+
}
|
460
|
+
|
461
|
+
getNamespacesAndAttrs(element, namespaces, attributes);
|
462
|
+
|
463
|
+
Attr[] namespaceArray = getSortedArray(namespaces);
|
464
|
+
Attr[] attributeArray = getSortedArray(attributes);
|
465
|
+
Attr[] allAttrs = new Attr[namespaceArray.length + attributeArray.length];
|
466
|
+
for (int i=0; i<allAttrs.length; i++) {
|
467
|
+
if (i < namespaceArray.length) {
|
468
|
+
allAttrs[i] = namespaceArray[i];
|
469
|
+
} else {
|
470
|
+
allAttrs[i] = attributeArray[i-namespaceArray.length];
|
471
|
+
}
|
472
|
+
}
|
473
|
+
c14nNamespaceStack.push(namespaceArray);
|
474
|
+
c14nAttrStack.push(attributeArray);
|
475
|
+
return allAttrs;
|
476
|
+
}
|
477
|
+
|
478
|
+
}
|
479
|
+
|
480
|
+
private void getAttrsOfAncestors(Node parent, List<Attr> namespaces, List<Attr> attributes) {
|
481
|
+
if (parent == null) return;
|
482
|
+
NamedNodeMap attrs = parent.getAttributes();
|
483
|
+
if (attrs == null || attrs.getLength() == 0) return;
|
484
|
+
for (int i=0; i < attrs.getLength(); i++) {
|
485
|
+
Attr attr = (Attr)attrs.item(i);
|
486
|
+
if (isNamespace(attr.getNodeName())) namespaces.add(attr);
|
487
|
+
else attributes.add(attr);
|
488
|
+
}
|
489
|
+
getAttrsOfAncestors(parent.getParentNode(), namespaces, attributes);
|
490
|
+
}
|
491
|
+
|
492
|
+
private void getNamespacesAndAttrs(Node current, List<Attr> namespaces, List<Attr> attributes) {
|
493
|
+
NamedNodeMap attrs = current.getAttributes();
|
494
|
+
for (int i=0; i<attrs.getLength(); i++) {
|
495
|
+
Attr attr = (Attr)attrs.item(i);
|
496
|
+
if (isNamespace(attr.getNodeName())) {
|
497
|
+
getNamespacesWithPropagated(namespaces, attr);
|
498
|
+
} else {
|
499
|
+
getAttributesWithPropagated(attributes, attr);
|
500
|
+
}
|
501
|
+
if (exclusive) {
|
502
|
+
verifyXmlSpace(attributes, attrs);
|
503
|
+
}
|
504
|
+
}
|
505
|
+
}
|
506
|
+
|
507
|
+
private void getNamespacesWithPropagated(List<Attr> namespaces, Attr attr) {
|
508
|
+
boolean newNamespace = true;
|
509
|
+
Iterator<Attr[]> iter = c14nNamespaceStack.iterator();
|
510
|
+
while (iter.hasNext()) {
|
511
|
+
Attr[] parentNamespaces = iter.next();
|
512
|
+
for (int n=0; n < parentNamespaces.length; n++) {
|
513
|
+
if (parentNamespaces[n].getNodeName().equals(attr.getNodeName())) {
|
514
|
+
if (parentNamespaces[n].getNodeValue().equals(attr.getNodeValue())) {
|
515
|
+
// exactly the same namespace should not be added
|
516
|
+
newNamespace = false;
|
517
|
+
} else {
|
518
|
+
// in case of namespace url change, propagated namespace will be override
|
519
|
+
namespaces.remove(parentNamespaces[n]);
|
520
|
+
}
|
521
|
+
}
|
522
|
+
}
|
523
|
+
if (newNamespace && !namespaces.contains(attr)) namespaces.add(attr);
|
524
|
+
}
|
525
|
+
}
|
526
|
+
|
527
|
+
private void getAttributesWithPropagated(List<Attr> attributes, Attr attr) {
|
528
|
+
boolean newAttribute = true;
|
529
|
+
Iterator<Attr[]> iter = c14nAttrStack.iterator();
|
530
|
+
while (iter.hasNext()) {
|
531
|
+
Attr[] parentAttr = iter.next();
|
532
|
+
for (int n=0; n < parentAttr.length; n++) {
|
533
|
+
if (!parentAttr[n].getNodeName().startsWith("xml:")) continue;
|
534
|
+
if (parentAttr[n].getNodeName().equals(attr.getNodeName())) {
|
535
|
+
if (parentAttr[n].getNodeValue().equals(attr.getNodeValue())) {
|
536
|
+
// exactly the same attribute should not be added
|
537
|
+
newAttribute = false;
|
538
|
+
} else {
|
539
|
+
// in case of attribute value change, propagated attribute will be override
|
540
|
+
attributes.remove(parentAttr[n]);
|
541
|
+
}
|
542
|
+
}
|
543
|
+
}
|
544
|
+
if (newAttribute) attributes.add(attr);
|
545
|
+
}
|
546
|
+
}
|
547
|
+
|
548
|
+
private void verifyXmlSpace(List<Attr> attributes, NamedNodeMap attrs) {
|
549
|
+
Attr attr = (Attr) attrs.getNamedItem("xml:space");
|
550
|
+
if (attr == null) {
|
551
|
+
for (int i=0; i < attributes.size(); i++) {
|
552
|
+
if (attributes.get(i).getNodeName().equals("xml:space")) {
|
553
|
+
attributes.remove(i);
|
554
|
+
break;
|
555
|
+
}
|
556
|
+
}
|
557
|
+
}
|
558
|
+
}
|
559
|
+
|
560
|
+
private Attr[] getSortedArray(List<Attr> attrList) {
|
561
|
+
Attr[] attrArray = attrList.toArray(new Attr[0]);
|
562
|
+
Arrays.sort(attrArray, new Comparator<Attr>() {
|
563
|
+
@Override
|
564
|
+
public int compare(Attr attr0, Attr attr1) {
|
565
|
+
return attr0.getNodeName().compareTo(attr1.getNodeName());
|
566
|
+
}
|
567
|
+
});
|
568
|
+
return attrArray;
|
569
|
+
}
|
570
|
+
|
393
571
|
public void leave(Element element) {
|
572
|
+
if (canonical) {
|
573
|
+
c14nNamespaceStack.poll();
|
574
|
+
c14nAttrStack.poll();
|
575
|
+
}
|
394
576
|
String name = element.getTagName();
|
395
577
|
if (element.hasChildNodes()) {
|
396
578
|
if (needIndentInClosing(element)) {
|
@@ -504,6 +686,7 @@ public class SaveContextVisitor {
|
|
504
686
|
if (asHtml) buffer.append(">");
|
505
687
|
else buffer.append("?>");
|
506
688
|
buffer.append("\n");
|
689
|
+
if (canonical) c14nNodeList.add(pi);
|
507
690
|
return true;
|
508
691
|
}
|
509
692
|
|
@@ -514,6 +697,13 @@ public class SaveContextVisitor {
|
|
514
697
|
private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
|
515
698
|
public boolean enter(Text text) {
|
516
699
|
String textContent = text.getNodeValue();
|
700
|
+
if (canonical) {
|
701
|
+
c14nNodeList.add(text);
|
702
|
+
if (isWhitespaceText(textContent)) {
|
703
|
+
buffer.append(canonicalizeWhitespce(textContent));
|
704
|
+
return true;
|
705
|
+
}
|
706
|
+
}
|
517
707
|
if (needIndentText() && "".equals(textContent.trim())) return true;
|
518
708
|
if (needIndentText()) {
|
519
709
|
String current = indentation.peek();
|
@@ -521,9 +711,10 @@ public class SaveContextVisitor {
|
|
521
711
|
indentation.push(current + indentString);
|
522
712
|
if (textContent.charAt(0) == lineSeparator) textContent = textContent.substring(1);
|
523
713
|
}
|
524
|
-
if (
|
714
|
+
if (text.getUserData(NokogiriHelpers.ENCODED_STRING) == null || !((Boolean)text.getUserData(NokogiriHelpers.ENCODED_STRING))) {
|
525
715
|
textContent = encodeJavaString(textContent);
|
526
716
|
}
|
717
|
+
|
527
718
|
if (getEncoding(text) == null) {
|
528
719
|
textContent = encodeStringToHtmlEntity(textContent);
|
529
720
|
}
|