nokogiri 1.11.1-java → 1.11.2-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +20 -15
- data/ext/java/nokogiri/EncodingHandler.java +78 -59
- data/ext/java/nokogiri/HtmlDocument.java +137 -114
- data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
- data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
- data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
- data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
- data/ext/java/nokogiri/NokogiriService.java +597 -526
- data/ext/java/nokogiri/XmlAttr.java +120 -96
- data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
- data/ext/java/nokogiri/XmlCdata.java +35 -26
- data/ext/java/nokogiri/XmlComment.java +48 -37
- data/ext/java/nokogiri/XmlDocument.java +642 -540
- data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
- data/ext/java/nokogiri/XmlDtd.java +450 -384
- data/ext/java/nokogiri/XmlElement.java +25 -18
- data/ext/java/nokogiri/XmlElementContent.java +345 -286
- data/ext/java/nokogiri/XmlElementDecl.java +126 -95
- data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
- data/ext/java/nokogiri/XmlEntityReference.java +51 -42
- data/ext/java/nokogiri/XmlNamespace.java +177 -145
- data/ext/java/nokogiri/XmlNode.java +1843 -1588
- data/ext/java/nokogiri/XmlNodeSet.java +361 -299
- data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
- data/ext/java/nokogiri/XmlReader.java +513 -418
- data/ext/java/nokogiri/XmlRelaxng.java +91 -78
- data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
- data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
- data/ext/java/nokogiri/XmlSchema.java +328 -263
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
- data/ext/java/nokogiri/XmlText.java +57 -46
- data/ext/java/nokogiri/XmlXpathContext.java +240 -206
- data/ext/java/nokogiri/XsltStylesheet.java +282 -239
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
- data/ext/java/nokogiri/internals/ParserContext.java +206 -179
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
- data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +253 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +66 -65
- data/ext/nokogiri/html_sax_push_parser.c +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +171 -63
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +221 -164
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +338 -286
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +100 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +1 -22
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +31 -8
- data/lib/nokogiri/xml/document.rb +31 -11
- data/lib/nokogiri/xml/node.rb +38 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +7 -8
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -44,30 +44,39 @@ import org.xml.sax.SAXParseException;
|
|
44
44
|
|
45
45
|
/**
|
46
46
|
* Error handler for Relax and W3C XML Schema.
|
47
|
-
*
|
47
|
+
*
|
48
48
|
* @author sergio
|
49
49
|
* @author Yoko Harada <yokolet@gmail.com>
|
50
50
|
*/
|
51
|
-
public class SchemaErrorHandler implements ErrorHandler
|
51
|
+
public class SchemaErrorHandler implements ErrorHandler
|
52
|
+
{
|
52
53
|
|
53
|
-
|
54
|
-
|
54
|
+
private final Ruby runtime;
|
55
|
+
final RubyArray errors;
|
55
56
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
public
|
58
|
+
SchemaErrorHandler(Ruby ruby, RubyArray array)
|
59
|
+
{
|
60
|
+
this.runtime = ruby;
|
61
|
+
this.errors = array;
|
62
|
+
}
|
60
63
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
+
public void
|
65
|
+
warning(SAXParseException ex) throws SAXException
|
66
|
+
{
|
67
|
+
errors.append(XmlSyntaxError.createWarning(runtime, ex));
|
68
|
+
}
|
64
69
|
|
65
|
-
|
66
|
-
|
67
|
-
|
70
|
+
public void
|
71
|
+
error(SAXParseException ex) throws SAXException
|
72
|
+
{
|
73
|
+
errors.append(XmlSyntaxError.createError(runtime, ex));
|
74
|
+
}
|
68
75
|
|
69
|
-
|
70
|
-
|
71
|
-
|
76
|
+
public void
|
77
|
+
fatalError(SAXParseException ex) throws SAXException
|
78
|
+
{
|
79
|
+
throw ex;
|
80
|
+
}
|
72
81
|
|
73
82
|
}
|
@@ -32,137 +32,143 @@ import org.w3c.dom.Node;
|
|
32
32
|
/**
|
33
33
|
* @author kares
|
34
34
|
*/
|
35
|
-
public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
35
|
+
public final class XalanDTMManagerPatch extends org.apache.xml.dtm.ref.DTMManagerDefault
|
36
|
+
{
|
37
|
+
|
38
|
+
/**
|
39
|
+
* Given a W3C DOM node, try and return a DTM handle.
|
40
|
+
* Note: calling this may be non-optimal, and there is no guarantee that
|
41
|
+
* the node will be found in any particular DTM.
|
42
|
+
*
|
43
|
+
* @param node Non-null reference to a DOM node.
|
44
|
+
*
|
45
|
+
* @return a valid DTM handle.
|
46
|
+
*/
|
47
|
+
@Override
|
48
|
+
public /* synchronized */ int
|
49
|
+
getDTMHandleFromNode(org.w3c.dom.Node node)
|
50
|
+
{
|
51
|
+
//if (node == null) // "node must be non-null for getDTMHandleFromNode!");
|
52
|
+
// throw new IllegalArgumentException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NODE_NON_NULL, null));
|
53
|
+
assert node != null;
|
54
|
+
|
55
|
+
if (node instanceof org.apache.xml.dtm.ref.DTMNodeProxy) {
|
56
|
+
return ((org.apache.xml.dtm.ref.DTMNodeProxy) node).getDTMNodeNumber();
|
57
|
+
}
|
55
58
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
}
|
84
|
-
}
|
59
|
+
// Find the DOM2DTMs wrapped around this Document (if any)
|
60
|
+
// and check whether they contain the Node in question.
|
61
|
+
//
|
62
|
+
// NOTE that since a DOM2DTM may represent a subtree rather
|
63
|
+
// than a full document, we have to be prepared to check more
|
64
|
+
// than one -- and there is no guarantee that we will find
|
65
|
+
// one that contains ancestors or siblings of the node we're
|
66
|
+
// seeking.
|
67
|
+
//
|
68
|
+
// %REVIEW% We could search for the one which contains this
|
69
|
+
// node at the deepest level, and thus covers the widest
|
70
|
+
// subtree, but that's going to entail additional work
|
71
|
+
// checking more DTMs... and getHandleOfNode is not a
|
72
|
+
// cheap operation in most implementations.
|
73
|
+
//
|
74
|
+
// TODO: %REVIEW% If overflow addressing, we may recheck a DTM
|
75
|
+
// already examined. Ouch. But with the increased number of DTMs,
|
76
|
+
// scanning back to check this is painful.
|
77
|
+
// POSSIBLE SOLUTIONS:
|
78
|
+
// Generate a list of _unique_ DTM objects?
|
79
|
+
// Have each DTM cache last DOM node search?
|
80
|
+
for (int i = 0; i < m_dtms.length; i++) {
|
81
|
+
DTM thisDTM = m_dtms[i];
|
82
|
+
if (thisDTM instanceof DOM2DTM) {
|
83
|
+
int handle = ((DOM2DTM) thisDTM).getHandleOfNode(node);
|
84
|
+
if (handle != DTM.NULL) {
|
85
|
+
return handle;
|
85
86
|
}
|
87
|
+
}
|
88
|
+
}
|
86
89
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
90
|
+
// Not found; generate a new DTM.
|
91
|
+
//
|
92
|
+
// %REVIEW% Is this really desirable, or should we return null
|
93
|
+
// and make folks explicitly instantiate from a DOMSource? The
|
94
|
+
// latter is more work but gives the caller the opportunity to
|
95
|
+
// explicitly add the DTM to a DTMManager... and thus to know when
|
96
|
+
// it can be discarded again, which is something we need to pay much
|
97
|
+
// more attention to. (Especially since only DTMs which are assigned
|
98
|
+
// to a manager can use the overflow addressing scheme.)
|
99
|
+
//
|
100
|
+
// %BUG% If the source node was a DOM2DTM$defaultNamespaceDeclarationNode
|
101
|
+
// and the DTM wasn't registered with this DTMManager, we will create
|
102
|
+
// a new DTM and _still_ not be able to find the node (since it will
|
103
|
+
// be resynthesized). Another reason to push hard on making all DTMs
|
104
|
+
// be managed DTMs.
|
105
|
+
|
106
|
+
// Since the real root of our tree may be a DocumentFragment, we need to
|
107
|
+
// use getParent to find the root, instead of getOwnerDocument. Otherwise
|
108
|
+
// DOM2DTM#getHandleOfNode will be very unhappy.
|
109
|
+
Node root = node;
|
110
|
+
int rootType = root.getNodeType();
|
111
|
+
Node p = (rootType == Node.ATTRIBUTE_NODE) ? ((org.w3c.dom.Attr) root).getOwnerElement() : root.getParentNode();
|
112
|
+
for (; p != null; p = p.getParentNode()) { root = p; }
|
113
|
+
|
114
|
+
// DOM2DTM dtm = (DOM2DTM) getDTM(new DOMSource(root), false, null);
|
115
|
+
DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/);
|
116
|
+
|
117
|
+
int handle;
|
118
|
+
|
119
|
+
if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode
|
120
|
+
|| node instanceof DOM2DTMdefaultNamespaceDeclarationNode) {
|
121
|
+
// Can't return the same node since it's unique to a specific DTM,
|
122
|
+
// but can return the equivalent node -- find the corresponding
|
123
|
+
// Document Element, then ask it for the xml: namespace decl.
|
124
|
+
handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
|
125
|
+
handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
|
126
|
+
} else {
|
127
|
+
handle = dtm.getHandleOfNode(node);
|
128
|
+
|
129
|
+
rootType = root.getNodeType();
|
130
|
+
// Is Node actually within the same document? If not, don't search!
|
131
|
+
// This would be easier if m_root was always the Document node, but
|
132
|
+
// we decided to allow wrapping a DTM around a subtree.
|
133
|
+
if ((root == node) ||
|
134
|
+
(rootType == Node.DOCUMENT_NODE && root == node.getOwnerDocument()) ||
|
135
|
+
(rootType != Node.DOCUMENT_NODE && root.getOwnerDocument() == node.getOwnerDocument())
|
136
|
+
) {
|
137
|
+
// If node _is_ in m_root's tree, find its handle
|
96
138
|
//
|
97
|
-
// %
|
98
|
-
//
|
99
|
-
//
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
DOM2DTM dtm = getDTM(new DOMSource(root), false, null/*, true, true*/);
|
112
|
-
|
113
|
-
int handle;
|
114
|
-
|
115
|
-
if (node instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTMdefaultNamespaceDeclarationNode
|
116
|
-
|| node instanceof DOM2DTMdefaultNamespaceDeclarationNode) {
|
117
|
-
// Can't return the same node since it's unique to a specific DTM,
|
118
|
-
// but can return the equivalent node -- find the corresponding
|
119
|
-
// Document Element, then ask it for the xml: namespace decl.
|
120
|
-
handle = dtm.getHandleOfNode(((org.w3c.dom.Attr) node).getOwnerElement());
|
121
|
-
handle = dtm.getAttributeNode(handle, node.getNamespaceURI(), node.getLocalName());
|
122
|
-
}
|
123
|
-
else {
|
124
|
-
handle = dtm.getHandleOfNode(node);
|
125
|
-
|
126
|
-
rootType = root.getNodeType();
|
127
|
-
// Is Node actually within the same document? If not, don't search!
|
128
|
-
// This would be easier if m_root was always the Document node, but
|
129
|
-
// we decided to allow wrapping a DTM around a subtree.
|
130
|
-
if((root==node) ||
|
131
|
-
(rootType==Node.DOCUMENT_NODE && root==node.getOwnerDocument()) ||
|
132
|
-
(rootType!=Node.DOCUMENT_NODE && root.getOwnerDocument()==node.getOwnerDocument())
|
133
|
-
)
|
134
|
-
{
|
135
|
-
// If node _is_ in m_root's tree, find its handle
|
136
|
-
//
|
137
|
-
// %OPT% This check may be improved significantly when DOM
|
138
|
-
// Level 3 nodeKey and relative-order tests become
|
139
|
-
// available!
|
140
|
-
for (Node cursor = node; cursor != null;
|
141
|
-
cursor = (cursor.getNodeType()!=Node.ATTRIBUTE_NODE)
|
142
|
-
? cursor.getParentNode()
|
143
|
-
: ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
|
144
|
-
if (cursor==root) {
|
145
|
-
// We know this node; find its handle.
|
146
|
-
return (dtm).getHandleFromNode(node);
|
147
|
-
}
|
148
|
-
} // for ancestors of node
|
149
|
-
} // if node and m_root in same Document
|
150
|
-
}
|
151
|
-
|
152
|
-
if (DTM.NULL == handle)
|
153
|
-
throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE, null)); //"Could not resolve the node to a handle!");
|
139
|
+
// %OPT% This check may be improved significantly when DOM
|
140
|
+
// Level 3 nodeKey and relative-order tests become
|
141
|
+
// available!
|
142
|
+
for (Node cursor = node; cursor != null;
|
143
|
+
cursor = (cursor.getNodeType() != Node.ATTRIBUTE_NODE)
|
144
|
+
? cursor.getParentNode()
|
145
|
+
: ((org.w3c.dom.Attr)cursor).getOwnerElement()) {
|
146
|
+
if (cursor == root) {
|
147
|
+
// We know this node; find its handle.
|
148
|
+
return (dtm).getHandleFromNode(node);
|
149
|
+
}
|
150
|
+
} // for ancestors of node
|
151
|
+
} // if node and m_root in same Document
|
152
|
+
}
|
154
153
|
|
155
|
-
|
154
|
+
if (DTM.NULL == handle) {
|
155
|
+
throw new RuntimeException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_RESOLVE_NODE,
|
156
|
+
null)); //"Could not resolve the node to a handle!");
|
156
157
|
}
|
157
158
|
|
158
|
-
|
159
|
-
|
160
|
-
int documentID = dtmPos << IDENT_DTM_NODE_BITS;
|
159
|
+
return handle;
|
160
|
+
}
|
161
161
|
|
162
|
-
|
162
|
+
private DOM2DTM
|
163
|
+
getDTM(DOMSource source, boolean unique, DTMWSFilter whiteSpaceFilter/*, boolean incremental, boolean doIndexing*/)
|
164
|
+
{
|
165
|
+
int dtmPos = getFirstFreeDTMID();
|
166
|
+
int documentID = dtmPos << IDENT_DTM_NODE_BITS;
|
163
167
|
|
164
|
-
|
165
|
-
|
166
|
-
|
168
|
+
DOM2DTM dtm = new DOM2DTM(this, source, documentID, whiteSpaceFilter, m_xsf, true);
|
169
|
+
|
170
|
+
addDTM(dtm, dtmPos, 0);
|
171
|
+
return dtm;
|
172
|
+
}
|
167
173
|
|
168
174
|
}
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -37,6 +37,7 @@ package nokogiri.internals;
|
|
37
37
|
*
|
38
38
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
39
39
|
*/
|
40
|
-
public interface XmlDeclHandler
|
41
|
-
|
40
|
+
public interface XmlDeclHandler
|
41
|
+
{
|
42
|
+
public void xmlDecl(String version, String encoding, String standalone);
|
42
43
|
}
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -46,6 +46,7 @@ import org.jruby.RubyClass;
|
|
46
46
|
import org.jruby.RubyFixnum;
|
47
47
|
import org.jruby.exceptions.RaiseException;
|
48
48
|
import org.jruby.runtime.ThreadContext;
|
49
|
+
import org.jruby.runtime.Helpers;
|
49
50
|
import org.jruby.runtime.builtin.IRubyObject;
|
50
51
|
import org.w3c.dom.Document;
|
51
52
|
import org.w3c.dom.Node;
|
@@ -61,214 +62,244 @@ import nokogiri.XmlSyntaxError;
|
|
61
62
|
* Parser class for XML DOM processing. This class actually parses XML document
|
62
63
|
* and creates DOM tree in Java side. However, DOM tree in Ruby side is not since
|
63
64
|
* we delay creating objects for performance.
|
64
|
-
*
|
65
|
+
*
|
65
66
|
* @author sergio
|
66
67
|
* @author Yoko Harada <yokolet@gmail.com>
|
67
68
|
*/
|
68
|
-
public class XmlDomParserContext extends ParserContext
|
69
|
+
public class XmlDomParserContext extends ParserContext
|
70
|
+
{
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
72
|
+
protected static final String FEATURE_LOAD_EXTERNAL_DTD =
|
73
|
+
"http://apache.org/xml/features/nonvalidating/load-external-dtd";
|
74
|
+
protected static final String FEATURE_LOAD_DTD_GRAMMAR =
|
75
|
+
"http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
|
76
|
+
protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
|
77
|
+
"http://apache.org/xml/features/dom/include-ignorable-whitespace";
|
78
|
+
protected static final String CONTINUE_AFTER_FATAL_ERROR =
|
79
|
+
"http://apache.org/xml/features/continue-after-fatal-error";
|
80
|
+
protected static final String FEATURE_NOT_EXPAND_ENTITY =
|
81
|
+
"http://apache.org/xml/features/dom/create-entity-ref-nodes";
|
82
|
+
protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
|
83
|
+
private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude";
|
84
|
+
private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager";
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
86
|
+
protected ParserContext.Options options;
|
87
|
+
protected DOMParser parser;
|
88
|
+
protected NokogiriErrorHandler errorHandler;
|
89
|
+
protected IRubyObject ruby_encoding;
|
88
90
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
super(runtime);
|
95
|
-
this.options = new ParserContext.Options(RubyFixnum.fix2long(options));
|
96
|
-
java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding);
|
97
|
-
ruby_encoding = encoding;
|
98
|
-
initErrorHandler();
|
99
|
-
initParser(runtime);
|
100
|
-
}
|
91
|
+
public
|
92
|
+
XmlDomParserContext(Ruby runtime, IRubyObject options)
|
93
|
+
{
|
94
|
+
this(runtime, runtime.getNil(), options);
|
95
|
+
}
|
101
96
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
97
|
+
public
|
98
|
+
XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options)
|
99
|
+
{
|
100
|
+
super(runtime);
|
101
|
+
this.options = new ParserContext.Options(RubyFixnum.fix2long(options));
|
102
|
+
java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding);
|
103
|
+
ruby_encoding = encoding;
|
104
|
+
initErrorHandler();
|
105
|
+
initParser(runtime);
|
106
|
+
}
|
109
107
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
108
|
+
protected void
|
109
|
+
initErrorHandler()
|
110
|
+
{
|
111
|
+
if (options.recover) {
|
112
|
+
errorHandler = new NokogiriNonStrictErrorHandler(options.noError, options.noWarning);
|
113
|
+
} else {
|
114
|
+
errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning);
|
115
|
+
}
|
116
|
+
}
|
115
117
|
|
116
|
-
|
117
|
-
|
118
|
+
protected void
|
119
|
+
initParser(Ruby runtime)
|
120
|
+
{
|
121
|
+
if (options.xInclude) {
|
122
|
+
System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration",
|
123
|
+
"org.apache.xerces.parsers.XIncludeParserConfiguration");
|
124
|
+
}
|
118
125
|
|
119
|
-
|
120
|
-
|
126
|
+
parser = new NokogiriDomParser(options);
|
127
|
+
parser.setErrorHandler(errorHandler);
|
121
128
|
|
122
|
-
|
123
|
-
|
124
|
-
}
|
129
|
+
// Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
|
130
|
+
setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
|
125
131
|
|
126
|
-
|
127
|
-
|
128
|
-
|
132
|
+
if (options.noBlanks) {
|
133
|
+
setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false);
|
134
|
+
}
|
129
135
|
|
130
|
-
|
131
|
-
|
132
|
-
|
136
|
+
if (options.recover) {
|
137
|
+
setFeature(CONTINUE_AFTER_FATAL_ERROR, true);
|
138
|
+
}
|
133
139
|
|
134
|
-
|
135
|
-
|
136
|
-
}
|
137
|
-
// If we turn off loading of external DTDs complete, we don't
|
138
|
-
// getthe publicID. Instead of turning off completely, we use
|
139
|
-
// an entity resolver that returns empty documents.
|
140
|
-
if (options.dtdLoad) {
|
141
|
-
setFeature(FEATURE_LOAD_EXTERNAL_DTD, true);
|
142
|
-
setFeature(FEATURE_LOAD_DTD_GRAMMAR, true);
|
143
|
-
}
|
144
|
-
parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));
|
140
|
+
if (options.dtdValid) {
|
141
|
+
setFeature(FEATURE_VALIDATION, true);
|
145
142
|
}
|
146
143
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
}
|
144
|
+
if (!options.noEnt) {
|
145
|
+
setFeature(FEATURE_NOT_EXPAND_ENTITY, true);
|
146
|
+
}
|
147
|
+
// If we turn off loading of external DTDs complete, we don't
|
148
|
+
// getthe publicID. Instead of turning off completely, we use
|
149
|
+
// an entity resolver that returns empty documents.
|
150
|
+
if (options.dtdLoad) {
|
151
|
+
setFeature(FEATURE_LOAD_EXTERNAL_DTD, true);
|
152
|
+
setFeature(FEATURE_LOAD_DTD_GRAMMAR, true);
|
157
153
|
}
|
154
|
+
parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));
|
155
|
+
}
|
158
156
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
157
|
+
/**
|
158
|
+
* Convenience method that catches and ignores SAXException
|
159
|
+
* (unrecognized and unsupported exceptions).
|
160
|
+
*/
|
161
|
+
protected void
|
162
|
+
setFeature(String feature, boolean value)
|
163
|
+
{
|
164
|
+
try {
|
165
|
+
parser.setFeature(feature, value);
|
166
|
+
} catch (SAXException e) {
|
167
|
+
// ignore
|
169
168
|
}
|
169
|
+
}
|
170
170
|
|
171
|
-
|
172
|
-
|
171
|
+
/**
|
172
|
+
* Convenience method that catches and ignores SAXException
|
173
|
+
* (unrecognized and unsupported exceptions).
|
174
|
+
*/
|
175
|
+
protected void
|
176
|
+
setProperty(String property, Object value)
|
177
|
+
{
|
178
|
+
try {
|
179
|
+
parser.setProperty(property, value);
|
180
|
+
} catch (SAXException e) {
|
181
|
+
// ignore
|
173
182
|
}
|
183
|
+
}
|
174
184
|
|
185
|
+
public void
|
186
|
+
addErrorsIfNecessary(ThreadContext context, XmlDocument doc)
|
187
|
+
{
|
188
|
+
doc.setInstanceVariable("@errors", mapErrors(context, errorHandler));
|
189
|
+
}
|
175
190
|
|
176
|
-
public static RubyArray mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler) {
|
177
|
-
final Ruby runtime = context.runtime;
|
178
|
-
final List<Exception> errors = errorHandler.getErrors();
|
179
|
-
final IRubyObject[] errorsAry = new IRubyObject[errors.size()];
|
180
|
-
for (int i = 0; i < errors.size(); i++) {
|
181
|
-
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime);
|
182
|
-
xmlSyntaxError.setException(errors.get(i));
|
183
|
-
errorsAry[i] = xmlSyntaxError;
|
184
|
-
}
|
185
|
-
return runtime.newArrayNoCopy(errorsAry);
|
186
|
-
}
|
187
191
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
xmlSyntaxError.setException(ex);
|
199
|
-
throw xmlSyntaxError.toThrowable();
|
200
|
-
}
|
192
|
+
public static RubyArray
|
193
|
+
mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler)
|
194
|
+
{
|
195
|
+
final Ruby runtime = context.runtime;
|
196
|
+
final List<Exception> errors = errorHandler.getErrors();
|
197
|
+
final IRubyObject[] errorsAry = new IRubyObject[errors.size()];
|
198
|
+
for (int i = 0; i < errors.size(); i++) {
|
199
|
+
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime);
|
200
|
+
xmlSyntaxError.setException(errors.get(i));
|
201
|
+
errorsAry[i] = xmlSyntaxError;
|
201
202
|
}
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
203
|
+
return runtime.newArrayNoCopy(errorsAry);
|
204
|
+
}
|
205
|
+
|
206
|
+
public XmlDocument
|
207
|
+
getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex)
|
208
|
+
{
|
209
|
+
if (options.recover) {
|
210
|
+
XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz);
|
211
|
+
this.addErrorsIfNecessary(context, xmlDocument);
|
212
|
+
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
213
|
+
xmlSyntaxError.setException(ex);
|
214
|
+
((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError);
|
215
|
+
return xmlDocument;
|
216
|
+
} else {
|
217
|
+
XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(context.runtime);
|
218
|
+
xmlSyntaxError.setException(ex);
|
219
|
+
throw xmlSyntaxError.toThrowable();
|
208
220
|
}
|
221
|
+
}
|
209
222
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
223
|
+
private XmlDocument
|
224
|
+
getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klass)
|
225
|
+
{
|
226
|
+
Document document = parser.getDocument();
|
227
|
+
XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, document);
|
228
|
+
xmlDocument.setEncoding(ruby_encoding);
|
229
|
+
return xmlDocument;
|
230
|
+
}
|
217
231
|
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
232
|
+
/**
|
233
|
+
* This method is broken out so that HtmlDomParserContext can
|
234
|
+
* override it.
|
235
|
+
*/
|
236
|
+
protected XmlDocument
|
237
|
+
wrapDocument(ThreadContext context, RubyClass klass, Document doc)
|
238
|
+
{
|
239
|
+
XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc);
|
240
|
+
Helpers.invoke(context, xmlDocument, "initialize");
|
241
|
+
xmlDocument.setEncoding(ruby_encoding);
|
242
|
+
|
243
|
+
if (options.dtdLoad) {
|
244
|
+
IRubyObject dtd = XmlDtd.newFromExternalSubset(context.runtime, doc);
|
245
|
+
if (!dtd.isNil()) {
|
246
|
+
doc.setUserData(XmlDocument.DTD_EXTERNAL_SUBSET, (XmlDtd) dtd, null);
|
247
|
+
}
|
225
248
|
}
|
249
|
+
return xmlDocument;
|
250
|
+
}
|
226
251
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
252
|
+
/**
|
253
|
+
* Must call setInputSource() before this method.
|
254
|
+
*/
|
255
|
+
public XmlDocument
|
256
|
+
parse(ThreadContext context, RubyClass klass, IRubyObject url)
|
257
|
+
{
|
258
|
+
XmlDocument xmlDoc;
|
259
|
+
try {
|
260
|
+
Document doc = do_parse();
|
261
|
+
xmlDoc = wrapDocument(context, klass, doc);
|
262
|
+
xmlDoc.setUrl(url);
|
263
|
+
addErrorsIfNecessary(context, xmlDoc);
|
264
|
+
return xmlDoc;
|
265
|
+
} catch (SAXException e) {
|
266
|
+
return getDocumentWithErrorsOrRaiseException(context, klass, e);
|
267
|
+
} catch (IOException e) {
|
268
|
+
return getDocumentWithErrorsOrRaiseException(context, klass, e);
|
243
269
|
}
|
270
|
+
}
|
244
271
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
List<Node> emptyNodes = new ArrayList<Node>();
|
254
|
-
findEmptyTexts(parser.getDocument(), emptyNodes);
|
255
|
-
if (emptyNodes.size() > 0) {
|
256
|
-
for (Node node : emptyNodes) {
|
257
|
-
node.getParentNode().removeChild(node);
|
258
|
-
}
|
259
|
-
}
|
260
|
-
}
|
261
|
-
return parser.getDocument();
|
272
|
+
protected Document
|
273
|
+
do_parse() throws SAXException, IOException
|
274
|
+
{
|
275
|
+
try {
|
276
|
+
parser.parse(getInputSource());
|
277
|
+
} catch (NullPointerException ex) {
|
278
|
+
// FIXME: this is really a hack to fix #838. Xerces will throw a NullPointerException
|
279
|
+
// if we tried to parse '<? ?>'. We should submit a patch to Xerces.
|
262
280
|
}
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
for (int i=0; i < children.getLength(); i++) {
|
270
|
-
findEmptyTexts(children.item(i), emptyNodes);
|
271
|
-
}
|
281
|
+
if (options.noBlanks) {
|
282
|
+
List<Node> emptyNodes = new ArrayList<Node>();
|
283
|
+
findEmptyTexts(parser.getDocument(), emptyNodes);
|
284
|
+
if (emptyNodes.size() > 0) {
|
285
|
+
for (Node node : emptyNodes) {
|
286
|
+
node.getParentNode().removeChild(node);
|
272
287
|
}
|
288
|
+
}
|
289
|
+
}
|
290
|
+
return parser.getDocument();
|
291
|
+
}
|
292
|
+
|
293
|
+
private static void
|
294
|
+
findEmptyTexts(Node node, List<Node> emptyNodes)
|
295
|
+
{
|
296
|
+
if (node.getNodeType() == Node.TEXT_NODE && isBlank(node.getTextContent())) {
|
297
|
+
emptyNodes.add(node);
|
298
|
+
} else {
|
299
|
+
NodeList children = node.getChildNodes();
|
300
|
+
for (int i = 0; i < children.getLength(); i++) {
|
301
|
+
findEmptyTexts(children.item(i), emptyNodes);
|
302
|
+
}
|
273
303
|
}
|
304
|
+
}
|
274
305
|
}
|