nokogiri 1.11.1-java → 1.11.2-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +20 -15
- data/ext/java/nokogiri/EncodingHandler.java +78 -59
- data/ext/java/nokogiri/HtmlDocument.java +137 -114
- data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
- data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
- data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
- data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
- data/ext/java/nokogiri/NokogiriService.java +597 -526
- data/ext/java/nokogiri/XmlAttr.java +120 -96
- data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
- data/ext/java/nokogiri/XmlCdata.java +35 -26
- data/ext/java/nokogiri/XmlComment.java +48 -37
- data/ext/java/nokogiri/XmlDocument.java +642 -540
- data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
- data/ext/java/nokogiri/XmlDtd.java +450 -384
- data/ext/java/nokogiri/XmlElement.java +25 -18
- data/ext/java/nokogiri/XmlElementContent.java +345 -286
- data/ext/java/nokogiri/XmlElementDecl.java +126 -95
- data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
- data/ext/java/nokogiri/XmlEntityReference.java +51 -42
- data/ext/java/nokogiri/XmlNamespace.java +177 -145
- data/ext/java/nokogiri/XmlNode.java +1843 -1588
- data/ext/java/nokogiri/XmlNodeSet.java +361 -299
- data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
- data/ext/java/nokogiri/XmlReader.java +513 -418
- data/ext/java/nokogiri/XmlRelaxng.java +91 -78
- data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
- data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
- data/ext/java/nokogiri/XmlSchema.java +328 -263
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
- data/ext/java/nokogiri/XmlText.java +57 -46
- data/ext/java/nokogiri/XmlXpathContext.java +240 -206
- data/ext/java/nokogiri/XsltStylesheet.java +282 -239
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
- data/ext/java/nokogiri/internals/ParserContext.java +206 -179
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
- data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +253 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +66 -65
- data/ext/nokogiri/html_sax_push_parser.c +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +171 -63
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +221 -164
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +338 -286
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +100 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +1 -22
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +31 -8
- data/lib/nokogiri/xml/document.rb +31 -11
- data/lib/nokogiri/xml/node.rb +38 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +7 -8
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -63,89 +63,102 @@ import org.xml.sax.SAXException;
|
|
63
63
|
|
64
64
|
/**
|
65
65
|
* Class for Nokogiri::XML::RelaxNG
|
66
|
-
*
|
66
|
+
*
|
67
67
|
* @author sergio
|
68
68
|
* @author Yoko Harada <yokolet@gmail.com>
|
69
69
|
*/
|
70
|
-
@JRubyClass(name="Nokogiri::XML::RelaxNG", parent="Nokogiri::XML::Schema")
|
71
|
-
public class XmlRelaxng extends XmlSchema
|
72
|
-
|
70
|
+
@JRubyClass(name = "Nokogiri::XML::RelaxNG", parent = "Nokogiri::XML::Schema")
|
71
|
+
public class XmlRelaxng extends XmlSchema
|
72
|
+
{
|
73
|
+
private Verifier verifier;
|
73
74
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
Schema schema = xmlRelaxng.getSchema(source, context);
|
95
|
-
xmlRelaxng.setVerifier(schema.newVerifier());
|
96
|
-
return xmlRelaxng;
|
97
|
-
} catch (VerifierConfigurationException ex) {
|
98
|
-
throw context.getRuntime().newRuntimeError("Could not parse document: " + ex.getMessage());
|
99
|
-
}
|
75
|
+
public
|
76
|
+
XmlRelaxng(Ruby ruby, RubyClass klazz)
|
77
|
+
{
|
78
|
+
super(ruby, klazz);
|
79
|
+
}
|
80
|
+
|
81
|
+
private void
|
82
|
+
setVerifier(Verifier verifier)
|
83
|
+
{
|
84
|
+
this.verifier = verifier;
|
85
|
+
}
|
86
|
+
|
87
|
+
static XmlSchema
|
88
|
+
createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions)
|
89
|
+
{
|
90
|
+
Ruby runtime = context.getRuntime();
|
91
|
+
XmlRelaxng xmlRelaxng = (XmlRelaxng) NokogiriService.XML_RELAXNG_ALLOCATOR.allocate(runtime, klazz);
|
92
|
+
|
93
|
+
if (parseOptions == null) {
|
94
|
+
parseOptions = defaultParseOptions(context.getRuntime());
|
100
95
|
}
|
101
96
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
StreamResult result = new StreamResult(xmlAsWriter);
|
112
|
-
try {
|
113
|
-
TransformerFactory.newInstance().newTransformer().transform(ds, result);
|
114
|
-
} catch (TransformerConfigurationException ex) {
|
115
|
-
throw context.getRuntime()
|
116
|
-
.newRuntimeError("Could not parse document: "+ex.getMessage());
|
117
|
-
} catch (TransformerException ex) {
|
118
|
-
throw context.getRuntime()
|
119
|
-
.newRuntimeError("Could not parse document: "+ex.getMessage());
|
120
|
-
}
|
121
|
-
try {
|
122
|
-
is = new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8"));
|
123
|
-
} catch (UnsupportedEncodingException ex) {
|
124
|
-
throw context.getRuntime()
|
125
|
-
.newRuntimeError("Could not parse document: "+ex.getMessage());
|
126
|
-
}
|
127
|
-
}
|
128
|
-
|
129
|
-
try {
|
130
|
-
return factory.compileSchema(is);
|
131
|
-
} catch (VerifierConfigurationException ex) {
|
132
|
-
throw context.getRuntime()
|
133
|
-
.newRuntimeError("Could not parse document: "+ex.getMessage());
|
134
|
-
} catch (SAXException ex) {
|
135
|
-
throw context.getRuntime()
|
136
|
-
.newRuntimeError("Could not parse document: "+ex.getMessage());
|
137
|
-
} catch (IOException ex) {
|
138
|
-
throw context.getRuntime().newIOError(ex.getClass() + ": " + ex.getMessage());
|
139
|
-
}
|
97
|
+
xmlRelaxng.setInstanceVariable("@errors", runtime.newEmptyArray());
|
98
|
+
xmlRelaxng.setInstanceVariable("@parse_options", parseOptions);
|
99
|
+
|
100
|
+
try {
|
101
|
+
Schema schema = xmlRelaxng.getSchema(source, context);
|
102
|
+
xmlRelaxng.setVerifier(schema.newVerifier());
|
103
|
+
return xmlRelaxng;
|
104
|
+
} catch (VerifierConfigurationException ex) {
|
105
|
+
throw context.getRuntime().newRuntimeError("Could not parse document: " + ex.getMessage());
|
140
106
|
}
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
107
|
+
}
|
108
|
+
|
109
|
+
private Schema
|
110
|
+
getSchema(Source source, ThreadContext context)
|
111
|
+
{
|
112
|
+
InputStream is;
|
113
|
+
VerifierFactory factory = new com.thaiopensource.relaxng.jarv.VerifierFactoryImpl();
|
114
|
+
if (source instanceof StreamSource) {
|
115
|
+
StreamSource ss = (StreamSource)source;
|
116
|
+
is = ss.getInputStream();
|
117
|
+
} else { //if (this.source instanceof DOMSource)
|
118
|
+
DOMSource ds = (DOMSource)source;
|
119
|
+
StringWriter xmlAsWriter = new StringWriter();
|
120
|
+
StreamResult result = new StreamResult(xmlAsWriter);
|
121
|
+
try {
|
122
|
+
TransformerFactory.newInstance().newTransformer().transform(ds, result);
|
123
|
+
} catch (TransformerConfigurationException ex) {
|
124
|
+
throw context.getRuntime()
|
125
|
+
.newRuntimeError("Could not parse document: " + ex.getMessage());
|
126
|
+
} catch (TransformerException ex) {
|
127
|
+
throw context.getRuntime()
|
128
|
+
.newRuntimeError("Could not parse document: " + ex.getMessage());
|
129
|
+
}
|
130
|
+
try {
|
131
|
+
is = new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8"));
|
132
|
+
} catch (UnsupportedEncodingException ex) {
|
133
|
+
throw context.getRuntime()
|
134
|
+
.newRuntimeError("Could not parse document: " + ex.getMessage());
|
135
|
+
}
|
145
136
|
}
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
137
|
+
|
138
|
+
try {
|
139
|
+
return factory.compileSchema(is);
|
140
|
+
} catch (VerifierConfigurationException ex) {
|
141
|
+
throw context.getRuntime()
|
142
|
+
.newRuntimeError("Could not parse document: " + ex.getMessage());
|
143
|
+
} catch (SAXException ex) {
|
144
|
+
throw context.getRuntime()
|
145
|
+
.newRuntimeError("Could not parse document: " + ex.getMessage());
|
146
|
+
} catch (IOException ex) {
|
147
|
+
throw context.getRuntime().newIOError(ex.getClass() + ": " + ex.getMessage());
|
150
148
|
}
|
149
|
+
}
|
150
|
+
|
151
|
+
@Override
|
152
|
+
protected void
|
153
|
+
setErrorHandler(ErrorHandler errorHandler)
|
154
|
+
{
|
155
|
+
verifier.setErrorHandler(errorHandler);
|
156
|
+
}
|
157
|
+
|
158
|
+
@Override
|
159
|
+
protected void
|
160
|
+
validate(Document document) throws SAXException, IOException
|
161
|
+
{
|
162
|
+
verifier.verify(document);
|
163
|
+
}
|
151
164
|
}
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
18
|
* permit persons to whom the Software is furnished to do so, subject to
|
19
19
|
* the following conditions:
|
20
|
-
*
|
20
|
+
*
|
21
21
|
* The above copyright notice and this permission notice shall be
|
22
22
|
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
23
|
+
*
|
24
24
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
25
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
26
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
@@ -67,308 +67,353 @@ import nokogiri.internals.XmlSaxParser;
|
|
67
67
|
* @author Patrick Mahoney <pat@polycrystal.org>
|
68
68
|
* @author Yoko Harada <yokolet@gmail.com>
|
69
69
|
*/
|
70
|
-
@JRubyClass(name="Nokogiri::XML::SAX::ParserContext")
|
71
|
-
public class XmlSaxParserContext extends ParserContext
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
* Create and return a copy of this object.
|
106
|
-
*
|
107
|
-
* @return a clone of this object
|
108
|
-
*/
|
109
|
-
@Override
|
110
|
-
public Object clone() throws CloneNotSupportedException {
|
111
|
-
return super.clone();
|
112
|
-
}
|
113
|
-
|
114
|
-
protected AbstractSAXParser createParser() throws SAXException {
|
115
|
-
XmlSaxParser parser = new XmlSaxParser();
|
116
|
-
parser.setFeature(FEATURE_NAMESPACE_PREFIXES, true);
|
117
|
-
parser.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
|
118
|
-
return parser;
|
119
|
-
}
|
120
|
-
|
121
|
-
/**
|
122
|
-
* Create a new parser context that will parse the string
|
123
|
-
* <code>data</code>.
|
124
|
-
*/
|
125
|
-
@JRubyMethod(name="memory", meta=true)
|
126
|
-
public static IRubyObject parse_memory(ThreadContext context,
|
127
|
-
IRubyObject klazz,
|
128
|
-
IRubyObject data) {
|
129
|
-
final Ruby runtime = context.runtime;
|
130
|
-
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
|
131
|
-
ctx.initialize(runtime);
|
132
|
-
ctx.setStringInputSource(context, data, runtime.getNil());
|
133
|
-
return ctx;
|
134
|
-
}
|
135
|
-
|
136
|
-
/**
|
137
|
-
* Create a new parser context that will read from the file
|
138
|
-
* <code>data</code> and parse.
|
139
|
-
*/
|
140
|
-
@JRubyMethod(name="file", meta=true)
|
141
|
-
public static IRubyObject parse_file(ThreadContext context,
|
142
|
-
IRubyObject klazz,
|
143
|
-
IRubyObject data) {
|
144
|
-
final Ruby runtime = context.runtime;
|
145
|
-
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
|
146
|
-
ctx.initialize(context.getRuntime());
|
147
|
-
ctx.setInputSourceFile(context, data);
|
148
|
-
return ctx;
|
149
|
-
}
|
150
|
-
|
151
|
-
/**
|
152
|
-
* Create a new parser context that will read from the IO or
|
153
|
-
* StringIO <code>data</code> and parse.
|
154
|
-
*
|
155
|
-
* TODO: Currently ignores encoding <code>enc</code>.
|
156
|
-
*/
|
157
|
-
@JRubyMethod(name="io", meta=true)
|
158
|
-
public static IRubyObject parse_io(ThreadContext context,
|
159
|
-
IRubyObject klazz,
|
160
|
-
IRubyObject data,
|
161
|
-
IRubyObject enc) {
|
162
|
-
//int encoding = (int)enc.convertToInteger().getLongValue();
|
163
|
-
final Ruby runtime = context.runtime;
|
164
|
-
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
|
165
|
-
ctx.initialize(runtime);
|
166
|
-
ctx.setIOInputSource(context, data, runtime.getNil());
|
167
|
-
return ctx;
|
70
|
+
@JRubyClass(name = "Nokogiri::XML::SAX::ParserContext")
|
71
|
+
public class XmlSaxParserContext extends ParserContext
|
72
|
+
{
|
73
|
+
|
74
|
+
protected static final String FEATURE_NAMESPACES =
|
75
|
+
"http://xml.org/sax/features/namespaces";
|
76
|
+
protected static final String FEATURE_NAMESPACE_PREFIXES =
|
77
|
+
"http://xml.org/sax/features/namespace-prefixes";
|
78
|
+
protected static final String FEATURE_LOAD_EXTERNAL_DTD =
|
79
|
+
"http://apache.org/xml/features/nonvalidating/load-external-dtd";
|
80
|
+
protected static final String FEATURE_CONTINUE_AFTER_FATAL_ERROR =
|
81
|
+
"http://apache.org/xml/features/continue-after-fatal-error";
|
82
|
+
|
83
|
+
protected AbstractSAXParser parser;
|
84
|
+
|
85
|
+
protected NokogiriHandler handler;
|
86
|
+
private boolean replaceEntities = true;
|
87
|
+
private boolean recovery = false;
|
88
|
+
|
89
|
+
public
|
90
|
+
XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass)
|
91
|
+
{
|
92
|
+
super(ruby, rubyClass);
|
93
|
+
}
|
94
|
+
|
95
|
+
protected void
|
96
|
+
initialize(Ruby runtime)
|
97
|
+
{
|
98
|
+
try {
|
99
|
+
parser = createParser();
|
100
|
+
} catch (SAXException se) {
|
101
|
+
// Unexpected failure in XML subsystem
|
102
|
+
RaiseException ex = runtime.newRuntimeError(se.toString());
|
103
|
+
ex.initCause(se);
|
104
|
+
throw ex;
|
168
105
|
}
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
106
|
+
}
|
107
|
+
|
108
|
+
/**
|
109
|
+
* Create and return a copy of this object.
|
110
|
+
*
|
111
|
+
* @return a clone of this object
|
112
|
+
*/
|
113
|
+
@Override
|
114
|
+
public Object
|
115
|
+
clone() throws CloneNotSupportedException
|
116
|
+
{
|
117
|
+
return super.clone();
|
118
|
+
}
|
119
|
+
|
120
|
+
protected AbstractSAXParser
|
121
|
+
createParser() throws SAXException
|
122
|
+
{
|
123
|
+
XmlSaxParser parser = new XmlSaxParser();
|
124
|
+
parser.setFeature(FEATURE_NAMESPACE_PREFIXES, true);
|
125
|
+
parser.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
|
126
|
+
return parser;
|
127
|
+
}
|
128
|
+
|
129
|
+
/**
|
130
|
+
* Create a new parser context that will parse the string
|
131
|
+
* <code>data</code>.
|
132
|
+
*/
|
133
|
+
@JRubyMethod(name = "memory", meta = true)
|
134
|
+
public static IRubyObject
|
135
|
+
parse_memory(ThreadContext context,
|
136
|
+
IRubyObject klazz,
|
137
|
+
IRubyObject data)
|
138
|
+
{
|
139
|
+
final Ruby runtime = context.runtime;
|
140
|
+
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
|
141
|
+
ctx.initialize(runtime);
|
142
|
+
ctx.setStringInputSource(context, data, runtime.getNil());
|
143
|
+
return ctx;
|
144
|
+
}
|
145
|
+
|
146
|
+
/**
|
147
|
+
* Create a new parser context that will read from the file
|
148
|
+
* <code>data</code> and parse.
|
149
|
+
*/
|
150
|
+
@JRubyMethod(name = "file", meta = true)
|
151
|
+
public static IRubyObject
|
152
|
+
parse_file(ThreadContext context,
|
153
|
+
IRubyObject klazz,
|
154
|
+
IRubyObject data)
|
155
|
+
{
|
156
|
+
final Ruby runtime = context.runtime;
|
157
|
+
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
|
158
|
+
ctx.initialize(context.getRuntime());
|
159
|
+
ctx.setInputSourceFile(context, data);
|
160
|
+
return ctx;
|
161
|
+
}
|
162
|
+
|
163
|
+
/**
|
164
|
+
* Create a new parser context that will read from the IO or
|
165
|
+
* StringIO <code>data</code> and parse.
|
166
|
+
*
|
167
|
+
* TODO: Currently ignores encoding <code>enc</code>.
|
168
|
+
*/
|
169
|
+
@JRubyMethod(name = "io", meta = true)
|
170
|
+
public static IRubyObject
|
171
|
+
parse_io(ThreadContext context,
|
172
|
+
IRubyObject klazz,
|
173
|
+
IRubyObject data,
|
174
|
+
IRubyObject enc)
|
175
|
+
{
|
176
|
+
//int encoding = (int)enc.convertToInteger().getLongValue();
|
177
|
+
final Ruby runtime = context.runtime;
|
178
|
+
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
|
179
|
+
ctx.initialize(runtime);
|
180
|
+
ctx.setIOInputSource(context, data, runtime.getNil());
|
181
|
+
return ctx;
|
182
|
+
}
|
183
|
+
|
184
|
+
/**
|
185
|
+
* Create a new parser context that will read from a raw input stream.
|
186
|
+
* Meant to be run in a separate thread by XmlSaxPushParser.
|
187
|
+
*/
|
188
|
+
static XmlSaxParserContext
|
189
|
+
parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream)
|
190
|
+
{
|
191
|
+
XmlSaxParserContext ctx = newInstance(runtime, klazz);
|
192
|
+
ctx.initialize(runtime);
|
193
|
+
ctx.setInputSource(stream);
|
194
|
+
return ctx;
|
195
|
+
}
|
196
|
+
|
197
|
+
private static XmlSaxParserContext
|
198
|
+
newInstance(final Ruby runtime, final RubyClass klazz)
|
199
|
+
{
|
200
|
+
return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz);
|
201
|
+
}
|
202
|
+
|
203
|
+
/**
|
204
|
+
* Set a property of the underlying parser.
|
205
|
+
*/
|
206
|
+
protected void
|
207
|
+
setProperty(String key, Object val)
|
208
|
+
throws SAXNotRecognizedException, SAXNotSupportedException
|
209
|
+
{
|
210
|
+
parser.setProperty(key, val);
|
211
|
+
}
|
212
|
+
|
213
|
+
protected void
|
214
|
+
setContentHandler(ContentHandler handler)
|
215
|
+
{
|
216
|
+
parser.setContentHandler(handler);
|
217
|
+
}
|
218
|
+
|
219
|
+
protected void
|
220
|
+
setErrorHandler(ErrorHandler handler)
|
221
|
+
{
|
222
|
+
parser.setErrorHandler(handler);
|
223
|
+
}
|
224
|
+
|
225
|
+
public final NokogiriHandler
|
226
|
+
getNokogiriHandler() { return handler; }
|
227
|
+
|
228
|
+
/**
|
229
|
+
* Perform any initialization prior to parsing with the handler
|
230
|
+
* <code>handlerRuby</code>. Convenience hook for subclasses.
|
231
|
+
*/
|
232
|
+
protected void
|
233
|
+
preParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
|
234
|
+
{
|
235
|
+
((XmlSaxParser) parser).setXmlDeclHandler(handler);
|
236
|
+
if (recovery) {
|
237
|
+
try {
|
238
|
+
parser.setFeature(FEATURE_CONTINUE_AFTER_FATAL_ERROR, true);
|
239
|
+
} catch (Exception e) {
|
240
|
+
// Unexpected failure in XML subsystem
|
241
|
+
throw runtime.newRuntimeError(e.getMessage());
|
242
|
+
}
|
179
243
|
}
|
180
|
-
|
181
|
-
|
182
|
-
|
244
|
+
}
|
245
|
+
|
246
|
+
protected void
|
247
|
+
postParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
|
248
|
+
{
|
249
|
+
// noop
|
250
|
+
}
|
251
|
+
|
252
|
+
protected void
|
253
|
+
do_parse() throws SAXException, IOException
|
254
|
+
{
|
255
|
+
parser.parse(getInputSource());
|
256
|
+
}
|
257
|
+
|
258
|
+
@JRubyMethod
|
259
|
+
public IRubyObject
|
260
|
+
parse_with(ThreadContext context, IRubyObject handlerRuby)
|
261
|
+
{
|
262
|
+
final Ruby runtime = context.getRuntime();
|
263
|
+
|
264
|
+
if (!invoke(context, handlerRuby, "respond_to?", runtime.newSymbol("document")).isTrue()) {
|
265
|
+
throw runtime.newArgumentError("argument must respond_to document");
|
183
266
|
}
|
184
267
|
|
185
|
-
|
186
|
-
|
187
|
-
*/
|
188
|
-
protected void setProperty(String key, Object val)
|
189
|
-
throws SAXNotRecognizedException, SAXNotSupportedException {
|
190
|
-
parser.setProperty(key, val);
|
191
|
-
}
|
268
|
+
NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby);
|
269
|
+
preParse(runtime, handlerRuby, handler);
|
192
270
|
|
193
|
-
|
194
|
-
|
195
|
-
}
|
271
|
+
setContentHandler(handler);
|
272
|
+
setErrorHandler(handler);
|
196
273
|
|
197
|
-
|
198
|
-
|
274
|
+
try {
|
275
|
+
setProperty("http://xml.org/sax/properties/lexical-handler", handler);
|
276
|
+
} catch (Exception ex) {
|
277
|
+
throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
|
199
278
|
}
|
200
279
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
// Unexpected failure in XML subsystem
|
215
|
-
throw runtime.newRuntimeError(e.getMessage());
|
216
|
-
}
|
280
|
+
try {
|
281
|
+
try {
|
282
|
+
do_parse();
|
283
|
+
} catch (SAXParseException ex) {
|
284
|
+
// A bad document (<foo><bar></foo>) should call the
|
285
|
+
// error handler instead of raising a SAX exception.
|
286
|
+
|
287
|
+
// However, an EMPTY document should raise a RuntimeError.
|
288
|
+
// This is a bit kludgy, but AFAIK SAX doesn't distinguish
|
289
|
+
// between empty and bad whereas Nokogiri does.
|
290
|
+
String message = ex.getMessage();
|
291
|
+
if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) {
|
292
|
+
throw runtime.newRuntimeError("couldn't parse document: " + message);
|
217
293
|
}
|
294
|
+
handler.error(ex);
|
295
|
+
}
|
296
|
+
} catch (SAXException ex) {
|
297
|
+
// Unexpected failure in XML subsystem
|
298
|
+
throw runtime.newRuntimeError(ex.getMessage());
|
299
|
+
} catch (IOException ex) {
|
300
|
+
throw runtime.newIOErrorFromException(ex);
|
218
301
|
}
|
219
302
|
|
220
|
-
|
221
|
-
|
303
|
+
postParse(runtime, handlerRuby, handler);
|
304
|
+
|
305
|
+
//maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby);
|
306
|
+
|
307
|
+
return runtime.getNil();
|
308
|
+
}
|
309
|
+
|
310
|
+
/**
|
311
|
+
* Can take a boolean assignment.
|
312
|
+
*
|
313
|
+
* @param context
|
314
|
+
* @param value
|
315
|
+
* @return
|
316
|
+
*/
|
317
|
+
@JRubyMethod(name = "replace_entities=")
|
318
|
+
public IRubyObject
|
319
|
+
set_replace_entities(ThreadContext context, IRubyObject value)
|
320
|
+
{
|
321
|
+
replaceEntities = value.isTrue();
|
322
|
+
return this;
|
323
|
+
}
|
324
|
+
|
325
|
+
@JRubyMethod(name = "replace_entities")
|
326
|
+
public IRubyObject
|
327
|
+
get_replace_entities(ThreadContext context)
|
328
|
+
{
|
329
|
+
return context.runtime.newBoolean(replaceEntities);
|
330
|
+
}
|
331
|
+
|
332
|
+
/**
|
333
|
+
* Can take a boolean assignment.
|
334
|
+
*
|
335
|
+
* @param context
|
336
|
+
* @param value
|
337
|
+
* @return
|
338
|
+
*/
|
339
|
+
@JRubyMethod(name = "recovery=")
|
340
|
+
public IRubyObject
|
341
|
+
set_recovery(ThreadContext context, IRubyObject value)
|
342
|
+
{
|
343
|
+
recovery = value.isTrue();
|
344
|
+
return this;
|
345
|
+
}
|
346
|
+
|
347
|
+
@JRubyMethod(name = "recovery")
|
348
|
+
public IRubyObject
|
349
|
+
get_recovery(ThreadContext context)
|
350
|
+
{
|
351
|
+
return context.runtime.newBoolean(recovery);
|
352
|
+
}
|
353
|
+
|
354
|
+
/**
|
355
|
+
* If the handler's document is a FragmentHandler, attempt to trim
|
356
|
+
* leading and trailing whitespace.
|
357
|
+
*
|
358
|
+
* This is a bit hackish and depends heavily on the internals of
|
359
|
+
* FragmentHandler.
|
360
|
+
*/
|
361
|
+
protected void
|
362
|
+
maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, IRubyObject parser)
|
363
|
+
{
|
364
|
+
RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
|
365
|
+
RubyModule mod = context.getRuntime().getClassFromPath("Nokogiri::XML::FragmentHandler");
|
366
|
+
|
367
|
+
IRubyObject handler = adapter.getInstanceVariable(parser, "@document");
|
368
|
+
if (handler == null || handler.isNil() || !adapter.isKindOf(handler, mod)) {
|
369
|
+
return;
|
222
370
|
}
|
223
|
-
|
224
|
-
|
225
|
-
|
371
|
+
IRubyObject stack = adapter.getInstanceVariable(handler, "@stack");
|
372
|
+
if (stack == null || stack.isNil()) {
|
373
|
+
return;
|
226
374
|
}
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
if(!invoke(context, handlerRuby, "respond_to?", runtime.newSymbol("document")).isTrue()) {
|
233
|
-
throw runtime.newArgumentError("argument must respond_to document");
|
234
|
-
}
|
235
|
-
|
236
|
-
NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby);
|
237
|
-
preParse(runtime, handlerRuby, handler);
|
238
|
-
|
239
|
-
setContentHandler(handler);
|
240
|
-
setErrorHandler(handler);
|
241
|
-
|
242
|
-
try{
|
243
|
-
setProperty("http://xml.org/sax/properties/lexical-handler", handler);
|
244
|
-
}
|
245
|
-
catch (Exception ex) {
|
246
|
-
throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
|
247
|
-
}
|
248
|
-
|
249
|
-
try{
|
250
|
-
try {
|
251
|
-
do_parse();
|
252
|
-
}
|
253
|
-
catch (SAXParseException ex) {
|
254
|
-
// A bad document (<foo><bar></foo>) should call the
|
255
|
-
// error handler instead of raising a SAX exception.
|
256
|
-
|
257
|
-
// However, an EMPTY document should raise a RuntimeError.
|
258
|
-
// This is a bit kludgy, but AFAIK SAX doesn't distinguish
|
259
|
-
// between empty and bad whereas Nokogiri does.
|
260
|
-
String message = ex.getMessage();
|
261
|
-
if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) {
|
262
|
-
throw runtime.newRuntimeError("couldn't parse document: " + message);
|
263
|
-
}
|
264
|
-
handler.error(ex);
|
265
|
-
}
|
266
|
-
}
|
267
|
-
catch (SAXException ex) {
|
268
|
-
// Unexpected failure in XML subsystem
|
269
|
-
throw runtime.newRuntimeError(ex.getMessage());
|
270
|
-
}
|
271
|
-
catch (IOException ex) {
|
272
|
-
throw runtime.newIOErrorFromException(ex);
|
273
|
-
}
|
274
|
-
|
275
|
-
postParse(runtime, handlerRuby, handler);
|
276
|
-
|
277
|
-
//maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby);
|
278
|
-
|
279
|
-
return runtime.getNil();
|
375
|
+
// doc is finally a DocumentFragment whose nodes we can check
|
376
|
+
IRubyObject doc = adapter.callMethod(stack, "first");
|
377
|
+
if (doc == null || doc.isNil()) {
|
378
|
+
return;
|
280
379
|
}
|
281
380
|
|
282
|
-
|
283
|
-
* Can take a boolean assignment.
|
284
|
-
*
|
285
|
-
* @param context
|
286
|
-
* @param value
|
287
|
-
* @return
|
288
|
-
*/
|
289
|
-
@JRubyMethod(name = "replace_entities=")
|
290
|
-
public IRubyObject set_replace_entities(ThreadContext context, IRubyObject value) {
|
291
|
-
replaceEntities = value.isTrue();
|
292
|
-
return this;
|
293
|
-
}
|
381
|
+
IRubyObject children;
|
294
382
|
|
295
|
-
|
296
|
-
|
297
|
-
|
383
|
+
for (;;) {
|
384
|
+
children = adapter.callMethod(doc, "children");
|
385
|
+
IRubyObject first = adapter.callMethod(children, "first");
|
386
|
+
if (NokogiriHelpers.isBlank(first)) { adapter.callMethod(first, "unlink"); }
|
387
|
+
else { break; }
|
298
388
|
}
|
299
389
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
* @return
|
306
|
-
*/
|
307
|
-
@JRubyMethod(name = "recovery=")
|
308
|
-
public IRubyObject set_recovery(ThreadContext context, IRubyObject value) {
|
309
|
-
recovery = value.isTrue();
|
310
|
-
return this;
|
390
|
+
for (;;) {
|
391
|
+
children = adapter.callMethod(doc, "children");
|
392
|
+
IRubyObject last = adapter.callMethod(children, "last");
|
393
|
+
if (NokogiriHelpers.isBlank(last)) { adapter.callMethod(last, "unlink"); }
|
394
|
+
else { break; }
|
311
395
|
}
|
312
396
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
return;
|
335
|
-
// doc is finally a DocumentFragment whose nodes we can check
|
336
|
-
IRubyObject doc = adapter.callMethod(stack, "first");
|
337
|
-
if (doc == null || doc.isNil())
|
338
|
-
return;
|
339
|
-
|
340
|
-
IRubyObject children;
|
341
|
-
|
342
|
-
for (;;) {
|
343
|
-
children = adapter.callMethod(doc, "children");
|
344
|
-
IRubyObject first = adapter.callMethod(children, "first");
|
345
|
-
if (NokogiriHelpers.isBlank(first)) adapter.callMethod(first, "unlink");
|
346
|
-
else break;
|
347
|
-
}
|
348
|
-
|
349
|
-
for (;;) {
|
350
|
-
children = adapter.callMethod(doc, "children");
|
351
|
-
IRubyObject last = adapter.callMethod(children, "last");
|
352
|
-
if (NokogiriHelpers.isBlank(last)) adapter.callMethod(last, "unlink");
|
353
|
-
else break;
|
354
|
-
}
|
355
|
-
|
356
|
-
// While we have a document, normalize it.
|
357
|
-
((XmlNode) doc).normalize();
|
358
|
-
}
|
359
|
-
|
360
|
-
@JRubyMethod(name="column")
|
361
|
-
public IRubyObject column(ThreadContext context) {
|
362
|
-
final Integer number = handler.getColumn();
|
363
|
-
if (number == null) return context.getRuntime().getNil();
|
364
|
-
return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
|
365
|
-
}
|
366
|
-
|
367
|
-
@JRubyMethod(name="line")
|
368
|
-
public IRubyObject line(ThreadContext context) {
|
369
|
-
final Integer number = handler.getLine();
|
370
|
-
if (number == null) return context.getRuntime().getNil();
|
371
|
-
return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
|
372
|
-
}
|
397
|
+
// While we have a document, normalize it.
|
398
|
+
((XmlNode) doc).normalize();
|
399
|
+
}
|
400
|
+
|
401
|
+
@JRubyMethod(name = "column")
|
402
|
+
public IRubyObject
|
403
|
+
column(ThreadContext context)
|
404
|
+
{
|
405
|
+
final Integer number = handler.getColumn();
|
406
|
+
if (number == null) { return context.getRuntime().getNil(); }
|
407
|
+
return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
|
408
|
+
}
|
409
|
+
|
410
|
+
@JRubyMethod(name = "line")
|
411
|
+
public IRubyObject
|
412
|
+
line(ThreadContext context)
|
413
|
+
{
|
414
|
+
final Integer number = handler.getLine();
|
415
|
+
if (number == null) { return context.getRuntime().getNil(); }
|
416
|
+
return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
|
417
|
+
}
|
373
418
|
|
374
419
|
}
|