nokogiri 1.11.1-java → 1.11.2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (142)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +12 -12
  3. data/LICENSE.md +1 -1
  4. data/README.md +20 -15
  5. data/ext/java/nokogiri/EncodingHandler.java +78 -59
  6. data/ext/java/nokogiri/HtmlDocument.java +137 -114
  7. data/ext/java/nokogiri/HtmlElementDescription.java +104 -87
  8. data/ext/java/nokogiri/HtmlEntityLookup.java +31 -26
  9. data/ext/java/nokogiri/HtmlSaxParserContext.java +220 -192
  10. data/ext/java/nokogiri/HtmlSaxPushParser.java +164 -139
  11. data/ext/java/nokogiri/NokogiriService.java +597 -526
  12. data/ext/java/nokogiri/XmlAttr.java +120 -96
  13. data/ext/java/nokogiri/XmlAttributeDecl.java +97 -76
  14. data/ext/java/nokogiri/XmlCdata.java +35 -26
  15. data/ext/java/nokogiri/XmlComment.java +48 -37
  16. data/ext/java/nokogiri/XmlDocument.java +642 -540
  17. data/ext/java/nokogiri/XmlDocumentFragment.java +127 -107
  18. data/ext/java/nokogiri/XmlDtd.java +450 -384
  19. data/ext/java/nokogiri/XmlElement.java +25 -18
  20. data/ext/java/nokogiri/XmlElementContent.java +345 -286
  21. data/ext/java/nokogiri/XmlElementDecl.java +126 -95
  22. data/ext/java/nokogiri/XmlEntityDecl.java +121 -97
  23. data/ext/java/nokogiri/XmlEntityReference.java +51 -42
  24. data/ext/java/nokogiri/XmlNamespace.java +177 -145
  25. data/ext/java/nokogiri/XmlNode.java +1843 -1588
  26. data/ext/java/nokogiri/XmlNodeSet.java +361 -299
  27. data/ext/java/nokogiri/XmlProcessingInstruction.java +49 -39
  28. data/ext/java/nokogiri/XmlReader.java +513 -418
  29. data/ext/java/nokogiri/XmlRelaxng.java +91 -78
  30. data/ext/java/nokogiri/XmlSaxParserContext.java +330 -285
  31. data/ext/java/nokogiri/XmlSaxPushParser.java +229 -190
  32. data/ext/java/nokogiri/XmlSchema.java +328 -263
  33. data/ext/java/nokogiri/XmlSyntaxError.java +113 -83
  34. data/ext/java/nokogiri/XmlText.java +57 -46
  35. data/ext/java/nokogiri/XmlXpathContext.java +240 -206
  36. data/ext/java/nokogiri/XsltStylesheet.java +282 -239
  37. data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
  38. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +199 -168
  39. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
  40. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
  41. data/ext/java/nokogiri/internals/NokogiriDomParser.java +65 -50
  42. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
  43. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +25 -18
  44. data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -254
  45. data/ext/java/nokogiri/internals/NokogiriHelpers.java +738 -622
  46. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +186 -143
  47. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +83 -68
  48. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +66 -49
  49. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +86 -69
  50. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +44 -29
  51. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +118 -101
  52. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -24
  53. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +25 -17
  54. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +57 -42
  55. data/ext/java/nokogiri/internals/ParserContext.java +206 -179
  56. data/ext/java/nokogiri/internals/ReaderNode.java +478 -371
  57. data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -707
  58. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +28 -19
  59. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
  60. data/ext/java/nokogiri/internals/XmlDeclHandler.java +5 -4
  61. data/ext/java/nokogiri/internals/XmlDomParserContext.java +208 -177
  62. data/ext/java/nokogiri/internals/XmlSaxParser.java +24 -17
  63. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
  64. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
  65. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
  66. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
  67. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
  68. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
  69. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
  77. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
  78. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
  79. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
  80. data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
  81. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
  82. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
  83. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
  84. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
  85. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
  86. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
  87. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
  88. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
  89. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
  90. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
  91. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -570
  92. data/ext/nokogiri/depend +34 -474
  93. data/ext/nokogiri/extconf.rb +253 -183
  94. data/ext/nokogiri/html_document.c +10 -15
  95. data/ext/nokogiri/html_element_description.c +84 -71
  96. data/ext/nokogiri/html_entity_lookup.c +21 -16
  97. data/ext/nokogiri/html_sax_parser_context.c +66 -65
  98. data/ext/nokogiri/html_sax_push_parser.c +29 -27
  99. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  100. data/ext/nokogiri/nokogiri.c +171 -63
  101. data/ext/nokogiri/test_global_handlers.c +3 -4
  102. data/ext/nokogiri/xml_attr.c +15 -15
  103. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  104. data/ext/nokogiri/xml_cdata.c +13 -18
  105. data/ext/nokogiri/xml_comment.c +19 -26
  106. data/ext/nokogiri/xml_document.c +221 -164
  107. data/ext/nokogiri/xml_document_fragment.c +13 -15
  108. data/ext/nokogiri/xml_dtd.c +54 -48
  109. data/ext/nokogiri/xml_element_content.c +30 -27
  110. data/ext/nokogiri/xml_element_decl.c +22 -22
  111. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  112. data/ext/nokogiri/xml_entity_decl.c +32 -30
  113. data/ext/nokogiri/xml_entity_reference.c +16 -18
  114. data/ext/nokogiri/xml_namespace.c +56 -49
  115. data/ext/nokogiri/xml_node.c +338 -286
  116. data/ext/nokogiri/xml_node_set.c +168 -156
  117. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  118. data/ext/nokogiri/xml_reader.c +191 -157
  119. data/ext/nokogiri/xml_relax_ng.c +29 -23
  120. data/ext/nokogiri/xml_sax_parser.c +117 -112
  121. data/ext/nokogiri/xml_sax_parser_context.c +100 -85
  122. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  123. data/ext/nokogiri/xml_schema.c +48 -42
  124. data/ext/nokogiri/xml_syntax_error.c +21 -23
  125. data/ext/nokogiri/xml_text.c +13 -17
  126. data/ext/nokogiri/xml_xpath_context.c +134 -127
  127. data/ext/nokogiri/xslt_stylesheet.c +157 -157
  128. data/lib/nokogiri.rb +1 -22
  129. data/lib/nokogiri/css/parser.rb +1 -1
  130. data/lib/nokogiri/extension.rb +26 -0
  131. data/lib/nokogiri/html/document_fragment.rb +15 -15
  132. data/lib/nokogiri/nokogiri.jar +0 -0
  133. data/lib/nokogiri/version/constant.rb +1 -1
  134. data/lib/nokogiri/version/info.rb +31 -8
  135. data/lib/nokogiri/xml/document.rb +31 -11
  136. data/lib/nokogiri/xml/node.rb +38 -42
  137. data/lib/nokogiri/xml/reader.rb +2 -9
  138. data/lib/nokogiri/xml/xpath.rb +1 -3
  139. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  140. metadata +7 -8
  141. data/ext/nokogiri/xml_io.c +0 -63
  142. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -63,89 +63,102 @@ import org.xml.sax.SAXException;
63
63
 
64
64
  /**
65
65
  * Class for Nokogiri::XML::RelaxNG
66
- *
66
+ *
67
67
  * @author sergio
68
68
  * @author Yoko Harada <yokolet@gmail.com>
69
69
  */
70
- @JRubyClass(name="Nokogiri::XML::RelaxNG", parent="Nokogiri::XML::Schema")
71
- public class XmlRelaxng extends XmlSchema {
72
- private Verifier verifier;
70
+ @JRubyClass(name = "Nokogiri::XML::RelaxNG", parent = "Nokogiri::XML::Schema")
71
+ public class XmlRelaxng extends XmlSchema
72
+ {
73
+ private Verifier verifier;
73
74
 
74
- public XmlRelaxng(Ruby ruby, RubyClass klazz) {
75
- super(ruby, klazz);
76
- }
77
-
78
- private void setVerifier(Verifier verifier) {
79
- this.verifier = verifier;
80
- }
81
-
82
- static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) {
83
- Ruby runtime = context.getRuntime();
84
- XmlRelaxng xmlRelaxng = (XmlRelaxng) NokogiriService.XML_RELAXNG_ALLOCATOR.allocate(runtime, klazz);
85
-
86
- if (parseOptions == null) {
87
- parseOptions = defaultParseOptions(context.getRuntime());
88
- }
89
-
90
- xmlRelaxng.setInstanceVariable("@errors", runtime.newEmptyArray());
91
- xmlRelaxng.setInstanceVariable("@parse_options", parseOptions);
92
-
93
- try {
94
- Schema schema = xmlRelaxng.getSchema(source, context);
95
- xmlRelaxng.setVerifier(schema.newVerifier());
96
- return xmlRelaxng;
97
- } catch (VerifierConfigurationException ex) {
98
- throw context.getRuntime().newRuntimeError("Could not parse document: " + ex.getMessage());
99
- }
75
+ public
76
+ XmlRelaxng(Ruby ruby, RubyClass klazz)
77
+ {
78
+ super(ruby, klazz);
79
+ }
80
+
81
+ private void
82
+ setVerifier(Verifier verifier)
83
+ {
84
+ this.verifier = verifier;
85
+ }
86
+
87
+ static XmlSchema
88
+ createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions)
89
+ {
90
+ Ruby runtime = context.getRuntime();
91
+ XmlRelaxng xmlRelaxng = (XmlRelaxng) NokogiriService.XML_RELAXNG_ALLOCATOR.allocate(runtime, klazz);
92
+
93
+ if (parseOptions == null) {
94
+ parseOptions = defaultParseOptions(context.getRuntime());
100
95
  }
101
96
 
102
- private Schema getSchema(Source source, ThreadContext context) {
103
- InputStream is;
104
- VerifierFactory factory = new com.thaiopensource.relaxng.jarv.VerifierFactoryImpl();
105
- if (source instanceof StreamSource) {
106
- StreamSource ss = (StreamSource)source;
107
- is = ss.getInputStream();
108
- } else { //if (this.source instanceof DOMSource)
109
- DOMSource ds = (DOMSource)source;
110
- StringWriter xmlAsWriter = new StringWriter();
111
- StreamResult result = new StreamResult(xmlAsWriter);
112
- try {
113
- TransformerFactory.newInstance().newTransformer().transform(ds, result);
114
- } catch (TransformerConfigurationException ex) {
115
- throw context.getRuntime()
116
- .newRuntimeError("Could not parse document: "+ex.getMessage());
117
- } catch (TransformerException ex) {
118
- throw context.getRuntime()
119
- .newRuntimeError("Could not parse document: "+ex.getMessage());
120
- }
121
- try {
122
- is = new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8"));
123
- } catch (UnsupportedEncodingException ex) {
124
- throw context.getRuntime()
125
- .newRuntimeError("Could not parse document: "+ex.getMessage());
126
- }
127
- }
128
-
129
- try {
130
- return factory.compileSchema(is);
131
- } catch (VerifierConfigurationException ex) {
132
- throw context.getRuntime()
133
- .newRuntimeError("Could not parse document: "+ex.getMessage());
134
- } catch (SAXException ex) {
135
- throw context.getRuntime()
136
- .newRuntimeError("Could not parse document: "+ex.getMessage());
137
- } catch (IOException ex) {
138
- throw context.getRuntime().newIOError(ex.getClass() + ": " + ex.getMessage());
139
- }
97
+ xmlRelaxng.setInstanceVariable("@errors", runtime.newEmptyArray());
98
+ xmlRelaxng.setInstanceVariable("@parse_options", parseOptions);
99
+
100
+ try {
101
+ Schema schema = xmlRelaxng.getSchema(source, context);
102
+ xmlRelaxng.setVerifier(schema.newVerifier());
103
+ return xmlRelaxng;
104
+ } catch (VerifierConfigurationException ex) {
105
+ throw context.getRuntime().newRuntimeError("Could not parse document: " + ex.getMessage());
140
106
  }
141
-
142
- @Override
143
- protected void setErrorHandler(ErrorHandler errorHandler) {
144
- verifier.setErrorHandler(errorHandler);
107
+ }
108
+
109
+ private Schema
110
+ getSchema(Source source, ThreadContext context)
111
+ {
112
+ InputStream is;
113
+ VerifierFactory factory = new com.thaiopensource.relaxng.jarv.VerifierFactoryImpl();
114
+ if (source instanceof StreamSource) {
115
+ StreamSource ss = (StreamSource)source;
116
+ is = ss.getInputStream();
117
+ } else { //if (this.source instanceof DOMSource)
118
+ DOMSource ds = (DOMSource)source;
119
+ StringWriter xmlAsWriter = new StringWriter();
120
+ StreamResult result = new StreamResult(xmlAsWriter);
121
+ try {
122
+ TransformerFactory.newInstance().newTransformer().transform(ds, result);
123
+ } catch (TransformerConfigurationException ex) {
124
+ throw context.getRuntime()
125
+ .newRuntimeError("Could not parse document: " + ex.getMessage());
126
+ } catch (TransformerException ex) {
127
+ throw context.getRuntime()
128
+ .newRuntimeError("Could not parse document: " + ex.getMessage());
129
+ }
130
+ try {
131
+ is = new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8"));
132
+ } catch (UnsupportedEncodingException ex) {
133
+ throw context.getRuntime()
134
+ .newRuntimeError("Could not parse document: " + ex.getMessage());
135
+ }
145
136
  }
146
-
147
- @Override
148
- protected void validate(Document document) throws SAXException, IOException {
149
- verifier.verify(document);
137
+
138
+ try {
139
+ return factory.compileSchema(is);
140
+ } catch (VerifierConfigurationException ex) {
141
+ throw context.getRuntime()
142
+ .newRuntimeError("Could not parse document: " + ex.getMessage());
143
+ } catch (SAXException ex) {
144
+ throw context.getRuntime()
145
+ .newRuntimeError("Could not parse document: " + ex.getMessage());
146
+ } catch (IOException ex) {
147
+ throw context.getRuntime().newIOError(ex.getClass() + ": " + ex.getMessage());
150
148
  }
149
+ }
150
+
151
+ @Override
152
+ protected void
153
+ setErrorHandler(ErrorHandler errorHandler)
154
+ {
155
+ verifier.setErrorHandler(errorHandler);
156
+ }
157
+
158
+ @Override
159
+ protected void
160
+ validate(Document document) throws SAXException, IOException
161
+ {
162
+ verifier.verify(document);
163
+ }
151
164
  }
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -67,308 +67,353 @@ import nokogiri.internals.XmlSaxParser;
67
67
  * @author Patrick Mahoney <pat@polycrystal.org>
68
68
  * @author Yoko Harada <yokolet@gmail.com>
69
69
  */
70
- @JRubyClass(name="Nokogiri::XML::SAX::ParserContext")
71
- public class XmlSaxParserContext extends ParserContext {
72
-
73
- protected static final String FEATURE_NAMESPACES =
74
- "http://xml.org/sax/features/namespaces";
75
- protected static final String FEATURE_NAMESPACE_PREFIXES =
76
- "http://xml.org/sax/features/namespace-prefixes";
77
- protected static final String FEATURE_LOAD_EXTERNAL_DTD =
78
- "http://apache.org/xml/features/nonvalidating/load-external-dtd";
79
- protected static final String FEATURE_CONTINUE_AFTER_FATAL_ERROR =
80
- "http://apache.org/xml/features/continue-after-fatal-error";
81
-
82
- protected AbstractSAXParser parser;
83
-
84
- protected NokogiriHandler handler;
85
- private boolean replaceEntities = true;
86
- private boolean recovery = false;
87
-
88
- public XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass) {
89
- super(ruby, rubyClass);
90
- }
91
-
92
- protected void initialize(Ruby runtime) {
93
- try {
94
- parser = createParser();
95
- }
96
- catch (SAXException se) {
97
- // Unexpected failure in XML subsystem
98
- RaiseException ex = runtime.newRuntimeError(se.toString());
99
- ex.initCause(se);
100
- throw ex;
101
- }
102
- }
103
-
104
- /**
105
- * Create and return a copy of this object.
106
- *
107
- * @return a clone of this object
108
- */
109
- @Override
110
- public Object clone() throws CloneNotSupportedException {
111
- return super.clone();
112
- }
113
-
114
- protected AbstractSAXParser createParser() throws SAXException {
115
- XmlSaxParser parser = new XmlSaxParser();
116
- parser.setFeature(FEATURE_NAMESPACE_PREFIXES, true);
117
- parser.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
118
- return parser;
119
- }
120
-
121
- /**
122
- * Create a new parser context that will parse the string
123
- * <code>data</code>.
124
- */
125
- @JRubyMethod(name="memory", meta=true)
126
- public static IRubyObject parse_memory(ThreadContext context,
127
- IRubyObject klazz,
128
- IRubyObject data) {
129
- final Ruby runtime = context.runtime;
130
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
131
- ctx.initialize(runtime);
132
- ctx.setStringInputSource(context, data, runtime.getNil());
133
- return ctx;
134
- }
135
-
136
- /**
137
- * Create a new parser context that will read from the file
138
- * <code>data</code> and parse.
139
- */
140
- @JRubyMethod(name="file", meta=true)
141
- public static IRubyObject parse_file(ThreadContext context,
142
- IRubyObject klazz,
143
- IRubyObject data) {
144
- final Ruby runtime = context.runtime;
145
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
146
- ctx.initialize(context.getRuntime());
147
- ctx.setInputSourceFile(context, data);
148
- return ctx;
149
- }
150
-
151
- /**
152
- * Create a new parser context that will read from the IO or
153
- * StringIO <code>data</code> and parse.
154
- *
155
- * TODO: Currently ignores encoding <code>enc</code>.
156
- */
157
- @JRubyMethod(name="io", meta=true)
158
- public static IRubyObject parse_io(ThreadContext context,
159
- IRubyObject klazz,
160
- IRubyObject data,
161
- IRubyObject enc) {
162
- //int encoding = (int)enc.convertToInteger().getLongValue();
163
- final Ruby runtime = context.runtime;
164
- XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
165
- ctx.initialize(runtime);
166
- ctx.setIOInputSource(context, data, runtime.getNil());
167
- return ctx;
70
+ @JRubyClass(name = "Nokogiri::XML::SAX::ParserContext")
71
+ public class XmlSaxParserContext extends ParserContext
72
+ {
73
+
74
+ protected static final String FEATURE_NAMESPACES =
75
+ "http://xml.org/sax/features/namespaces";
76
+ protected static final String FEATURE_NAMESPACE_PREFIXES =
77
+ "http://xml.org/sax/features/namespace-prefixes";
78
+ protected static final String FEATURE_LOAD_EXTERNAL_DTD =
79
+ "http://apache.org/xml/features/nonvalidating/load-external-dtd";
80
+ protected static final String FEATURE_CONTINUE_AFTER_FATAL_ERROR =
81
+ "http://apache.org/xml/features/continue-after-fatal-error";
82
+
83
+ protected AbstractSAXParser parser;
84
+
85
+ protected NokogiriHandler handler;
86
+ private boolean replaceEntities = true;
87
+ private boolean recovery = false;
88
+
89
+ public
90
+ XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass)
91
+ {
92
+ super(ruby, rubyClass);
93
+ }
94
+
95
+ protected void
96
+ initialize(Ruby runtime)
97
+ {
98
+ try {
99
+ parser = createParser();
100
+ } catch (SAXException se) {
101
+ // Unexpected failure in XML subsystem
102
+ RaiseException ex = runtime.newRuntimeError(se.toString());
103
+ ex.initCause(se);
104
+ throw ex;
168
105
  }
169
-
170
- /**
171
- * Create a new parser context that will read from a raw input stream.
172
- * Meant to be run in a separate thread by XmlSaxPushParser.
173
- */
174
- static XmlSaxParserContext parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream) {
175
- XmlSaxParserContext ctx = newInstance(runtime, klazz);
176
- ctx.initialize(runtime);
177
- ctx.setInputSource(stream);
178
- return ctx;
106
+ }
107
+
108
+ /**
109
+ * Create and return a copy of this object.
110
+ *
111
+ * @return a clone of this object
112
+ */
113
+ @Override
114
+ public Object
115
+ clone() throws CloneNotSupportedException
116
+ {
117
+ return super.clone();
118
+ }
119
+
120
+ protected AbstractSAXParser
121
+ createParser() throws SAXException
122
+ {
123
+ XmlSaxParser parser = new XmlSaxParser();
124
+ parser.setFeature(FEATURE_NAMESPACE_PREFIXES, true);
125
+ parser.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
126
+ return parser;
127
+ }
128
+
129
+ /**
130
+ * Create a new parser context that will parse the string
131
+ * <code>data</code>.
132
+ */
133
+ @JRubyMethod(name = "memory", meta = true)
134
+ public static IRubyObject
135
+ parse_memory(ThreadContext context,
136
+ IRubyObject klazz,
137
+ IRubyObject data)
138
+ {
139
+ final Ruby runtime = context.runtime;
140
+ XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
141
+ ctx.initialize(runtime);
142
+ ctx.setStringInputSource(context, data, runtime.getNil());
143
+ return ctx;
144
+ }
145
+
146
+ /**
147
+ * Create a new parser context that will read from the file
148
+ * <code>data</code> and parse.
149
+ */
150
+ @JRubyMethod(name = "file", meta = true)
151
+ public static IRubyObject
152
+ parse_file(ThreadContext context,
153
+ IRubyObject klazz,
154
+ IRubyObject data)
155
+ {
156
+ final Ruby runtime = context.runtime;
157
+ XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
158
+ ctx.initialize(context.getRuntime());
159
+ ctx.setInputSourceFile(context, data);
160
+ return ctx;
161
+ }
162
+
163
+ /**
164
+ * Create a new parser context that will read from the IO or
165
+ * StringIO <code>data</code> and parse.
166
+ *
167
+ * TODO: Currently ignores encoding <code>enc</code>.
168
+ */
169
+ @JRubyMethod(name = "io", meta = true)
170
+ public static IRubyObject
171
+ parse_io(ThreadContext context,
172
+ IRubyObject klazz,
173
+ IRubyObject data,
174
+ IRubyObject enc)
175
+ {
176
+ //int encoding = (int)enc.convertToInteger().getLongValue();
177
+ final Ruby runtime = context.runtime;
178
+ XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
179
+ ctx.initialize(runtime);
180
+ ctx.setIOInputSource(context, data, runtime.getNil());
181
+ return ctx;
182
+ }
183
+
184
+ /**
185
+ * Create a new parser context that will read from a raw input stream.
186
+ * Meant to be run in a separate thread by XmlSaxPushParser.
187
+ */
188
+ static XmlSaxParserContext
189
+ parse_stream(final Ruby runtime, RubyClass klazz, InputStream stream)
190
+ {
191
+ XmlSaxParserContext ctx = newInstance(runtime, klazz);
192
+ ctx.initialize(runtime);
193
+ ctx.setInputSource(stream);
194
+ return ctx;
195
+ }
196
+
197
+ private static XmlSaxParserContext
198
+ newInstance(final Ruby runtime, final RubyClass klazz)
199
+ {
200
+ return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz);
201
+ }
202
+
203
+ /**
204
+ * Set a property of the underlying parser.
205
+ */
206
+ protected void
207
+ setProperty(String key, Object val)
208
+ throws SAXNotRecognizedException, SAXNotSupportedException
209
+ {
210
+ parser.setProperty(key, val);
211
+ }
212
+
213
+ protected void
214
+ setContentHandler(ContentHandler handler)
215
+ {
216
+ parser.setContentHandler(handler);
217
+ }
218
+
219
+ protected void
220
+ setErrorHandler(ErrorHandler handler)
221
+ {
222
+ parser.setErrorHandler(handler);
223
+ }
224
+
225
+ public final NokogiriHandler
226
+ getNokogiriHandler() { return handler; }
227
+
228
+ /**
229
+ * Perform any initialization prior to parsing with the handler
230
+ * <code>handlerRuby</code>. Convenience hook for subclasses.
231
+ */
232
+ protected void
233
+ preParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
234
+ {
235
+ ((XmlSaxParser) parser).setXmlDeclHandler(handler);
236
+ if (recovery) {
237
+ try {
238
+ parser.setFeature(FEATURE_CONTINUE_AFTER_FATAL_ERROR, true);
239
+ } catch (Exception e) {
240
+ // Unexpected failure in XML subsystem
241
+ throw runtime.newRuntimeError(e.getMessage());
242
+ }
179
243
  }
180
-
181
- private static XmlSaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) {
182
- return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz);
244
+ }
245
+
246
+ protected void
247
+ postParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler)
248
+ {
249
+ // noop
250
+ }
251
+
252
+ protected void
253
+ do_parse() throws SAXException, IOException
254
+ {
255
+ parser.parse(getInputSource());
256
+ }
257
+
258
+ @JRubyMethod
259
+ public IRubyObject
260
+ parse_with(ThreadContext context, IRubyObject handlerRuby)
261
+ {
262
+ final Ruby runtime = context.getRuntime();
263
+
264
+ if (!invoke(context, handlerRuby, "respond_to?", runtime.newSymbol("document")).isTrue()) {
265
+ throw runtime.newArgumentError("argument must respond_to document");
183
266
  }
184
267
 
185
- /**
186
- * Set a property of the underlying parser.
187
- */
188
- protected void setProperty(String key, Object val)
189
- throws SAXNotRecognizedException, SAXNotSupportedException {
190
- parser.setProperty(key, val);
191
- }
268
+ NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby);
269
+ preParse(runtime, handlerRuby, handler);
192
270
 
193
- protected void setContentHandler(ContentHandler handler) {
194
- parser.setContentHandler(handler);
195
- }
271
+ setContentHandler(handler);
272
+ setErrorHandler(handler);
196
273
 
197
- protected void setErrorHandler(ErrorHandler handler) {
198
- parser.setErrorHandler(handler);
274
+ try {
275
+ setProperty("http://xml.org/sax/properties/lexical-handler", handler);
276
+ } catch (Exception ex) {
277
+ throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
199
278
  }
200
279
 
201
- public final NokogiriHandler getNokogiriHandler() { return handler; }
202
-
203
- /**
204
- * Perform any initialization prior to parsing with the handler
205
- * <code>handlerRuby</code>. Convenience hook for subclasses.
206
- */
207
- protected void preParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
208
- ((XmlSaxParser) parser).setXmlDeclHandler(handler);
209
- if (recovery) {
210
- try {
211
- parser.setFeature(FEATURE_CONTINUE_AFTER_FATAL_ERROR, true);
212
- }
213
- catch (Exception e) {
214
- // Unexpected failure in XML subsystem
215
- throw runtime.newRuntimeError(e.getMessage());
216
- }
280
+ try {
281
+ try {
282
+ do_parse();
283
+ } catch (SAXParseException ex) {
284
+ // A bad document (<foo><bar></foo>) should call the
285
+ // error handler instead of raising a SAX exception.
286
+
287
+ // However, an EMPTY document should raise a RuntimeError.
288
+ // This is a bit kludgy, but AFAIK SAX doesn't distinguish
289
+ // between empty and bad whereas Nokogiri does.
290
+ String message = ex.getMessage();
291
+ if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) {
292
+ throw runtime.newRuntimeError("couldn't parse document: " + message);
217
293
  }
294
+ handler.error(ex);
295
+ }
296
+ } catch (SAXException ex) {
297
+ // Unexpected failure in XML subsystem
298
+ throw runtime.newRuntimeError(ex.getMessage());
299
+ } catch (IOException ex) {
300
+ throw runtime.newIOErrorFromException(ex);
218
301
  }
219
302
 
220
- protected void postParse(Ruby runtime, IRubyObject handlerRuby, NokogiriHandler handler) {
221
- // noop
303
+ postParse(runtime, handlerRuby, handler);
304
+
305
+ //maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby);
306
+
307
+ return runtime.getNil();
308
+ }
309
+
310
+ /**
311
+ * Can take a boolean assignment.
312
+ *
313
+ * @param context
314
+ * @param value
315
+ * @return
316
+ */
317
+ @JRubyMethod(name = "replace_entities=")
318
+ public IRubyObject
319
+ set_replace_entities(ThreadContext context, IRubyObject value)
320
+ {
321
+ replaceEntities = value.isTrue();
322
+ return this;
323
+ }
324
+
325
+ @JRubyMethod(name = "replace_entities")
326
+ public IRubyObject
327
+ get_replace_entities(ThreadContext context)
328
+ {
329
+ return context.runtime.newBoolean(replaceEntities);
330
+ }
331
+
332
+ /**
333
+ * Can take a boolean assignment.
334
+ *
335
+ * @param context
336
+ * @param value
337
+ * @return
338
+ */
339
+ @JRubyMethod(name = "recovery=")
340
+ public IRubyObject
341
+ set_recovery(ThreadContext context, IRubyObject value)
342
+ {
343
+ recovery = value.isTrue();
344
+ return this;
345
+ }
346
+
347
+ @JRubyMethod(name = "recovery")
348
+ public IRubyObject
349
+ get_recovery(ThreadContext context)
350
+ {
351
+ return context.runtime.newBoolean(recovery);
352
+ }
353
+
354
+ /**
355
+ * If the handler's document is a FragmentHandler, attempt to trim
356
+ * leading and trailing whitespace.
357
+ *
358
+ * This is a bit hackish and depends heavily on the internals of
359
+ * FragmentHandler.
360
+ */
361
+ protected void
362
+ maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, IRubyObject parser)
363
+ {
364
+ RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
365
+ RubyModule mod = context.getRuntime().getClassFromPath("Nokogiri::XML::FragmentHandler");
366
+
367
+ IRubyObject handler = adapter.getInstanceVariable(parser, "@document");
368
+ if (handler == null || handler.isNil() || !adapter.isKindOf(handler, mod)) {
369
+ return;
222
370
  }
223
-
224
- protected void do_parse() throws SAXException, IOException {
225
- parser.parse(getInputSource());
371
+ IRubyObject stack = adapter.getInstanceVariable(handler, "@stack");
372
+ if (stack == null || stack.isNil()) {
373
+ return;
226
374
  }
227
-
228
- @JRubyMethod
229
- public IRubyObject parse_with(ThreadContext context, IRubyObject handlerRuby) {
230
- final Ruby runtime = context.getRuntime();
231
-
232
- if(!invoke(context, handlerRuby, "respond_to?", runtime.newSymbol("document")).isTrue()) {
233
- throw runtime.newArgumentError("argument must respond_to document");
234
- }
235
-
236
- NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby);
237
- preParse(runtime, handlerRuby, handler);
238
-
239
- setContentHandler(handler);
240
- setErrorHandler(handler);
241
-
242
- try{
243
- setProperty("http://xml.org/sax/properties/lexical-handler", handler);
244
- }
245
- catch (Exception ex) {
246
- throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
247
- }
248
-
249
- try{
250
- try {
251
- do_parse();
252
- }
253
- catch (SAXParseException ex) {
254
- // A bad document (<foo><bar></foo>) should call the
255
- // error handler instead of raising a SAX exception.
256
-
257
- // However, an EMPTY document should raise a RuntimeError.
258
- // This is a bit kludgy, but AFAIK SAX doesn't distinguish
259
- // between empty and bad whereas Nokogiri does.
260
- String message = ex.getMessage();
261
- if (message != null && message.contains("Premature end of file.") && stringDataSize < 1) {
262
- throw runtime.newRuntimeError("couldn't parse document: " + message);
263
- }
264
- handler.error(ex);
265
- }
266
- }
267
- catch (SAXException ex) {
268
- // Unexpected failure in XML subsystem
269
- throw runtime.newRuntimeError(ex.getMessage());
270
- }
271
- catch (IOException ex) {
272
- throw runtime.newIOErrorFromException(ex);
273
- }
274
-
275
- postParse(runtime, handlerRuby, handler);
276
-
277
- //maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby);
278
-
279
- return runtime.getNil();
375
+ // doc is finally a DocumentFragment whose nodes we can check
376
+ IRubyObject doc = adapter.callMethod(stack, "first");
377
+ if (doc == null || doc.isNil()) {
378
+ return;
280
379
  }
281
380
 
282
- /**
283
- * Can take a boolean assignment.
284
- *
285
- * @param context
286
- * @param value
287
- * @return
288
- */
289
- @JRubyMethod(name = "replace_entities=")
290
- public IRubyObject set_replace_entities(ThreadContext context, IRubyObject value) {
291
- replaceEntities = value.isTrue();
292
- return this;
293
- }
381
+ IRubyObject children;
294
382
 
295
- @JRubyMethod(name="replace_entities")
296
- public IRubyObject get_replace_entities(ThreadContext context) {
297
- return context.runtime.newBoolean(replaceEntities);
383
+ for (;;) {
384
+ children = adapter.callMethod(doc, "children");
385
+ IRubyObject first = adapter.callMethod(children, "first");
386
+ if (NokogiriHelpers.isBlank(first)) { adapter.callMethod(first, "unlink"); }
387
+ else { break; }
298
388
  }
299
389
 
300
- /**
301
- * Can take a boolean assignment.
302
- *
303
- * @param context
304
- * @param value
305
- * @return
306
- */
307
- @JRubyMethod(name = "recovery=")
308
- public IRubyObject set_recovery(ThreadContext context, IRubyObject value) {
309
- recovery = value.isTrue();
310
- return this;
390
+ for (;;) {
391
+ children = adapter.callMethod(doc, "children");
392
+ IRubyObject last = adapter.callMethod(children, "last");
393
+ if (NokogiriHelpers.isBlank(last)) { adapter.callMethod(last, "unlink"); }
394
+ else { break; }
311
395
  }
312
396
 
313
- @JRubyMethod(name="recovery")
314
- public IRubyObject get_recovery(ThreadContext context) {
315
- return context.runtime.newBoolean(recovery);
316
- }
317
-
318
- /**
319
- * If the handler's document is a FragmentHandler, attempt to trim
320
- * leading and trailing whitespace.
321
- *
322
- * This is a bit hackish and depends heavily on the internals of
323
- * FragmentHandler.
324
- */
325
- protected void maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, IRubyObject parser) {
326
- RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
327
- RubyModule mod = context.getRuntime().getClassFromPath("Nokogiri::XML::FragmentHandler");
328
-
329
- IRubyObject handler = adapter.getInstanceVariable(parser, "@document");
330
- if (handler == null || handler.isNil() || !adapter.isKindOf(handler, mod))
331
- return;
332
- IRubyObject stack = adapter.getInstanceVariable(handler, "@stack");
333
- if (stack == null || stack.isNil())
334
- return;
335
- // doc is finally a DocumentFragment whose nodes we can check
336
- IRubyObject doc = adapter.callMethod(stack, "first");
337
- if (doc == null || doc.isNil())
338
- return;
339
-
340
- IRubyObject children;
341
-
342
- for (;;) {
343
- children = adapter.callMethod(doc, "children");
344
- IRubyObject first = adapter.callMethod(children, "first");
345
- if (NokogiriHelpers.isBlank(first)) adapter.callMethod(first, "unlink");
346
- else break;
347
- }
348
-
349
- for (;;) {
350
- children = adapter.callMethod(doc, "children");
351
- IRubyObject last = adapter.callMethod(children, "last");
352
- if (NokogiriHelpers.isBlank(last)) adapter.callMethod(last, "unlink");
353
- else break;
354
- }
355
-
356
- // While we have a document, normalize it.
357
- ((XmlNode) doc).normalize();
358
- }
359
-
360
- @JRubyMethod(name="column")
361
- public IRubyObject column(ThreadContext context) {
362
- final Integer number = handler.getColumn();
363
- if (number == null) return context.getRuntime().getNil();
364
- return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
365
- }
366
-
367
- @JRubyMethod(name="line")
368
- public IRubyObject line(ThreadContext context) {
369
- final Integer number = handler.getLine();
370
- if (number == null) return context.getRuntime().getNil();
371
- return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
372
- }
397
+ // While we have a document, normalize it.
398
+ ((XmlNode) doc).normalize();
399
+ }
400
+
401
+ @JRubyMethod(name = "column")
402
+ public IRubyObject
403
+ column(ThreadContext context)
404
+ {
405
+ final Integer number = handler.getColumn();
406
+ if (number == null) { return context.getRuntime().getNil(); }
407
+ return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
408
+ }
409
+
410
+ @JRubyMethod(name = "line")
411
+ public IRubyObject
412
+ line(ThreadContext context)
413
+ {
414
+ final Integer number = handler.getLine();
415
+ if (number == null) { return context.getRuntime().getNil(); }
416
+ return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
417
+ }
373
418
 
374
419
  }