nokogiri 1.5.0-java → 1.5.1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +56 -12
- data/CHANGELOG.rdoc +45 -0
- data/C_CODING_STYLE.rdoc +27 -0
- data/Manifest.txt +4 -0
- data/README.rdoc +11 -7
- data/Rakefile +44 -26
- data/bin/nokogiri +10 -2
- data/ext/java/nokogiri/HtmlDocument.java +37 -2
- data/ext/java/nokogiri/NokogiriService.java +10 -2
- data/ext/java/nokogiri/XmlAttr.java +1 -1
- data/ext/java/nokogiri/XmlDocument.java +68 -11
- data/ext/java/nokogiri/XmlDocumentFragment.java +16 -5
- data/ext/java/nokogiri/XmlElement.java +0 -40
- data/ext/java/nokogiri/XmlNamespace.java +8 -1
- data/ext/java/nokogiri/XmlNode.java +131 -27
- data/ext/java/nokogiri/XmlNodeSet.java +4 -1
- data/ext/java/nokogiri/XmlSaxParserContext.java +2 -13
- data/ext/java/nokogiri/XmlXpathContext.java +4 -1
- data/ext/java/nokogiri/XsltStylesheet.java +198 -37
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +40 -2
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +82 -9
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +4 -3
- data/ext/java/nokogiri/internals/ParserContext.java +33 -3
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +203 -12
- data/ext/java/nokogiri/internals/XmlDomParser.java +33 -2
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +32 -12
- data/ext/nokogiri/extconf.rb +11 -3
- data/ext/nokogiri/html_document.c +16 -0
- data/ext/nokogiri/html_sax_parser_context.c +59 -37
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +6 -8
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +101 -3
- data/ext/nokogiri/xml_document.h +3 -3
- data/ext/nokogiri/xml_node.c +150 -58
- data/ext/nokogiri/xml_node_set.c +169 -120
- data/ext/nokogiri/xml_node_set.h +5 -0
- data/ext/nokogiri/xml_sax_parser_context.c +64 -41
- data/ext/nokogiri/xml_text.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +30 -24
- data/ext/nokogiri/xslt_stylesheet.c +62 -16
- data/ext/nokogiri/xslt_stylesheet.h +5 -0
- data/lib/nokogiri/css/parser.rb +163 -157
- data/lib/nokogiri/css/parser.y +6 -3
- data/lib/nokogiri/css/tokenizer.rb +1 -1
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/document.rb +82 -42
- data/lib/nokogiri/html/sax/push_parser.rb +16 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +6 -0
- data/lib/nokogiri/xml/builder.rb +7 -1
- data/lib/nokogiri/xml/document.rb +32 -17
- data/lib/nokogiri/xml/document_fragment.rb +6 -1
- data/lib/nokogiri/xml/node.rb +40 -9
- data/lib/nokogiri/xslt.rb +5 -1
- data/tasks/cross_compile.rb +1 -0
- data/tasks/nokogiri.org.rb +6 -0
- data/tasks/test.rb +1 -0
- data/test/css/test_xpath_visitor.rb +6 -0
- data/test/helper.rb +1 -0
- data/test/html/test_document.rb +26 -0
- data/test/html/test_document_fragment.rb +1 -2
- data/test/test_memory_leak.rb +81 -1
- data/test/test_xslt_transforms.rb +152 -123
- data/test/xml/test_builder.rb +24 -2
- data/test/xml/test_c14n.rb +151 -0
- data/test/xml/test_document.rb +48 -0
- data/test/xml/test_namespace.rb +5 -0
- data/test/xml/test_node.rb +82 -1
- data/test/xml/test_node_attributes.rb +19 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +32 -0
- data/test/xml/test_node_set.rb +16 -8
- data/test/xml/test_reader_encoding.rb +16 -0
- data/test/xml/test_unparented_node.rb +32 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +22 -0
- metadata +147 -123
@@ -35,8 +35,14 @@ package nokogiri.internals;
|
|
35
35
|
import java.io.IOException;
|
36
36
|
|
37
37
|
import nokogiri.XmlDocument;
|
38
|
+
import nokogiri.internals.ParserContext.Options;
|
38
39
|
|
39
40
|
import org.apache.xerces.parsers.DOMParser;
|
41
|
+
import org.apache.xerces.parsers.XIncludeParserConfiguration;
|
42
|
+
import org.apache.xerces.xni.XMLResourceIdentifier;
|
43
|
+
import org.apache.xerces.xni.XNIException;
|
44
|
+
import org.apache.xerces.xni.parser.XMLEntityResolver;
|
45
|
+
import org.apache.xerces.xni.parser.XMLInputSource;
|
40
46
|
import org.apache.xerces.xni.parser.XMLParserConfiguration;
|
41
47
|
import org.cyberneko.dtd.DTDConfiguration;
|
42
48
|
import org.w3c.dom.Document;
|
@@ -51,14 +57,21 @@ import org.xml.sax.SAXException;
|
|
51
57
|
*/
|
52
58
|
public class XmlDomParser extends DOMParser {
|
53
59
|
DOMParser dtd;
|
60
|
+
ParserContext.Options options;
|
54
61
|
|
55
|
-
public XmlDomParser() {
|
62
|
+
public XmlDomParser(ParserContext.Options options) {
|
56
63
|
super();
|
64
|
+
this.options = options;
|
57
65
|
|
58
66
|
DTDConfiguration dtdConfig = new DTDConfiguration();
|
59
67
|
dtd = new DOMParser(dtdConfig);
|
60
68
|
|
61
|
-
XMLParserConfiguration config
|
69
|
+
XMLParserConfiguration config;
|
70
|
+
if (options.xInclude) {
|
71
|
+
config = new XIncludeParserConfiguration();
|
72
|
+
} else {
|
73
|
+
config = getXMLParserConfiguration();
|
74
|
+
}
|
62
75
|
config.setDTDHandler(dtdConfig);
|
63
76
|
config.setDTDContentModelHandler(dtdConfig);
|
64
77
|
}
|
@@ -66,6 +79,9 @@ public class XmlDomParser extends DOMParser {
|
|
66
79
|
@Override
|
67
80
|
public void parse(InputSource source) throws SAXException, IOException {
|
68
81
|
dtd.reset();
|
82
|
+
if (options.xInclude) {
|
83
|
+
super.setEntityResolver(new NokogiriXInlcudeEntityResolver(source));
|
84
|
+
}
|
69
85
|
super.parse(source);
|
70
86
|
Document doc = getDocument();
|
71
87
|
if (doc == null)
|
@@ -73,4 +89,19 @@ public class XmlDomParser extends DOMParser {
|
|
73
89
|
|
74
90
|
doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), null);
|
75
91
|
}
|
92
|
+
|
93
|
+
private class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver {
|
94
|
+
InputSource source;
|
95
|
+
private NokogiriXInlcudeEntityResolver(InputSource source) {
|
96
|
+
this.source = source;
|
97
|
+
}
|
98
|
+
|
99
|
+
@Override
|
100
|
+
public InputSource resolveEntity(String publicId, String systemId)
|
101
|
+
throws SAXException, IOException {
|
102
|
+
if (systemId != null) source.setSystemId(systemId);
|
103
|
+
if (publicId != null) source.setPublicId(publicId);
|
104
|
+
return source;
|
105
|
+
}
|
106
|
+
}
|
76
107
|
}
|
@@ -34,7 +34,6 @@ package nokogiri.internals;
|
|
34
34
|
|
35
35
|
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
36
|
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
37
|
-
|
38
37
|
import java.io.ByteArrayInputStream;
|
39
38
|
import java.io.IOException;
|
40
39
|
import java.util.ArrayList;
|
@@ -73,6 +72,8 @@ public class XmlDomParserContext extends ParserContext {
|
|
73
72
|
protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
|
74
73
|
"http://apache.org/xml/features/dom/include-ignorable-whitespace";
|
75
74
|
protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
|
75
|
+
private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude";
|
76
|
+
private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager";
|
76
77
|
|
77
78
|
protected ParserContext.Options options;
|
78
79
|
protected DOMParser parser;
|
@@ -102,9 +103,17 @@ public class XmlDomParserContext extends ParserContext {
|
|
102
103
|
}
|
103
104
|
|
104
105
|
protected void initParser(Ruby runtime) {
|
105
|
-
|
106
|
+
if (options.xInclude) {
|
107
|
+
System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration",
|
108
|
+
"org.apache.xerces.parsers.XIncludeParserConfiguration");
|
109
|
+
}
|
110
|
+
|
111
|
+
parser = new XmlDomParser(options);
|
106
112
|
parser.setErrorHandler(errorHandler);
|
107
113
|
|
114
|
+
// Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
|
115
|
+
setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
|
116
|
+
|
108
117
|
if (options.noBlanks) {
|
109
118
|
setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false);
|
110
119
|
}
|
@@ -160,20 +169,30 @@ public class XmlDomParserContext extends ParserContext {
|
|
160
169
|
doc.setInstanceVariable("@errors", errors);
|
161
170
|
}
|
162
171
|
|
163
|
-
public XmlDocument getDocumentWithErrorsOrRaiseException(ThreadContext context, Exception ex) {
|
172
|
+
public XmlDocument getDocumentWithErrorsOrRaiseException(ThreadContext context, RubyClass klazz, Exception ex) {
|
164
173
|
if (options.recover) {
|
165
|
-
XmlDocument
|
166
|
-
this.addErrorsIfNecessary(context,
|
174
|
+
XmlDocument xmlDocument = getInterruptedOrNewXmlDocument(context, klazz);
|
175
|
+
this.addErrorsIfNecessary(context, xmlDocument);
|
167
176
|
XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SyntaxError"));
|
168
177
|
xmlSyntaxError.setException(ex);
|
169
|
-
((RubyArray)
|
170
|
-
return
|
178
|
+
((RubyArray) xmlDocument.getInstanceVariable("@errors")).append(xmlSyntaxError);
|
179
|
+
return xmlDocument;
|
171
180
|
} else {
|
172
181
|
XmlSyntaxError xmlSyntaxError = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::SyntaxError"));
|
173
182
|
xmlSyntaxError.setException(ex);
|
174
183
|
throw new RaiseException(xmlSyntaxError);
|
175
184
|
}
|
176
185
|
}
|
186
|
+
|
187
|
+
private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyClass klazz) {
|
188
|
+
Document document = parser.getDocument();
|
189
|
+
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
|
190
|
+
if (document != null) {
|
191
|
+
xmlDocument.setDocumentNode(context, document);
|
192
|
+
}
|
193
|
+
xmlDocument.setEncoding(ruby_encoding);
|
194
|
+
return xmlDocument;
|
195
|
+
}
|
177
196
|
|
178
197
|
protected XmlDocument getNewEmptyDocument(ThreadContext context) {
|
179
198
|
IRubyObject[] args = new IRubyObject[0];
|
@@ -188,7 +207,7 @@ public class XmlDomParserContext extends ParserContext {
|
|
188
207
|
RubyClass klazz,
|
189
208
|
Document doc) {
|
190
209
|
XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
|
191
|
-
xmlDocument.
|
210
|
+
xmlDocument.setDocumentNode(context, doc);
|
192
211
|
xmlDocument.setEncoding(ruby_encoding);
|
193
212
|
|
194
213
|
if (options.dtdLoad) {
|
@@ -202,18 +221,19 @@ public class XmlDomParserContext extends ParserContext {
|
|
202
221
|
* Must call setInputSource() before this method.
|
203
222
|
*/
|
204
223
|
public XmlDocument parse(ThreadContext context,
|
205
|
-
IRubyObject
|
224
|
+
IRubyObject klazz,
|
206
225
|
IRubyObject url) {
|
226
|
+
XmlDocument xmlDoc;
|
207
227
|
try {
|
208
228
|
Document doc = do_parse();
|
209
|
-
|
229
|
+
xmlDoc = wrapDocument(context, (RubyClass)klazz, doc);
|
210
230
|
xmlDoc.setUrl(url);
|
211
231
|
addErrorsIfNecessary(context, xmlDoc);
|
212
232
|
return xmlDoc;
|
213
233
|
} catch (SAXException e) {
|
214
|
-
return getDocumentWithErrorsOrRaiseException(context, e);
|
234
|
+
return getDocumentWithErrorsOrRaiseException(context, (RubyClass)klazz, e);
|
215
235
|
} catch (IOException e) {
|
216
|
-
return getDocumentWithErrorsOrRaiseException(context, e);
|
236
|
+
return getDocumentWithErrorsOrRaiseException(context, (RubyClass)klazz, e);
|
217
237
|
}
|
218
238
|
}
|
219
239
|
|
data/ext/nokogiri/extconf.rb
CHANGED
@@ -17,7 +17,7 @@ end
|
|
17
17
|
$CFLAGS << " #{ENV["CFLAGS"]}"
|
18
18
|
$LIBS << " #{ENV["LIBS"]}"
|
19
19
|
|
20
|
-
if RbConfig::CONFIG['target_os'] == 'mingw32' || RbConfig::CONFIG['target_os'] =~ /
|
20
|
+
if RbConfig::CONFIG['target_os'] == 'mingw32' || RbConfig::CONFIG['target_os'] =~ /mswin/
|
21
21
|
$CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
|
22
22
|
elsif RbConfig::CONFIG['target_os'] =~ /solaris/
|
23
23
|
$CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
|
@@ -30,10 +30,11 @@ if RbConfig::MAKEFILE_CONFIG['CC'] =~ /mingw/
|
|
30
30
|
end
|
31
31
|
|
32
32
|
if RbConfig::MAKEFILE_CONFIG['CC'] =~ /gcc/
|
33
|
-
$CFLAGS << " -O3
|
33
|
+
$CFLAGS << " -O3" unless $CFLAGS[/-O\d/]
|
34
|
+
$CFLAGS << " -Wall -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
|
34
35
|
end
|
35
36
|
|
36
|
-
if RbConfig::CONFIG['target_os'] =~ /
|
37
|
+
if RbConfig::CONFIG['target_os'] =~ /mswin/
|
37
38
|
lib_prefix = 'lib'
|
38
39
|
|
39
40
|
# There's no default include/lib dir on Windows. Let's just add the Ruby ones
|
@@ -80,6 +81,13 @@ else
|
|
80
81
|
'/usr/local/include/libxml2',
|
81
82
|
File.join(INCLUDEDIR, "libxml2")
|
82
83
|
] + HEADER_DIRS
|
84
|
+
|
85
|
+
# If the user has homebrew installed, use the libxml2 inside homebrew
|
86
|
+
brew_prefix = `brew --prefix libxml2 2> /dev/null`.chomp
|
87
|
+
unless brew_prefix.empty?
|
88
|
+
LIB_DIRS.unshift File.join(brew_prefix, 'lib')
|
89
|
+
XML2_HEADER_DIRS.unshift File.join(brew_prefix, 'include/libxml2')
|
90
|
+
end
|
83
91
|
end
|
84
92
|
|
85
93
|
dir_config('zlib', HEADER_DIRS, LIB_DIRS)
|
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <html_document.h>
|
2
2
|
|
3
|
+
static ID id_encoding_found;
|
4
|
+
|
3
5
|
/*
|
4
6
|
* call-seq:
|
5
7
|
* new
|
@@ -56,6 +58,18 @@ static VALUE read_io( VALUE klass,
|
|
56
58
|
);
|
57
59
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
58
60
|
|
61
|
+
/*
|
62
|
+
* If EncodingFound has occurred in EncodingReader, make sure to do
|
63
|
+
* a cleanup and propagate the error.
|
64
|
+
*/
|
65
|
+
if (rb_respond_to(io, id_encoding_found)) {
|
66
|
+
VALUE encoding_found = rb_funcall(io, id_encoding_found, 0);
|
67
|
+
if (!NIL_P(encoding_found)) {
|
68
|
+
xmlFreeDoc(doc);
|
69
|
+
rb_exc_raise(encoding_found);
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
59
73
|
if(doc == NULL) {
|
60
74
|
xmlErrorPtr error;
|
61
75
|
|
@@ -151,4 +165,6 @@ void init_html_document()
|
|
151
165
|
rb_define_singleton_method(klass, "new", new, -1);
|
152
166
|
|
153
167
|
rb_define_method(klass, "type", type, 0);
|
168
|
+
|
169
|
+
id_encoding_found = rb_intern("encoding_found");
|
154
170
|
}
|
@@ -13,31 +13,35 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
13
13
|
NOKOGIRI_DEBUG_END(handler);
|
14
14
|
}
|
15
15
|
|
16
|
-
static VALUE
|
16
|
+
static VALUE
|
17
|
+
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
17
18
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
19
|
+
htmlParserCtxtPtr ctxt;
|
20
|
+
|
21
|
+
if (NIL_P(data))
|
22
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
23
|
+
if (!(int)RSTRING_LEN(data))
|
24
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
25
|
+
|
26
|
+
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
|
27
|
+
(int)RSTRING_LEN(data));
|
28
|
+
if (ctxt->sax) {
|
29
|
+
xmlFree(ctxt->sax);
|
30
|
+
ctxt->sax = NULL;
|
31
|
+
}
|
28
32
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
if (RTEST(encoding)) {
|
34
|
+
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
|
35
|
+
if (enc != NULL) {
|
36
|
+
xmlSwitchToEncoding(ctxt, enc);
|
37
|
+
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
38
|
+
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
39
|
+
StringValuePtr(encoding));
|
40
|
+
}
|
41
|
+
}
|
37
42
|
}
|
38
|
-
}
|
39
43
|
|
40
|
-
|
44
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
41
45
|
}
|
42
46
|
|
43
47
|
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
@@ -49,30 +53,48 @@ static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
|
49
53
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
50
54
|
}
|
51
55
|
|
52
|
-
static VALUE
|
56
|
+
static VALUE
|
57
|
+
parse_doc(VALUE ctxt_val)
|
58
|
+
{
|
59
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
60
|
+
htmlParseDocument(ctxt);
|
61
|
+
return Qnil;
|
62
|
+
}
|
63
|
+
|
64
|
+
static VALUE
|
65
|
+
parse_doc_finalize(VALUE ctxt_val)
|
53
66
|
{
|
54
|
-
|
55
|
-
|
67
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
68
|
+
|
69
|
+
if (ctxt->myDoc)
|
70
|
+
xmlFreeDoc(ctxt->myDoc);
|
56
71
|
|
57
|
-
|
58
|
-
|
72
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
73
|
+
return Qnil;
|
74
|
+
}
|
75
|
+
|
76
|
+
static VALUE
|
77
|
+
parse_with(VALUE self, VALUE sax_handler)
|
78
|
+
{
|
79
|
+
htmlParserCtxtPtr ctxt;
|
80
|
+
htmlSAXHandlerPtr sax;
|
59
81
|
|
60
|
-
|
61
|
-
|
82
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
|
83
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
62
84
|
|
63
|
-
|
64
|
-
|
65
|
-
xmlFree(ctxt->sax);
|
85
|
+
Data_Get_Struct(self, htmlParserCtxt, ctxt);
|
86
|
+
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
|
66
87
|
|
67
|
-
|
68
|
-
|
88
|
+
/* Free the sax handler since we'll assign our own */
|
89
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
|
90
|
+
xmlFree(ctxt->sax);
|
69
91
|
|
70
|
-
|
92
|
+
ctxt->sax = sax;
|
93
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
71
94
|
|
72
|
-
|
95
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
73
96
|
|
74
|
-
|
75
|
-
return self;
|
97
|
+
return self;
|
76
98
|
}
|
77
99
|
|
78
100
|
void init_html_sax_parser_context()
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#include <html_sax_push_parser.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* native_write(chunk, last_chunk)
|
6
|
+
*
|
7
|
+
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
|
8
|
+
*/
|
9
|
+
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
10
|
+
{
|
11
|
+
xmlParserCtxtPtr ctx;
|
12
|
+
const char * chunk = NULL;
|
13
|
+
int size = 0;
|
14
|
+
|
15
|
+
|
16
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
17
|
+
|
18
|
+
if(Qnil != _chunk) {
|
19
|
+
chunk = StringValuePtr(_chunk);
|
20
|
+
size = (int)RSTRING_LEN(_chunk);
|
21
|
+
}
|
22
|
+
|
23
|
+
if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
24
|
+
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
25
|
+
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
26
|
+
Nokogiri_error_raise(NULL, e);
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
30
|
+
return self;
|
31
|
+
}
|
32
|
+
|
33
|
+
/*
|
34
|
+
* call-seq:
|
35
|
+
* initialize_native(xml_sax, filename)
|
36
|
+
*
|
37
|
+
* Initialize the push parser with +xml_sax+ using +filename+
|
38
|
+
*/
|
39
|
+
static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
40
|
+
VALUE encoding)
|
41
|
+
{
|
42
|
+
htmlSAXHandlerPtr sax;
|
43
|
+
const char * filename = NULL;
|
44
|
+
htmlParserCtxtPtr ctx;
|
45
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
46
|
+
|
47
|
+
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
48
|
+
|
49
|
+
if(_filename != Qnil) filename = StringValuePtr(_filename);
|
50
|
+
|
51
|
+
if (!NIL_P(encoding)) {
|
52
|
+
enc = xmlParseCharEncoding(StringValuePtr(encoding));
|
53
|
+
if (enc == XML_CHAR_ENCODING_ERROR)
|
54
|
+
rb_raise(rb_eArgError, "Unsupported Encoding");
|
55
|
+
}
|
56
|
+
|
57
|
+
ctx = htmlCreatePushParserCtxt(
|
58
|
+
sax,
|
59
|
+
NULL,
|
60
|
+
NULL,
|
61
|
+
0,
|
62
|
+
filename,
|
63
|
+
enc
|
64
|
+
);
|
65
|
+
if(ctx == NULL)
|
66
|
+
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
67
|
+
|
68
|
+
ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
|
69
|
+
|
70
|
+
ctx->sax2 = 1;
|
71
|
+
DATA_PTR(self) = ctx;
|
72
|
+
return self;
|
73
|
+
}
|
74
|
+
|
75
|
+
VALUE cNokogiriHtmlSaxPushParser;
|
76
|
+
void init_html_sax_push_parser()
|
77
|
+
{
|
78
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
79
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
80
|
+
VALUE sax = rb_define_module_under(html, "SAX");
|
81
|
+
VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
|
82
|
+
|
83
|
+
cNokogiriHtmlSaxPushParser = klass;
|
84
|
+
|
85
|
+
rb_define_private_method(klass, "initialize_native", initialize_native, 3);
|
86
|
+
rb_define_private_method(klass, "native_write", native_write, 2);
|
87
|
+
}
|