nokogiri 1.11.0.rc3-java → 1.11.4-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/LICENSE.md +1 -1
- data/README.md +168 -91
- data/dependencies.yml +12 -12
- data/ext/java/nokogiri/EncodingHandler.java +76 -89
- data/ext/java/nokogiri/HtmlDocument.java +135 -144
- data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
- data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
- data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
- data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
- data/ext/java/nokogiri/NokogiriService.java +595 -556
- data/ext/java/nokogiri/XmlAttr.java +118 -126
- data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
- data/ext/java/nokogiri/XmlCdata.java +35 -58
- data/ext/java/nokogiri/XmlComment.java +46 -67
- data/ext/java/nokogiri/XmlDocument.java +645 -572
- data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
- data/ext/java/nokogiri/XmlDtd.java +448 -414
- data/ext/java/nokogiri/XmlElement.java +23 -48
- data/ext/java/nokogiri/XmlElementContent.java +343 -316
- data/ext/java/nokogiri/XmlElementDecl.java +124 -125
- data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
- data/ext/java/nokogiri/XmlEntityReference.java +49 -72
- data/ext/java/nokogiri/XmlNamespace.java +175 -175
- data/ext/java/nokogiri/XmlNode.java +1843 -1622
- data/ext/java/nokogiri/XmlNodeSet.java +361 -331
- data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
- data/ext/java/nokogiri/XmlReader.java +513 -450
- data/ext/java/nokogiri/XmlRelaxng.java +89 -101
- data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
- data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
- data/ext/java/nokogiri/XmlSchema.java +335 -242
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
- data/ext/java/nokogiri/XmlText.java +55 -76
- data/ext/java/nokogiri/XmlXpathContext.java +242 -210
- data/ext/java/nokogiri/XsltStylesheet.java +280 -269
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
- data/ext/java/nokogiri/internals/ParserContext.java +206 -211
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
- data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
- data/ext/nokogiri/depend +37 -358
- data/ext/nokogiri/extconf.rb +581 -374
- data/ext/nokogiri/html_document.c +78 -82
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +69 -66
- data/ext/nokogiri/html_sax_push_parser.c +42 -34
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +192 -93
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +246 -188
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +371 -320
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +118 -118
- data/ext/nokogiri/xml_sax_parser_context.c +103 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +95 -47
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +206 -123
- data/ext/nokogiri/xslt_stylesheet.c +158 -161
- data/lib/nokogiri.rb +3 -7
- data/lib/nokogiri/css/parser.rb +3 -3
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +70 -42
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document.rb +12 -26
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +2 -149
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +205 -0
- data/lib/nokogiri/xml/document.rb +91 -35
- data/lib/nokogiri/xml/document_fragment.rb +4 -6
- data/lib/nokogiri/xml/node.rb +89 -69
- data/lib/nokogiri/xml/parse_options.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/relax_ng.rb +6 -2
- data/lib/nokogiri/xml/schema.rb +12 -4
- data/lib/nokogiri/xml/searchable.rb +3 -1
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +86 -177
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/nokogiri.h +0 -134
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,4 +1,6 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
static VALUE cNokogiriHtmlEntityLookup;
|
2
4
|
|
3
5
|
/*
|
4
6
|
* call-seq:
|
@@ -6,27 +8,30 @@
|
|
6
8
|
*
|
7
9
|
* Get the HTML::EntityDescription for +key+
|
8
10
|
*/
|
9
|
-
static VALUE
|
11
|
+
static VALUE
|
12
|
+
get(VALUE _, VALUE rb_entity_name)
|
10
13
|
{
|
11
|
-
|
12
|
-
|
13
|
-
VALUE
|
14
|
+
VALUE cNokogiriHtmlEntityDescription;
|
15
|
+
const htmlEntityDesc *c_entity_desc;
|
16
|
+
VALUE rb_constructor_args[3];
|
14
17
|
|
15
|
-
|
16
|
-
|
18
|
+
c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name));
|
19
|
+
if (NULL == c_entity_desc) {
|
20
|
+
return Qnil;
|
21
|
+
}
|
17
22
|
|
18
|
-
|
19
|
-
|
20
|
-
|
23
|
+
rb_constructor_args[0] = INT2NUM((long)c_entity_desc->value);
|
24
|
+
rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
|
25
|
+
rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
|
21
26
|
|
22
|
-
|
27
|
+
cNokogiriHtmlEntityDescription = rb_const_get_at(mNokogiriHtml, rb_intern("EntityDescription"));
|
28
|
+
return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtmlEntityDescription);
|
23
29
|
}
|
24
30
|
|
25
|
-
void
|
31
|
+
void
|
32
|
+
noko_init_html_entity_lookup()
|
26
33
|
{
|
27
|
-
|
28
|
-
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
29
|
-
VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject);
|
34
|
+
cNokogiriHtmlEntityLookup = rb_define_class_under(mNokogiriHtml, "EntityLookup", rb_cObject);
|
30
35
|
|
31
|
-
rb_define_method(
|
36
|
+
rb_define_method(cNokogiriHtmlEntityLookup, "get", get, 1);
|
32
37
|
}
|
@@ -1,116 +1,119 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE cNokogiriHtmlSaxParserContext ;
|
4
4
|
|
5
|
-
static void
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctxt)
|
6
7
|
{
|
7
|
-
NOKOGIRI_DEBUG_START(
|
8
|
+
NOKOGIRI_DEBUG_START(ctxt);
|
8
9
|
|
9
10
|
ctxt->sax = NULL;
|
10
11
|
|
11
12
|
htmlFreeParserCtxt(ctxt);
|
12
13
|
|
13
|
-
NOKOGIRI_DEBUG_END(
|
14
|
+
NOKOGIRI_DEBUG_END(ctxt);
|
14
15
|
}
|
15
16
|
|
16
17
|
static VALUE
|
17
18
|
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
18
19
|
{
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
htmlParserCtxtPtr ctxt;
|
21
|
+
|
22
|
+
if (NIL_P(data)) {
|
23
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
24
|
+
}
|
25
|
+
if (!(int)RSTRING_LEN(data)) {
|
26
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
27
|
+
}
|
28
|
+
|
29
|
+
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
|
30
|
+
(int)RSTRING_LEN(data));
|
31
|
+
if (ctxt->sax) {
|
32
|
+
xmlFree(ctxt->sax);
|
33
|
+
ctxt->sax = NULL;
|
34
|
+
}
|
35
|
+
|
36
|
+
if (RTEST(encoding)) {
|
37
|
+
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
|
38
|
+
if (enc != NULL) {
|
39
|
+
xmlSwitchToEncoding(ctxt, enc);
|
40
|
+
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
41
|
+
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
42
|
+
StringValueCStr(encoding));
|
43
|
+
}
|
31
44
|
}
|
45
|
+
}
|
32
46
|
|
33
|
-
|
34
|
-
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
|
35
|
-
if (enc != NULL) {
|
36
|
-
xmlSwitchToEncoding(ctxt, enc);
|
37
|
-
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
38
|
-
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
39
|
-
StringValueCStr(encoding));
|
40
|
-
}
|
41
|
-
}
|
42
|
-
}
|
43
|
-
|
44
|
-
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
47
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
45
48
|
}
|
46
49
|
|
47
|
-
static VALUE
|
50
|
+
static VALUE
|
51
|
+
parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
48
52
|
{
|
49
53
|
htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
|
50
|
-
|
51
|
-
|
52
|
-
|
54
|
+
StringValueCStr(filename),
|
55
|
+
StringValueCStr(encoding)
|
56
|
+
);
|
53
57
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
54
58
|
}
|
55
59
|
|
56
60
|
static VALUE
|
57
61
|
parse_doc(VALUE ctxt_val)
|
58
62
|
{
|
59
|
-
|
60
|
-
|
61
|
-
|
63
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
64
|
+
htmlParseDocument(ctxt);
|
65
|
+
return Qnil;
|
62
66
|
}
|
63
67
|
|
64
68
|
static VALUE
|
65
69
|
parse_doc_finalize(VALUE ctxt_val)
|
66
70
|
{
|
67
|
-
|
71
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
68
72
|
|
69
|
-
|
70
|
-
|
73
|
+
if (ctxt->myDoc) {
|
74
|
+
xmlFreeDoc(ctxt->myDoc);
|
75
|
+
}
|
71
76
|
|
72
|
-
|
73
|
-
|
77
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
78
|
+
return Qnil;
|
74
79
|
}
|
75
80
|
|
76
81
|
static VALUE
|
77
82
|
parse_with(VALUE self, VALUE sax_handler)
|
78
83
|
{
|
79
|
-
|
80
|
-
|
84
|
+
htmlParserCtxtPtr ctxt;
|
85
|
+
htmlSAXHandlerPtr sax;
|
86
|
+
|
87
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
88
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
89
|
+
}
|
81
90
|
|
82
|
-
|
83
|
-
|
91
|
+
Data_Get_Struct(self, htmlParserCtxt, ctxt);
|
92
|
+
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
|
84
93
|
|
85
|
-
|
86
|
-
|
94
|
+
/* Free the sax handler since we'll assign our own */
|
95
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
96
|
+
xmlFree(ctxt->sax);
|
97
|
+
}
|
87
98
|
|
88
|
-
|
89
|
-
|
90
|
-
xmlFree(ctxt->sax);
|
99
|
+
ctxt->sax = sax;
|
100
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
91
101
|
|
92
|
-
|
93
|
-
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
102
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
94
103
|
|
95
|
-
|
104
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
96
105
|
|
97
|
-
|
106
|
+
return self;
|
98
107
|
}
|
99
108
|
|
100
|
-
void
|
109
|
+
void
|
110
|
+
noko_init_html_sax_parser_context()
|
101
111
|
{
|
102
|
-
|
103
|
-
|
104
|
-
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
105
|
-
VALUE sax = rb_define_module_under(xml, "SAX");
|
106
|
-
VALUE hsax = rb_define_module_under(html, "SAX");
|
107
|
-
VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject);
|
108
|
-
VALUE klass = rb_define_class_under(hsax, "ParserContext", pc);
|
109
|
-
|
110
|
-
cNokogiriHtmlSaxParserContext = klass;
|
112
|
+
assert(cNokogiriXmlSaxParserContext);
|
113
|
+
cNokogiriHtmlSaxParserContext = rb_define_class_under(mNokogiriHtmlSax, "ParserContext", cNokogiriXmlSaxParserContext);
|
111
114
|
|
112
|
-
rb_define_singleton_method(
|
113
|
-
rb_define_singleton_method(
|
115
|
+
rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "memory", parse_memory, 2);
|
116
|
+
rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "file", parse_file, 2);
|
114
117
|
|
115
|
-
rb_define_method(
|
118
|
+
rb_define_method(cNokogiriHtmlSaxParserContext, "parse_with", parse_with, 1);
|
116
119
|
}
|
@@ -1,4 +1,6 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtmlSaxPushParser;
|
2
4
|
|
3
5
|
/*
|
4
6
|
* call-seq:
|
@@ -6,25 +8,32 @@
|
|
6
8
|
*
|
7
9
|
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
|
8
10
|
*/
|
9
|
-
static VALUE
|
11
|
+
static VALUE
|
12
|
+
native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
10
13
|
{
|
11
14
|
xmlParserCtxtPtr ctx;
|
12
|
-
const char *
|
13
|
-
int size
|
14
|
-
|
15
|
+
const char *chunk = NULL;
|
16
|
+
int size = 0;
|
17
|
+
int status = 0;
|
18
|
+
libxmlStructuredErrorHandlerState handler_state;
|
15
19
|
|
16
20
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
17
21
|
|
18
|
-
if(Qnil != _chunk) {
|
22
|
+
if (Qnil != _chunk) {
|
19
23
|
chunk = StringValuePtr(_chunk);
|
20
24
|
size = (int)RSTRING_LEN(_chunk);
|
21
25
|
}
|
22
26
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
27
|
+
Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);
|
28
|
+
|
29
|
+
status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);
|
30
|
+
|
31
|
+
Nokogiri_structured_error_func_restore(&handler_state);
|
32
|
+
|
33
|
+
if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
|
34
|
+
// TODO: there appear to be no tests for this block
|
35
|
+
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
36
|
+
Nokogiri_error_raise(NULL, e);
|
28
37
|
}
|
29
38
|
|
30
39
|
return self;
|
@@ -36,34 +45,37 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
36
45
|
*
|
37
46
|
* Initialize the push parser with +xml_sax+ using +filename+
|
38
47
|
*/
|
39
|
-
static VALUE
|
40
|
-
|
48
|
+
static VALUE
|
49
|
+
initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
50
|
+
VALUE encoding)
|
41
51
|
{
|
42
52
|
htmlSAXHandlerPtr sax;
|
43
|
-
const char *
|
53
|
+
const char *filename = NULL;
|
44
54
|
htmlParserCtxtPtr ctx;
|
45
55
|
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
46
56
|
|
47
57
|
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
48
58
|
|
49
|
-
if(_filename != Qnil) filename = StringValueCStr(_filename);
|
59
|
+
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
50
60
|
|
51
61
|
if (!NIL_P(encoding)) {
|
52
62
|
enc = xmlParseCharEncoding(StringValueCStr(encoding));
|
53
|
-
if (enc == XML_CHAR_ENCODING_ERROR)
|
63
|
+
if (enc == XML_CHAR_ENCODING_ERROR) {
|
54
64
|
rb_raise(rb_eArgError, "Unsupported Encoding");
|
65
|
+
}
|
55
66
|
}
|
56
67
|
|
57
68
|
ctx = htmlCreatePushParserCtxt(
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
if(ctx == NULL)
|
69
|
+
sax,
|
70
|
+
NULL,
|
71
|
+
NULL,
|
72
|
+
0,
|
73
|
+
filename,
|
74
|
+
enc
|
75
|
+
);
|
76
|
+
if (ctx == NULL) {
|
66
77
|
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
78
|
+
}
|
67
79
|
|
68
80
|
ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
|
69
81
|
|
@@ -72,16 +84,12 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
|
72
84
|
return self;
|
73
85
|
}
|
74
86
|
|
75
|
-
|
76
|
-
|
87
|
+
void
|
88
|
+
noko_init_html_sax_push_parser()
|
77
89
|
{
|
78
|
-
|
79
|
-
|
80
|
-
VALUE sax = rb_define_module_under(html, "SAX");
|
81
|
-
VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
|
82
|
-
|
83
|
-
cNokogiriHtmlSaxPushParser = klass;
|
90
|
+
assert(cNokogiriXmlSaxPushParser);
|
91
|
+
cNokogiriHtmlSaxPushParser = rb_define_class_under(mNokogiriHtmlSax, "PushParser", cNokogiriXmlSaxPushParser);
|
84
92
|
|
85
|
-
rb_define_private_method(
|
86
|
-
rb_define_private_method(
|
93
|
+
rb_define_private_method(cNokogiriHtmlSaxPushParser, "initialize_native", initialize_native, 3);
|
94
|
+
rb_define_private_method(cNokogiriHtmlSaxPushParser, "native_write", native_write, 2);
|
87
95
|
}
|
@@ -0,0 +1,121 @@
|
|
1
|
+
#ifndef HAVE_XMLFIRSTELEMENTCHILD
|
2
|
+
#include <nokogiri.h>
|
3
|
+
/**
|
4
|
+
* xmlFirstElementChild:
|
5
|
+
* @parent: the parent node
|
6
|
+
*
|
7
|
+
* Finds the first child node of that element which is an Element node
|
8
|
+
* Note the handling of entities references is different than in
|
9
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
10
|
+
* from entities content to entities references.
|
11
|
+
*
|
12
|
+
* Returns the first element child or NULL if not available
|
13
|
+
*/
|
14
|
+
xmlNodePtr
|
15
|
+
xmlFirstElementChild(xmlNodePtr parent)
|
16
|
+
{
|
17
|
+
xmlNodePtr cur = NULL;
|
18
|
+
|
19
|
+
if (parent == NULL) {
|
20
|
+
return (NULL);
|
21
|
+
}
|
22
|
+
switch (parent->type) {
|
23
|
+
case XML_ELEMENT_NODE:
|
24
|
+
case XML_ENTITY_NODE:
|
25
|
+
case XML_DOCUMENT_NODE:
|
26
|
+
case XML_HTML_DOCUMENT_NODE:
|
27
|
+
cur = parent->children;
|
28
|
+
break;
|
29
|
+
default:
|
30
|
+
return (NULL);
|
31
|
+
}
|
32
|
+
while (cur != NULL) {
|
33
|
+
if (cur->type == XML_ELEMENT_NODE) {
|
34
|
+
return (cur);
|
35
|
+
}
|
36
|
+
cur = cur->next;
|
37
|
+
}
|
38
|
+
return (NULL);
|
39
|
+
}
|
40
|
+
|
41
|
+
/**
|
42
|
+
* xmlNextElementSibling:
|
43
|
+
* @node: the current node
|
44
|
+
*
|
45
|
+
* Finds the first closest next sibling of the node which is an
|
46
|
+
* element node.
|
47
|
+
* Note the handling of entities references is different than in
|
48
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
49
|
+
* from entities content to entities references.
|
50
|
+
*
|
51
|
+
* Returns the next element sibling or NULL if not available
|
52
|
+
*/
|
53
|
+
xmlNodePtr
|
54
|
+
xmlNextElementSibling(xmlNodePtr node)
|
55
|
+
{
|
56
|
+
if (node == NULL) {
|
57
|
+
return (NULL);
|
58
|
+
}
|
59
|
+
switch (node->type) {
|
60
|
+
case XML_ELEMENT_NODE:
|
61
|
+
case XML_TEXT_NODE:
|
62
|
+
case XML_CDATA_SECTION_NODE:
|
63
|
+
case XML_ENTITY_REF_NODE:
|
64
|
+
case XML_ENTITY_NODE:
|
65
|
+
case XML_PI_NODE:
|
66
|
+
case XML_COMMENT_NODE:
|
67
|
+
case XML_DTD_NODE:
|
68
|
+
case XML_XINCLUDE_START:
|
69
|
+
case XML_XINCLUDE_END:
|
70
|
+
node = node->next;
|
71
|
+
break;
|
72
|
+
default:
|
73
|
+
return (NULL);
|
74
|
+
}
|
75
|
+
while (node != NULL) {
|
76
|
+
if (node->type == XML_ELEMENT_NODE) {
|
77
|
+
return (node);
|
78
|
+
}
|
79
|
+
node = node->next;
|
80
|
+
}
|
81
|
+
return (NULL);
|
82
|
+
}
|
83
|
+
|
84
|
+
/**
|
85
|
+
* xmlLastElementChild:
|
86
|
+
* @parent: the parent node
|
87
|
+
*
|
88
|
+
* Finds the last child node of that element which is an Element node
|
89
|
+
* Note the handling of entities references is different than in
|
90
|
+
* the W3C DOM element traversal spec since we don't have back reference
|
91
|
+
* from entities content to entities references.
|
92
|
+
*
|
93
|
+
* Returns the last element child or NULL if not available
|
94
|
+
*/
|
95
|
+
xmlNodePtr
|
96
|
+
xmlLastElementChild(xmlNodePtr parent)
|
97
|
+
{
|
98
|
+
xmlNodePtr cur = NULL;
|
99
|
+
|
100
|
+
if (parent == NULL) {
|
101
|
+
return (NULL);
|
102
|
+
}
|
103
|
+
switch (parent->type) {
|
104
|
+
case XML_ELEMENT_NODE:
|
105
|
+
case XML_ENTITY_NODE:
|
106
|
+
case XML_DOCUMENT_NODE:
|
107
|
+
case XML_HTML_DOCUMENT_NODE:
|
108
|
+
cur = parent->last;
|
109
|
+
break;
|
110
|
+
default:
|
111
|
+
return (NULL);
|
112
|
+
}
|
113
|
+
while (cur != NULL) {
|
114
|
+
if (cur->type == XML_ELEMENT_NODE) {
|
115
|
+
return (cur);
|
116
|
+
}
|
117
|
+
cur = cur->prev;
|
118
|
+
}
|
119
|
+
return (NULL);
|
120
|
+
}
|
121
|
+
#endif
|