nokogiri 1.10.7 → 1.16.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +42 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +188 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +862 -421
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +222 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +39 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +408 -243
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1343 -674
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +305 -213
- data/ext/nokogiri/xml_relax_ng.c +87 -78
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +149 -103
- data/ext/nokogiri/xml_sax_push_parser.c +65 -37
- data/ext/nokogiri/xml_schema.c +138 -82
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +35 -26
- data/ext/nokogiri/xml_xpath_context.c +363 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +126 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +205 -96
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +326 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +75 -34
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -127
- data/lib/nokogiri/xml/document_fragment.rb +93 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1096 -419
- data/lib/nokogiri/xml/node_set.rb +137 -61
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +7 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +39 -38
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +121 -291
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,18 +1,44 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
-
static
|
6
|
-
{
|
7
|
-
NOKOGIRI_DEBUG_START(handler);
|
5
|
+
static ID id_read;
|
8
6
|
|
7
|
+
static void
|
8
|
+
xml_sax_parser_context_free(void *data)
|
9
|
+
{
|
10
|
+
xmlParserCtxtPtr ctxt = data;
|
9
11
|
ctxt->sax = NULL;
|
10
|
-
|
11
12
|
xmlFreeParserCtxt(ctxt);
|
13
|
+
}
|
14
|
+
|
15
|
+
/*
|
16
|
+
* note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
|
17
|
+
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
18
|
+
*/
|
19
|
+
static const rb_data_type_t xml_sax_parser_context_type = {
|
20
|
+
.wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
|
21
|
+
.function = {
|
22
|
+
.dfree = xml_sax_parser_context_free,
|
23
|
+
},
|
24
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
25
|
+
};
|
26
|
+
|
27
|
+
xmlParserCtxtPtr
|
28
|
+
noko_xml_sax_parser_context_unwrap(VALUE rb_context)
|
29
|
+
{
|
30
|
+
xmlParserCtxtPtr c_context;
|
31
|
+
TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
|
32
|
+
return c_context;
|
33
|
+
}
|
12
34
|
|
13
|
-
|
35
|
+
VALUE
|
36
|
+
noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
37
|
+
{
|
38
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
14
39
|
}
|
15
40
|
|
41
|
+
|
16
42
|
/*
|
17
43
|
* call-seq:
|
18
44
|
* parse_io(io, encoding)
|
@@ -22,19 +48,27 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
22
48
|
static VALUE
|
23
49
|
parse_io(VALUE klass, VALUE io, VALUE encoding)
|
24
50
|
{
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
51
|
+
xmlParserCtxtPtr ctxt;
|
52
|
+
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
53
|
+
|
54
|
+
if (!rb_respond_to(io, id_read)) {
|
55
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
56
|
+
}
|
57
|
+
|
58
|
+
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
59
|
+
(xmlInputReadCallback)noko_io_read,
|
60
|
+
(xmlInputCloseCallback)noko_io_close,
|
61
|
+
(void *)io, enc);
|
62
|
+
if (!ctxt) {
|
63
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
64
|
+
}
|
65
|
+
|
66
|
+
if (ctxt->sax) {
|
67
|
+
xmlFree(ctxt->sax);
|
68
|
+
ctxt->sax = NULL;
|
69
|
+
}
|
70
|
+
|
71
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
38
72
|
}
|
39
73
|
|
40
74
|
/*
|
@@ -43,10 +77,17 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
43
77
|
*
|
44
78
|
* Parse file given +filename+
|
45
79
|
*/
|
46
|
-
static VALUE
|
80
|
+
static VALUE
|
81
|
+
parse_file(VALUE klass, VALUE filename)
|
47
82
|
{
|
48
83
|
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
49
|
-
|
84
|
+
|
85
|
+
if (ctxt->sax) {
|
86
|
+
xmlFree(ctxt->sax);
|
87
|
+
ctxt->sax = NULL;
|
88
|
+
}
|
89
|
+
|
90
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
50
91
|
}
|
51
92
|
|
52
93
|
/*
|
@@ -58,41 +99,43 @@ static VALUE parse_file(VALUE klass, VALUE filename)
|
|
58
99
|
static VALUE
|
59
100
|
parse_memory(VALUE klass, VALUE data)
|
60
101
|
{
|
61
|
-
|
102
|
+
xmlParserCtxtPtr ctxt;
|
103
|
+
|
104
|
+
Check_Type(data, T_STRING);
|
62
105
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
106
|
+
if (!(int)RSTRING_LEN(data)) {
|
107
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
108
|
+
}
|
67
109
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
110
|
+
ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
|
111
|
+
(int)RSTRING_LEN(data));
|
112
|
+
if (ctxt->sax) {
|
113
|
+
xmlFree(ctxt->sax);
|
114
|
+
ctxt->sax = NULL;
|
115
|
+
}
|
74
116
|
|
75
|
-
|
117
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
76
118
|
}
|
77
119
|
|
78
120
|
static VALUE
|
79
121
|
parse_doc(VALUE ctxt_val)
|
80
122
|
{
|
81
|
-
|
82
|
-
|
83
|
-
|
123
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
124
|
+
xmlParseDocument(ctxt);
|
125
|
+
return Qnil;
|
84
126
|
}
|
85
127
|
|
86
128
|
static VALUE
|
87
129
|
parse_doc_finalize(VALUE ctxt_val)
|
88
130
|
{
|
89
|
-
|
131
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
90
132
|
|
91
|
-
|
92
|
-
|
133
|
+
if (NULL != ctxt->myDoc) {
|
134
|
+
xmlFreeDoc(ctxt->myDoc);
|
135
|
+
}
|
93
136
|
|
94
|
-
|
95
|
-
|
137
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
138
|
+
return Qnil;
|
96
139
|
}
|
97
140
|
|
98
141
|
/*
|
@@ -104,25 +147,24 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
104
147
|
static VALUE
|
105
148
|
parse_with(VALUE self, VALUE sax_handler)
|
106
149
|
{
|
107
|
-
|
108
|
-
|
150
|
+
xmlParserCtxtPtr ctxt;
|
151
|
+
xmlSAXHandlerPtr sax;
|
109
152
|
|
110
|
-
|
111
|
-
|
153
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
154
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
155
|
+
}
|
112
156
|
|
113
|
-
|
114
|
-
|
157
|
+
ctxt = noko_xml_sax_parser_context_unwrap(self);
|
158
|
+
sax = noko_sax_handler_unwrap(sax_handler);
|
115
159
|
|
116
|
-
|
117
|
-
|
118
|
-
xmlFree(ctxt->sax);
|
160
|
+
ctxt->sax = sax;
|
161
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
119
162
|
|
120
|
-
|
121
|
-
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
163
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
122
164
|
|
123
|
-
|
165
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
124
166
|
|
125
|
-
|
167
|
+
return Qnil;
|
126
168
|
}
|
127
169
|
|
128
170
|
/*
|
@@ -132,15 +174,16 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
132
174
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
133
175
|
* set to true
|
134
176
|
*/
|
135
|
-
static VALUE
|
177
|
+
static VALUE
|
178
|
+
set_replace_entities(VALUE self, VALUE value)
|
136
179
|
{
|
137
|
-
xmlParserCtxtPtr ctxt;
|
138
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
180
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
139
181
|
|
140
|
-
if(Qfalse == value)
|
182
|
+
if (Qfalse == value) {
|
141
183
|
ctxt->replaceEntities = 0;
|
142
|
-
else
|
184
|
+
} else {
|
143
185
|
ctxt->replaceEntities = 1;
|
186
|
+
}
|
144
187
|
|
145
188
|
return value;
|
146
189
|
}
|
@@ -152,15 +195,16 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
152
195
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
153
196
|
* set to true
|
154
197
|
*/
|
155
|
-
static VALUE
|
198
|
+
static VALUE
|
199
|
+
get_replace_entities(VALUE self)
|
156
200
|
{
|
157
|
-
xmlParserCtxtPtr ctxt;
|
158
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
201
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
159
202
|
|
160
|
-
if(0 == ctxt->replaceEntities)
|
203
|
+
if (0 == ctxt->replaceEntities) {
|
161
204
|
return Qfalse;
|
162
|
-
else
|
205
|
+
} else {
|
163
206
|
return Qtrue;
|
207
|
+
}
|
164
208
|
}
|
165
209
|
|
166
210
|
/*
|
@@ -168,16 +212,16 @@ static VALUE get_replace_entities(VALUE self)
|
|
168
212
|
*
|
169
213
|
* Get the current line the parser context is processing.
|
170
214
|
*/
|
171
|
-
static VALUE
|
215
|
+
static VALUE
|
216
|
+
line(VALUE self)
|
172
217
|
{
|
173
|
-
xmlParserCtxtPtr ctxt;
|
174
218
|
xmlParserInputPtr io;
|
175
|
-
|
176
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
219
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
177
220
|
|
178
221
|
io = ctxt->input;
|
179
|
-
if(io)
|
222
|
+
if (io) {
|
180
223
|
return INT2NUM(io->line);
|
224
|
+
}
|
181
225
|
|
182
226
|
return Qnil;
|
183
227
|
}
|
@@ -187,16 +231,16 @@ static VALUE line(VALUE self)
|
|
187
231
|
*
|
188
232
|
* Get the current column the parser context is processing.
|
189
233
|
*/
|
190
|
-
static VALUE
|
234
|
+
static VALUE
|
235
|
+
column(VALUE self)
|
191
236
|
{
|
192
|
-
xmlParserCtxtPtr ctxt;
|
237
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
193
238
|
xmlParserInputPtr io;
|
194
239
|
|
195
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
196
|
-
|
197
240
|
io = ctxt->input;
|
198
|
-
if(io)
|
241
|
+
if (io) {
|
199
242
|
return INT2NUM(io->col);
|
243
|
+
}
|
200
244
|
|
201
245
|
return Qnil;
|
202
246
|
}
|
@@ -208,15 +252,16 @@ static VALUE column(VALUE self)
|
|
208
252
|
* Should this parser recover from structural errors? It will not stop processing
|
209
253
|
* file on structural errors if set to true
|
210
254
|
*/
|
211
|
-
static VALUE
|
255
|
+
static VALUE
|
256
|
+
set_recovery(VALUE self, VALUE value)
|
212
257
|
{
|
213
|
-
xmlParserCtxtPtr ctxt;
|
214
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
258
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
215
259
|
|
216
|
-
if(value == Qfalse)
|
260
|
+
if (value == Qfalse) {
|
217
261
|
ctxt->recovery = 0;
|
218
|
-
else
|
262
|
+
} else {
|
219
263
|
ctxt->recovery = 1;
|
264
|
+
}
|
220
265
|
|
221
266
|
return value;
|
222
267
|
}
|
@@ -228,35 +273,36 @@ static VALUE set_recovery(VALUE self, VALUE value)
|
|
228
273
|
* Should this parser recover from structural errors? It will not stop processing
|
229
274
|
* file on structural errors if set to true
|
230
275
|
*/
|
231
|
-
static VALUE
|
276
|
+
static VALUE
|
277
|
+
get_recovery(VALUE self)
|
232
278
|
{
|
233
|
-
xmlParserCtxtPtr ctxt;
|
234
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
279
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
235
280
|
|
236
|
-
if(ctxt->recovery == 0)
|
281
|
+
if (ctxt->recovery == 0) {
|
237
282
|
return Qfalse;
|
238
|
-
else
|
283
|
+
} else {
|
239
284
|
return Qtrue;
|
285
|
+
}
|
240
286
|
}
|
241
287
|
|
242
|
-
void
|
288
|
+
void
|
289
|
+
noko_init_xml_sax_parser_context(void)
|
243
290
|
{
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
cNokogiriXmlSaxParserContext
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
rb_define_method(
|
256
|
-
rb_define_method(
|
257
|
-
rb_define_method(
|
258
|
-
rb_define_method(
|
259
|
-
|
260
|
-
|
261
|
-
rb_define_method(klass, "column", column, 0);
|
291
|
+
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
292
|
+
|
293
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
294
|
+
|
295
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
|
296
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
|
297
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
|
298
|
+
|
299
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
|
300
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
|
301
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
|
302
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
|
303
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
304
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
305
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
306
|
+
|
307
|
+
id_read = rb_intern("read");
|
262
308
|
}
|
@@ -1,18 +1,37 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlSaxPushParser ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
xml_sax_push_parser_free(void *data)
|
4
7
|
{
|
5
|
-
|
8
|
+
xmlParserCtxtPtr ctx = data;
|
6
9
|
if (ctx != NULL) {
|
7
10
|
NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
|
8
11
|
xmlFreeParserCtxt(ctx);
|
9
12
|
}
|
10
|
-
NOKOGIRI_DEBUG_END(ctx);
|
11
13
|
}
|
12
14
|
|
13
|
-
static
|
15
|
+
static const rb_data_type_t xml_sax_push_parser_type = {
|
16
|
+
.wrap_struct_name = "Nokogiri::XML::SAX::PushParser",
|
17
|
+
.function = {
|
18
|
+
.dfree = xml_sax_push_parser_free,
|
19
|
+
},
|
20
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
21
|
+
};
|
22
|
+
|
23
|
+
static VALUE
|
24
|
+
allocate(VALUE klass)
|
25
|
+
{
|
26
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
|
27
|
+
}
|
28
|
+
|
29
|
+
xmlParserCtxtPtr
|
30
|
+
noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
|
14
31
|
{
|
15
|
-
|
32
|
+
xmlParserCtxtPtr c_parser;
|
33
|
+
TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
|
34
|
+
return c_parser;
|
16
35
|
}
|
17
36
|
|
18
37
|
/*
|
@@ -21,23 +40,26 @@ static VALUE allocate(VALUE klass)
|
|
21
40
|
*
|
22
41
|
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
|
23
42
|
*/
|
24
|
-
static VALUE
|
43
|
+
static VALUE
|
44
|
+
native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
25
45
|
{
|
26
46
|
xmlParserCtxtPtr ctx;
|
27
|
-
const char *
|
47
|
+
const char *chunk = NULL;
|
28
48
|
int size = 0;
|
29
49
|
|
30
50
|
|
31
|
-
|
51
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
32
52
|
|
33
53
|
if (Qnil != _chunk) {
|
34
54
|
chunk = StringValuePtr(_chunk);
|
35
55
|
size = (int)RSTRING_LEN(_chunk);
|
36
56
|
}
|
37
57
|
|
58
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
59
|
+
|
38
60
|
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
39
61
|
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
40
|
-
|
62
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
41
63
|
Nokogiri_error_raise(NULL, e);
|
42
64
|
}
|
43
65
|
}
|
@@ -51,13 +73,14 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
51
73
|
*
|
52
74
|
* Initialize the push parser with +xml_sax+ using +filename+
|
53
75
|
*/
|
54
|
-
static VALUE
|
76
|
+
static VALUE
|
77
|
+
initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
55
78
|
{
|
56
79
|
xmlSAXHandlerPtr sax;
|
57
|
-
const char *
|
80
|
+
const char *filename = NULL;
|
58
81
|
xmlParserCtxtPtr ctx;
|
59
82
|
|
60
|
-
|
83
|
+
sax = noko_sax_handler_unwrap(_xml_sax);
|
61
84
|
|
62
85
|
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
63
86
|
|
@@ -79,18 +102,22 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
|
79
102
|
return self;
|
80
103
|
}
|
81
104
|
|
82
|
-
static VALUE
|
105
|
+
static VALUE
|
106
|
+
get_options(VALUE self)
|
83
107
|
{
|
84
108
|
xmlParserCtxtPtr ctx;
|
85
|
-
|
109
|
+
|
110
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
86
111
|
|
87
112
|
return INT2NUM(ctx->options);
|
88
113
|
}
|
89
114
|
|
90
|
-
static VALUE
|
115
|
+
static VALUE
|
116
|
+
set_options(VALUE self, VALUE options)
|
91
117
|
{
|
92
118
|
xmlParserCtxtPtr ctx;
|
93
|
-
|
119
|
+
|
120
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
94
121
|
|
95
122
|
if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
|
96
123
|
rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
|
@@ -106,10 +133,12 @@ static VALUE set_options(VALUE self, VALUE options)
|
|
106
133
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
107
134
|
* set to true
|
108
135
|
*/
|
109
|
-
static VALUE
|
136
|
+
static VALUE
|
137
|
+
get_replace_entities(VALUE self)
|
110
138
|
{
|
111
139
|
xmlParserCtxtPtr ctx;
|
112
|
-
|
140
|
+
|
141
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
113
142
|
|
114
143
|
if (0 == ctx->replaceEntities) {
|
115
144
|
return Qfalse;
|
@@ -125,10 +154,12 @@ static VALUE get_replace_entities(VALUE self)
|
|
125
154
|
* Should this parser replace entities? & will get converted to '&' if
|
126
155
|
* set to true
|
127
156
|
*/
|
128
|
-
static VALUE
|
157
|
+
static VALUE
|
158
|
+
set_replace_entities(VALUE self, VALUE value)
|
129
159
|
{
|
130
160
|
xmlParserCtxtPtr ctx;
|
131
|
-
|
161
|
+
|
162
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
132
163
|
|
133
164
|
if (Qfalse == value) {
|
134
165
|
ctx->replaceEntities = 0;
|
@@ -139,21 +170,18 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
139
170
|
return value;
|
140
171
|
}
|
141
172
|
|
142
|
-
|
143
|
-
void
|
173
|
+
void
|
174
|
+
noko_init_xml_sax_push_parser(void)
|
144
175
|
{
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
cNokogiriXmlSaxPushParser =
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
rb_define_private_method(
|
155
|
-
|
156
|
-
rb_define_method(klass, "options=", set_options, 1);
|
157
|
-
rb_define_method(klass, "replace_entities", get_replace_entities, 0);
|
158
|
-
rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
|
176
|
+
cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
|
177
|
+
|
178
|
+
rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);
|
179
|
+
|
180
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
|
181
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
|
182
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0);
|
183
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=", set_replace_entities, 1);
|
184
|
+
|
185
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
|
186
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);
|
159
187
|
}
|