nokogiri 1.11.1 → 1.12.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +232 -11
- data/LICENSE.md +1 -1
- data/README.md +27 -21
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +35 -474
- data/ext/nokogiri/extconf.rb +391 -243
- data/ext/nokogiri/gumbo.c +611 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +18 -23
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +206 -66
- data/ext/nokogiri/nokogiri.h +166 -76
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +258 -200
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +28 -17
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +58 -49
- data/ext/nokogiri/xml_node.c +473 -414
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +193 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +111 -106
- data/ext/nokogiri/xml_sax_parser_context.c +102 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +49 -41
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +86 -77
- data/ext/nokogiri/xslt_stylesheet.c +157 -156
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +17 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri.rb +31 -50
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +2 -2
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +17 -17
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +42 -9
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/document.rb +74 -28
- data/lib/nokogiri/xml/node.rb +45 -47
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +3 -5
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
- data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +117 -109
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -118
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -25
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,8 +1,9 @@
|
|
1
|
-
#include <xml_sax_parser_context.h>
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
-
static void
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctxt)
|
6
7
|
{
|
7
8
|
NOKOGIRI_DEBUG_START(ctxt);
|
8
9
|
|
@@ -22,19 +23,19 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
22
23
|
static VALUE
|
23
24
|
parse_io(VALUE klass, VALUE io, VALUE encoding)
|
24
25
|
{
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
26
|
+
xmlParserCtxtPtr ctxt;
|
27
|
+
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
28
|
+
|
29
|
+
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
30
|
+
(xmlInputReadCallback)noko_io_read,
|
31
|
+
(xmlInputCloseCallback)noko_io_close,
|
32
|
+
(void *)io, enc);
|
33
|
+
if (ctxt->sax) {
|
34
|
+
xmlFree(ctxt->sax);
|
35
|
+
ctxt->sax = NULL;
|
36
|
+
}
|
37
|
+
|
38
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
38
39
|
}
|
39
40
|
|
40
41
|
/*
|
@@ -43,7 +44,8 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
43
44
|
*
|
44
45
|
* Parse file given +filename+
|
45
46
|
*/
|
46
|
-
static VALUE
|
47
|
+
static VALUE
|
48
|
+
parse_file(VALUE klass, VALUE filename)
|
47
49
|
{
|
48
50
|
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
49
51
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
@@ -58,41 +60,44 @@ static VALUE parse_file(VALUE klass, VALUE filename)
|
|
58
60
|
static VALUE
|
59
61
|
parse_memory(VALUE klass, VALUE data)
|
60
62
|
{
|
61
|
-
|
63
|
+
xmlParserCtxtPtr ctxt;
|
62
64
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
65
|
+
if (NIL_P(data)) {
|
66
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
67
|
+
}
|
68
|
+
if (!(int)RSTRING_LEN(data)) {
|
69
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
70
|
+
}
|
67
71
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
|
73
|
+
(int)RSTRING_LEN(data));
|
74
|
+
if (ctxt->sax) {
|
75
|
+
xmlFree(ctxt->sax);
|
76
|
+
ctxt->sax = NULL;
|
77
|
+
}
|
74
78
|
|
75
|
-
|
79
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
76
80
|
}
|
77
81
|
|
78
82
|
static VALUE
|
79
83
|
parse_doc(VALUE ctxt_val)
|
80
84
|
{
|
81
|
-
|
82
|
-
|
83
|
-
|
85
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
86
|
+
xmlParseDocument(ctxt);
|
87
|
+
return Qnil;
|
84
88
|
}
|
85
89
|
|
86
90
|
static VALUE
|
87
91
|
parse_doc_finalize(VALUE ctxt_val)
|
88
92
|
{
|
89
|
-
|
93
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
90
94
|
|
91
|
-
|
92
|
-
|
95
|
+
if (NULL != ctxt->myDoc) {
|
96
|
+
xmlFreeDoc(ctxt->myDoc);
|
97
|
+
}
|
93
98
|
|
94
|
-
|
95
|
-
|
99
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
100
|
+
return Qnil;
|
96
101
|
}
|
97
102
|
|
98
103
|
/*
|
@@ -104,27 +109,29 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
104
109
|
static VALUE
|
105
110
|
parse_with(VALUE self, VALUE sax_handler)
|
106
111
|
{
|
107
|
-
|
108
|
-
|
112
|
+
xmlParserCtxtPtr ctxt;
|
113
|
+
xmlSAXHandlerPtr sax;
|
109
114
|
|
110
|
-
|
111
|
-
|
115
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
116
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
117
|
+
}
|
112
118
|
|
113
|
-
|
114
|
-
|
119
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
120
|
+
Data_Get_Struct(sax_handler, xmlSAXHandler, sax);
|
115
121
|
|
116
|
-
|
117
|
-
|
118
|
-
|
122
|
+
/* Free the sax handler since we'll assign our own */
|
123
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
124
|
+
xmlFree(ctxt->sax);
|
125
|
+
}
|
119
126
|
|
120
|
-
|
121
|
-
|
127
|
+
ctxt->sax = sax;
|
128
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
122
129
|
|
123
|
-
|
130
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
124
131
|
|
125
|
-
|
132
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
126
133
|
|
127
|
-
|
134
|
+
return Qnil;
|
128
135
|
}
|
129
136
|
|
130
137
|
/*
|
@@ -134,15 +141,17 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
134
141
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
135
142
|
* set to true
|
136
143
|
*/
|
137
|
-
static VALUE
|
144
|
+
static VALUE
|
145
|
+
set_replace_entities(VALUE self, VALUE value)
|
138
146
|
{
|
139
147
|
xmlParserCtxtPtr ctxt;
|
140
148
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
141
149
|
|
142
|
-
if(Qfalse == value)
|
150
|
+
if (Qfalse == value) {
|
143
151
|
ctxt->replaceEntities = 0;
|
144
|
-
else
|
152
|
+
} else {
|
145
153
|
ctxt->replaceEntities = 1;
|
154
|
+
}
|
146
155
|
|
147
156
|
return value;
|
148
157
|
}
|
@@ -154,15 +163,17 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
154
163
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
155
164
|
* set to true
|
156
165
|
*/
|
157
|
-
static VALUE
|
166
|
+
static VALUE
|
167
|
+
get_replace_entities(VALUE self)
|
158
168
|
{
|
159
169
|
xmlParserCtxtPtr ctxt;
|
160
170
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
161
171
|
|
162
|
-
if(0 == ctxt->replaceEntities)
|
172
|
+
if (0 == ctxt->replaceEntities) {
|
163
173
|
return Qfalse;
|
164
|
-
else
|
174
|
+
} else {
|
165
175
|
return Qtrue;
|
176
|
+
}
|
166
177
|
}
|
167
178
|
|
168
179
|
/*
|
@@ -170,7 +181,8 @@ static VALUE get_replace_entities(VALUE self)
|
|
170
181
|
*
|
171
182
|
* Get the current line the parser context is processing.
|
172
183
|
*/
|
173
|
-
static VALUE
|
184
|
+
static VALUE
|
185
|
+
line(VALUE self)
|
174
186
|
{
|
175
187
|
xmlParserCtxtPtr ctxt;
|
176
188
|
xmlParserInputPtr io;
|
@@ -178,8 +190,9 @@ static VALUE line(VALUE self)
|
|
178
190
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
179
191
|
|
180
192
|
io = ctxt->input;
|
181
|
-
if(io)
|
193
|
+
if (io) {
|
182
194
|
return INT2NUM(io->line);
|
195
|
+
}
|
183
196
|
|
184
197
|
return Qnil;
|
185
198
|
}
|
@@ -189,7 +202,8 @@ static VALUE line(VALUE self)
|
|
189
202
|
*
|
190
203
|
* Get the current column the parser context is processing.
|
191
204
|
*/
|
192
|
-
static VALUE
|
205
|
+
static VALUE
|
206
|
+
column(VALUE self)
|
193
207
|
{
|
194
208
|
xmlParserCtxtPtr ctxt;
|
195
209
|
xmlParserInputPtr io;
|
@@ -197,8 +211,9 @@ static VALUE column(VALUE self)
|
|
197
211
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
198
212
|
|
199
213
|
io = ctxt->input;
|
200
|
-
if(io)
|
214
|
+
if (io) {
|
201
215
|
return INT2NUM(io->col);
|
216
|
+
}
|
202
217
|
|
203
218
|
return Qnil;
|
204
219
|
}
|
@@ -210,15 +225,17 @@ static VALUE column(VALUE self)
|
|
210
225
|
* Should this parser recover from structural errors? It will not stop processing
|
211
226
|
* file on structural errors if set to true
|
212
227
|
*/
|
213
|
-
static VALUE
|
228
|
+
static VALUE
|
229
|
+
set_recovery(VALUE self, VALUE value)
|
214
230
|
{
|
215
231
|
xmlParserCtxtPtr ctxt;
|
216
232
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
217
233
|
|
218
|
-
if(value == Qfalse)
|
234
|
+
if (value == Qfalse) {
|
219
235
|
ctxt->recovery = 0;
|
220
|
-
else
|
236
|
+
} else {
|
221
237
|
ctxt->recovery = 1;
|
238
|
+
}
|
222
239
|
|
223
240
|
return value;
|
224
241
|
}
|
@@ -230,35 +247,35 @@ static VALUE set_recovery(VALUE self, VALUE value)
|
|
230
247
|
* Should this parser recover from structural errors? It will not stop processing
|
231
248
|
* file on structural errors if set to true
|
232
249
|
*/
|
233
|
-
static VALUE
|
250
|
+
static VALUE
|
251
|
+
get_recovery(VALUE self)
|
234
252
|
{
|
235
253
|
xmlParserCtxtPtr ctxt;
|
236
254
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
237
255
|
|
238
|
-
if(ctxt->recovery == 0)
|
256
|
+
if (ctxt->recovery == 0) {
|
239
257
|
return Qfalse;
|
240
|
-
else
|
258
|
+
} else {
|
241
259
|
return Qtrue;
|
260
|
+
}
|
242
261
|
}
|
243
262
|
|
244
|
-
void
|
263
|
+
void
|
264
|
+
noko_init_xml_sax_parser_context()
|
245
265
|
{
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
cNokogiriXmlSaxParserContext
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
rb_define_method(
|
258
|
-
rb_define_method(
|
259
|
-
rb_define_method(
|
260
|
-
rb_define_method(
|
261
|
-
rb_define_method(klass, "recovery", get_recovery, 0);
|
262
|
-
rb_define_method(klass, "line", line, 0);
|
263
|
-
rb_define_method(klass, "column", column, 0);
|
266
|
+
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
267
|
+
|
268
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
269
|
+
|
270
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
|
271
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
|
272
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
|
273
|
+
|
274
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
|
275
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
|
276
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
|
277
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
|
278
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
279
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
280
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
264
281
|
}
|
@@ -1,6 +1,9 @@
|
|
1
|
-
#include <xml_sax_push_parser.h>
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlSaxPushParser ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctx)
|
4
7
|
{
|
5
8
|
NOKOGIRI_DEBUG_START(ctx);
|
6
9
|
if (ctx != NULL) {
|
@@ -10,7 +13,8 @@ static void deallocate(xmlParserCtxtPtr ctx)
|
|
10
13
|
NOKOGIRI_DEBUG_END(ctx);
|
11
14
|
}
|
12
15
|
|
13
|
-
static VALUE
|
16
|
+
static VALUE
|
17
|
+
allocate(VALUE klass)
|
14
18
|
{
|
15
19
|
return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
|
16
20
|
}
|
@@ -21,10 +25,11 @@ static VALUE allocate(VALUE klass)
|
|
21
25
|
*
|
22
26
|
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
|
23
27
|
*/
|
24
|
-
static VALUE
|
28
|
+
static VALUE
|
29
|
+
native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
25
30
|
{
|
26
31
|
xmlParserCtxtPtr ctx;
|
27
|
-
const char *
|
32
|
+
const char *chunk = NULL;
|
28
33
|
int size = 0;
|
29
34
|
|
30
35
|
|
@@ -53,10 +58,11 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
53
58
|
*
|
54
59
|
* Initialize the push parser with +xml_sax+ using +filename+
|
55
60
|
*/
|
56
|
-
static VALUE
|
61
|
+
static VALUE
|
62
|
+
initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
57
63
|
{
|
58
64
|
xmlSAXHandlerPtr sax;
|
59
|
-
const char *
|
65
|
+
const char *filename = NULL;
|
60
66
|
xmlParserCtxtPtr ctx;
|
61
67
|
|
62
68
|
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
@@ -81,7 +87,8 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
|
81
87
|
return self;
|
82
88
|
}
|
83
89
|
|
84
|
-
static VALUE
|
90
|
+
static VALUE
|
91
|
+
get_options(VALUE self)
|
85
92
|
{
|
86
93
|
xmlParserCtxtPtr ctx;
|
87
94
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -89,7 +96,8 @@ static VALUE get_options(VALUE self)
|
|
89
96
|
return INT2NUM(ctx->options);
|
90
97
|
}
|
91
98
|
|
92
|
-
static VALUE
|
99
|
+
static VALUE
|
100
|
+
set_options(VALUE self, VALUE options)
|
93
101
|
{
|
94
102
|
xmlParserCtxtPtr ctx;
|
95
103
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -108,7 +116,8 @@ static VALUE set_options(VALUE self, VALUE options)
|
|
108
116
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
109
117
|
* set to true
|
110
118
|
*/
|
111
|
-
static VALUE
|
119
|
+
static VALUE
|
120
|
+
get_replace_entities(VALUE self)
|
112
121
|
{
|
113
122
|
xmlParserCtxtPtr ctx;
|
114
123
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -127,7 +136,8 @@ static VALUE get_replace_entities(VALUE self)
|
|
127
136
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
128
137
|
* set to true
|
129
138
|
*/
|
130
|
-
static VALUE
|
139
|
+
static VALUE
|
140
|
+
set_replace_entities(VALUE self, VALUE value)
|
131
141
|
{
|
132
142
|
xmlParserCtxtPtr ctx;
|
133
143
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -141,21 +151,18 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
141
151
|
return value;
|
142
152
|
}
|
143
153
|
|
144
|
-
|
145
|
-
|
154
|
+
void
|
155
|
+
noko_init_xml_sax_push_parser()
|
146
156
|
{
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
cNokogiriXmlSaxPushParser =
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
rb_define_private_method(
|
157
|
-
|
158
|
-
rb_define_method(klass, "options=", set_options, 1);
|
159
|
-
rb_define_method(klass, "replace_entities", get_replace_entities, 0);
|
160
|
-
rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
|
157
|
+
cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
|
158
|
+
|
159
|
+
rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);
|
160
|
+
|
161
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
|
162
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
|
163
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0);
|
164
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=", set_replace_entities, 1);
|
165
|
+
|
166
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
|
167
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);
|
161
168
|
}
|
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
#include <xml_schema.h>
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlSchema;
|
4
|
+
|
5
|
+
static void
|
6
|
+
dealloc(xmlSchemaPtr schema)
|
4
7
|
{
|
5
8
|
NOKOGIRI_DEBUG_START(schema);
|
6
9
|
xmlSchemaFree(schema);
|
@@ -13,7 +16,8 @@ static void dealloc(xmlSchemaPtr schema)
|
|
13
16
|
*
|
14
17
|
* Validate a Nokogiri::XML::Document against this Schema.
|
15
18
|
*/
|
16
|
-
static VALUE
|
19
|
+
static VALUE
|
20
|
+
validate_document(VALUE self, VALUE document)
|
17
21
|
{
|
18
22
|
xmlDocPtr doc;
|
19
23
|
xmlSchemaPtr schema;
|
@@ -27,7 +31,7 @@ static VALUE validate_document(VALUE self, VALUE document)
|
|
27
31
|
|
28
32
|
valid_ctxt = xmlSchemaNewValidCtxt(schema);
|
29
33
|
|
30
|
-
if(NULL == valid_ctxt) {
|
34
|
+
if (NULL == valid_ctxt) {
|
31
35
|
/* we have a problem */
|
32
36
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
33
37
|
}
|
@@ -53,7 +57,8 @@ static VALUE validate_document(VALUE self, VALUE document)
|
|
53
57
|
*
|
54
58
|
* Validate a file against this Schema.
|
55
59
|
*/
|
56
|
-
static VALUE
|
60
|
+
static VALUE
|
61
|
+
validate_file(VALUE self, VALUE rb_filename)
|
57
62
|
{
|
58
63
|
xmlSchemaPtr schema;
|
59
64
|
xmlSchemaValidCtxtPtr valid_ctxt;
|
@@ -61,13 +66,13 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
|
|
61
66
|
VALUE errors;
|
62
67
|
|
63
68
|
Data_Get_Struct(self, xmlSchema, schema);
|
64
|
-
filename = (const char*)StringValueCStr(rb_filename) ;
|
69
|
+
filename = (const char *)StringValueCStr(rb_filename) ;
|
65
70
|
|
66
71
|
errors = rb_ary_new();
|
67
72
|
|
68
73
|
valid_ctxt = xmlSchemaNewValidCtxt(schema);
|
69
74
|
|
70
|
-
if(NULL == valid_ctxt) {
|
75
|
+
if (NULL == valid_ctxt) {
|
71
76
|
/* we have a problem */
|
72
77
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
73
78
|
}
|
@@ -93,7 +98,8 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
|
|
93
98
|
*
|
94
99
|
* Create a new Schema from the contents of +string+
|
95
100
|
*/
|
96
|
-
static VALUE
|
101
|
+
static VALUE
|
102
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
97
103
|
{
|
98
104
|
VALUE content;
|
99
105
|
VALUE parse_options;
|
@@ -107,7 +113,7 @@ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
|
|
107
113
|
|
108
114
|
scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
|
109
115
|
if (scanned_args == 1) {
|
110
|
-
parse_options =
|
116
|
+
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
111
117
|
}
|
112
118
|
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
113
119
|
|
@@ -121,7 +127,7 @@ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
|
|
121
127
|
ctx,
|
122
128
|
Nokogiri_error_array_pusher,
|
123
129
|
(void *)errors
|
124
|
-
|
130
|
+
);
|
125
131
|
#endif
|
126
132
|
|
127
133
|
if (parse_options_int & XML_PARSE_NONET) {
|
@@ -138,12 +144,13 @@ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
|
|
138
144
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
139
145
|
xmlSchemaFreeParserCtxt(ctx);
|
140
146
|
|
141
|
-
if(NULL == schema) {
|
147
|
+
if (NULL == schema) {
|
142
148
|
xmlErrorPtr error = xmlGetLastError();
|
143
|
-
if(error)
|
149
|
+
if (error) {
|
144
150
|
Nokogiri_error_raise(NULL, error);
|
145
|
-
else
|
151
|
+
} else {
|
146
152
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
153
|
+
}
|
147
154
|
|
148
155
|
return Qnil;
|
149
156
|
}
|
@@ -160,24 +167,25 @@ static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
|
|
160
167
|
* out from under the VALUE pointer. This function checks to see if any of
|
161
168
|
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
162
169
|
*/
|
163
|
-
static int
|
170
|
+
static int
|
171
|
+
has_blank_nodes_p(VALUE cache)
|
164
172
|
{
|
165
|
-
|
173
|
+
long i;
|
166
174
|
|
167
|
-
|
168
|
-
|
169
|
-
|
175
|
+
if (NIL_P(cache)) {
|
176
|
+
return 0;
|
177
|
+
}
|
170
178
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
}
|
179
|
+
for (i = 0; i < RARRAY_LEN(cache); i++) {
|
180
|
+
xmlNodePtr node;
|
181
|
+
VALUE element = rb_ary_entry(cache, i);
|
182
|
+
Data_Get_Struct(element, xmlNode, node);
|
183
|
+
if (xmlIsBlankNode(node)) {
|
184
|
+
return 1;
|
178
185
|
}
|
186
|
+
}
|
179
187
|
|
180
|
-
|
188
|
+
return 0;
|
181
189
|
}
|
182
190
|
|
183
191
|
/*
|
@@ -186,7 +194,8 @@ static int has_blank_nodes_p(VALUE cache)
|
|
186
194
|
*
|
187
195
|
* Create a new Schema from the Nokogiri::XML::Document +doc+
|
188
196
|
*/
|
189
|
-
static VALUE
|
197
|
+
static VALUE
|
198
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
190
199
|
{
|
191
200
|
VALUE document;
|
192
201
|
VALUE parse_options;
|
@@ -205,7 +214,7 @@ static VALUE from_document(int argc, VALUE *argv, VALUE klass)
|
|
205
214
|
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
206
215
|
|
207
216
|
if (scanned_args == 1) {
|
208
|
-
parse_options =
|
217
|
+
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
209
218
|
}
|
210
219
|
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
211
220
|
|
@@ -240,12 +249,13 @@ static VALUE from_document(int argc, VALUE *argv, VALUE klass)
|
|
240
249
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
241
250
|
xmlSchemaFreeParserCtxt(ctx);
|
242
251
|
|
243
|
-
if(NULL == schema) {
|
252
|
+
if (NULL == schema) {
|
244
253
|
xmlErrorPtr error = xmlGetLastError();
|
245
|
-
if(error)
|
254
|
+
if (error) {
|
246
255
|
Nokogiri_error_raise(NULL, error);
|
247
|
-
else
|
256
|
+
} else {
|
248
257
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
258
|
+
}
|
249
259
|
|
250
260
|
return Qnil;
|
251
261
|
}
|
@@ -259,18 +269,16 @@ static VALUE from_document(int argc, VALUE *argv, VALUE klass)
|
|
259
269
|
return Qnil;
|
260
270
|
}
|
261
271
|
|
262
|
-
|
263
|
-
|
272
|
+
void
|
273
|
+
noko_init_xml_schema()
|
264
274
|
{
|
265
|
-
|
266
|
-
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
267
|
-
VALUE klass = rb_define_class_under(xml, "Schema", rb_cObject);
|
275
|
+
cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
|
268
276
|
|
269
|
-
cNokogiriXmlSchema
|
277
|
+
rb_undef_alloc_func(cNokogiriXmlSchema);
|
270
278
|
|
271
|
-
rb_define_singleton_method(
|
272
|
-
rb_define_singleton_method(
|
279
|
+
rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1);
|
280
|
+
rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1);
|
273
281
|
|
274
|
-
rb_define_private_method(
|
275
|
-
rb_define_private_method(
|
282
|
+
rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1);
|
283
|
+
rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1);
|
276
284
|
}
|