nokogiri 1.14.0.rc1-arm-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +287 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +41 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1082 -0
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +114 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
- data/ext/nokogiri/include/libxslt/attributes.h +38 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +259 -0
- data/ext/nokogiri/nokogiri.h +235 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +57 -0
- data/ext/nokogiri/xml_comment.c +62 -0
- data/ext/nokogiri/xml_document.c +689 -0
- data/ext/nokogiri/xml_document_fragment.c +44 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +128 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +104 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +186 -0
- data/ext/nokogiri/xml_node.c +2425 -0
- data/ext/nokogiri/xml_node_set.c +496 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +794 -0
- data/ext/nokogiri/xml_relax_ng.c +183 -0
- data/ext/nokogiri/xml_sax_parser.c +316 -0
- data/ext/nokogiri/xml_sax_parser_context.c +283 -0
- data/ext/nokogiri/xml_sax_push_parser.c +166 -0
- data/ext/nokogiri/xml_schema.c +282 -0
- data/ext/nokogiri/xml_syntax_error.c +85 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_xpath_context.c +413 -0
- data/ext/nokogiri/xslt_stylesheet.c +363 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +54 -0
- data/lib/nokogiri/css/parser.rb +770 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +96 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +56 -0
- data/lib/nokogiri/css/xpath_visitor.rb +359 -0
- data/lib/nokogiri/css.rb +66 -0
- data/lib/nokogiri/decorators/slop.rb +44 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +63 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +98 -0
- data/lib/nokogiri/html5.rb +389 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +20 -0
- data/lib/nokogiri/xml/builder.rb +487 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +471 -0
- data/lib/nokogiri/xml/document_fragment.rb +205 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +38 -0
- data/lib/nokogiri/xml/element_decl.rb +15 -0
- data/lib/nokogiri/xml/entity_decl.rb +21 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +58 -0
- data/lib/nokogiri/xml/node/save_options.rb +68 -0
- data/lib/nokogiri/xml/node.rb +1563 -0
- data/lib/nokogiri/xml/node_set.rb +446 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +105 -0
- data/lib/nokogiri/xml/relax_ng.rb +38 -0
- data/lib/nokogiri/xml/sax/document.rb +167 -0
- data/lib/nokogiri/xml/sax/parser.rb +125 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +6 -0
- data/lib/nokogiri/xml/schema.rb +73 -0
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +72 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +27 -0
- data/lib/nokogiri/xslt.rb +65 -0
- data/lib/nokogiri.rb +120 -0
- data/lib/xsd/xmlparser/nokogiri.rb +104 -0
- metadata +317 -0
@@ -0,0 +1,283 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlSaxParserContext ;
|
4
|
+
|
5
|
+
static ID id_read;
|
6
|
+
|
7
|
+
static void
|
8
|
+
deallocate(xmlParserCtxtPtr ctxt)
|
9
|
+
{
|
10
|
+
ctxt->sax = NULL;
|
11
|
+
xmlFreeParserCtxt(ctxt);
|
12
|
+
}
|
13
|
+
|
14
|
+
/*
|
15
|
+
* call-seq:
|
16
|
+
* parse_io(io, encoding)
|
17
|
+
*
|
18
|
+
* Parse +io+ object with +encoding+
|
19
|
+
*/
|
20
|
+
static VALUE
|
21
|
+
parse_io(VALUE klass, VALUE io, VALUE encoding)
|
22
|
+
{
|
23
|
+
xmlParserCtxtPtr ctxt;
|
24
|
+
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
25
|
+
|
26
|
+
if (!rb_respond_to(io, id_read)) {
|
27
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
28
|
+
}
|
29
|
+
|
30
|
+
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
31
|
+
(xmlInputReadCallback)noko_io_read,
|
32
|
+
(xmlInputCloseCallback)noko_io_close,
|
33
|
+
(void *)io, enc);
|
34
|
+
if (ctxt->sax) {
|
35
|
+
xmlFree(ctxt->sax);
|
36
|
+
ctxt->sax = NULL;
|
37
|
+
}
|
38
|
+
|
39
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
40
|
+
}
|
41
|
+
|
42
|
+
/*
|
43
|
+
* call-seq:
|
44
|
+
* parse_file(filename)
|
45
|
+
*
|
46
|
+
* Parse file given +filename+
|
47
|
+
*/
|
48
|
+
static VALUE
|
49
|
+
parse_file(VALUE klass, VALUE filename)
|
50
|
+
{
|
51
|
+
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
52
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
53
|
+
}
|
54
|
+
|
55
|
+
/*
|
56
|
+
* call-seq:
|
57
|
+
* parse_memory(data)
|
58
|
+
*
|
59
|
+
* Parse the XML stored in memory in +data+
|
60
|
+
*/
|
61
|
+
static VALUE
|
62
|
+
parse_memory(VALUE klass, VALUE data)
|
63
|
+
{
|
64
|
+
xmlParserCtxtPtr ctxt;
|
65
|
+
|
66
|
+
Check_Type(data, T_STRING);
|
67
|
+
|
68
|
+
if (!(int)RSTRING_LEN(data)) {
|
69
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
70
|
+
}
|
71
|
+
|
72
|
+
ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
|
73
|
+
(int)RSTRING_LEN(data));
|
74
|
+
if (ctxt->sax) {
|
75
|
+
xmlFree(ctxt->sax);
|
76
|
+
ctxt->sax = NULL;
|
77
|
+
}
|
78
|
+
|
79
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
80
|
+
}
|
81
|
+
|
82
|
+
static VALUE
|
83
|
+
parse_doc(VALUE ctxt_val)
|
84
|
+
{
|
85
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
86
|
+
xmlParseDocument(ctxt);
|
87
|
+
return Qnil;
|
88
|
+
}
|
89
|
+
|
90
|
+
static VALUE
|
91
|
+
parse_doc_finalize(VALUE ctxt_val)
|
92
|
+
{
|
93
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
94
|
+
|
95
|
+
if (NULL != ctxt->myDoc) {
|
96
|
+
xmlFreeDoc(ctxt->myDoc);
|
97
|
+
}
|
98
|
+
|
99
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
100
|
+
return Qnil;
|
101
|
+
}
|
102
|
+
|
103
|
+
/*
|
104
|
+
* call-seq:
|
105
|
+
* parse_with(sax_handler)
|
106
|
+
*
|
107
|
+
* Use +sax_handler+ and parse the current document
|
108
|
+
*/
|
109
|
+
static VALUE
|
110
|
+
parse_with(VALUE self, VALUE sax_handler)
|
111
|
+
{
|
112
|
+
xmlParserCtxtPtr ctxt;
|
113
|
+
xmlSAXHandlerPtr sax;
|
114
|
+
|
115
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
116
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
117
|
+
}
|
118
|
+
|
119
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
120
|
+
Data_Get_Struct(sax_handler, xmlSAXHandler, sax);
|
121
|
+
|
122
|
+
/* Free the sax handler since we'll assign our own */
|
123
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
124
|
+
xmlFree(ctxt->sax);
|
125
|
+
}
|
126
|
+
|
127
|
+
ctxt->sax = sax;
|
128
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
129
|
+
|
130
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
131
|
+
|
132
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
133
|
+
|
134
|
+
return Qnil;
|
135
|
+
}
|
136
|
+
|
137
|
+
/*
|
138
|
+
* call-seq:
|
139
|
+
* replace_entities=(boolean)
|
140
|
+
*
|
141
|
+
* Should this parser replace entities? & will get converted to '&' if
|
142
|
+
* set to true
|
143
|
+
*/
|
144
|
+
static VALUE
|
145
|
+
set_replace_entities(VALUE self, VALUE value)
|
146
|
+
{
|
147
|
+
xmlParserCtxtPtr ctxt;
|
148
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
149
|
+
|
150
|
+
if (Qfalse == value) {
|
151
|
+
ctxt->replaceEntities = 0;
|
152
|
+
} else {
|
153
|
+
ctxt->replaceEntities = 1;
|
154
|
+
}
|
155
|
+
|
156
|
+
return value;
|
157
|
+
}
|
158
|
+
|
159
|
+
/*
|
160
|
+
* call-seq:
|
161
|
+
* replace_entities
|
162
|
+
*
|
163
|
+
* Should this parser replace entities? & will get converted to '&' if
|
164
|
+
* set to true
|
165
|
+
*/
|
166
|
+
static VALUE
|
167
|
+
get_replace_entities(VALUE self)
|
168
|
+
{
|
169
|
+
xmlParserCtxtPtr ctxt;
|
170
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
171
|
+
|
172
|
+
if (0 == ctxt->replaceEntities) {
|
173
|
+
return Qfalse;
|
174
|
+
} else {
|
175
|
+
return Qtrue;
|
176
|
+
}
|
177
|
+
}
|
178
|
+
|
179
|
+
/*
|
180
|
+
* call-seq: line
|
181
|
+
*
|
182
|
+
* Get the current line the parser context is processing.
|
183
|
+
*/
|
184
|
+
static VALUE
|
185
|
+
line(VALUE self)
|
186
|
+
{
|
187
|
+
xmlParserCtxtPtr ctxt;
|
188
|
+
xmlParserInputPtr io;
|
189
|
+
|
190
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
191
|
+
|
192
|
+
io = ctxt->input;
|
193
|
+
if (io) {
|
194
|
+
return INT2NUM(io->line);
|
195
|
+
}
|
196
|
+
|
197
|
+
return Qnil;
|
198
|
+
}
|
199
|
+
|
200
|
+
/*
|
201
|
+
* call-seq: column
|
202
|
+
*
|
203
|
+
* Get the current column the parser context is processing.
|
204
|
+
*/
|
205
|
+
static VALUE
|
206
|
+
column(VALUE self)
|
207
|
+
{
|
208
|
+
xmlParserCtxtPtr ctxt;
|
209
|
+
xmlParserInputPtr io;
|
210
|
+
|
211
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
212
|
+
|
213
|
+
io = ctxt->input;
|
214
|
+
if (io) {
|
215
|
+
return INT2NUM(io->col);
|
216
|
+
}
|
217
|
+
|
218
|
+
return Qnil;
|
219
|
+
}
|
220
|
+
|
221
|
+
/*
|
222
|
+
* call-seq:
|
223
|
+
* recovery=(boolean)
|
224
|
+
*
|
225
|
+
* Should this parser recover from structural errors? It will not stop processing
|
226
|
+
* file on structural errors if set to true
|
227
|
+
*/
|
228
|
+
static VALUE
|
229
|
+
set_recovery(VALUE self, VALUE value)
|
230
|
+
{
|
231
|
+
xmlParserCtxtPtr ctxt;
|
232
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
233
|
+
|
234
|
+
if (value == Qfalse) {
|
235
|
+
ctxt->recovery = 0;
|
236
|
+
} else {
|
237
|
+
ctxt->recovery = 1;
|
238
|
+
}
|
239
|
+
|
240
|
+
return value;
|
241
|
+
}
|
242
|
+
|
243
|
+
/*
|
244
|
+
* call-seq:
|
245
|
+
* recovery
|
246
|
+
*
|
247
|
+
* Should this parser recover from structural errors? It will not stop processing
|
248
|
+
* file on structural errors if set to true
|
249
|
+
*/
|
250
|
+
static VALUE
|
251
|
+
get_recovery(VALUE self)
|
252
|
+
{
|
253
|
+
xmlParserCtxtPtr ctxt;
|
254
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
255
|
+
|
256
|
+
if (ctxt->recovery == 0) {
|
257
|
+
return Qfalse;
|
258
|
+
} else {
|
259
|
+
return Qtrue;
|
260
|
+
}
|
261
|
+
}
|
262
|
+
|
263
|
+
void
|
264
|
+
noko_init_xml_sax_parser_context()
|
265
|
+
{
|
266
|
+
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
267
|
+
|
268
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
269
|
+
|
270
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
|
271
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
|
272
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
|
273
|
+
|
274
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
|
275
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
|
276
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
|
277
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
|
278
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
279
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
280
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
281
|
+
|
282
|
+
id_read = rb_intern("read");
|
283
|
+
}
|
@@ -0,0 +1,166 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlSaxPushParser ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctx)
|
7
|
+
{
|
8
|
+
if (ctx != NULL) {
|
9
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
|
10
|
+
xmlFreeParserCtxt(ctx);
|
11
|
+
}
|
12
|
+
}
|
13
|
+
|
14
|
+
static VALUE
|
15
|
+
allocate(VALUE klass)
|
16
|
+
{
|
17
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
|
18
|
+
}
|
19
|
+
|
20
|
+
/*
|
21
|
+
* call-seq:
|
22
|
+
* native_write(chunk, last_chunk)
|
23
|
+
*
|
24
|
+
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
|
25
|
+
*/
|
26
|
+
static VALUE
|
27
|
+
native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
28
|
+
{
|
29
|
+
xmlParserCtxtPtr ctx;
|
30
|
+
const char *chunk = NULL;
|
31
|
+
int size = 0;
|
32
|
+
|
33
|
+
|
34
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
35
|
+
|
36
|
+
if (Qnil != _chunk) {
|
37
|
+
chunk = StringValuePtr(_chunk);
|
38
|
+
size = (int)RSTRING_LEN(_chunk);
|
39
|
+
}
|
40
|
+
|
41
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
42
|
+
|
43
|
+
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
44
|
+
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
45
|
+
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
46
|
+
Nokogiri_error_raise(NULL, e);
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
return self;
|
51
|
+
}
|
52
|
+
|
53
|
+
/*
|
54
|
+
* call-seq:
|
55
|
+
* initialize_native(xml_sax, filename)
|
56
|
+
*
|
57
|
+
* Initialize the push parser with +xml_sax+ using +filename+
|
58
|
+
*/
|
59
|
+
static VALUE
|
60
|
+
initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
61
|
+
{
|
62
|
+
xmlSAXHandlerPtr sax;
|
63
|
+
const char *filename = NULL;
|
64
|
+
xmlParserCtxtPtr ctx;
|
65
|
+
|
66
|
+
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
67
|
+
|
68
|
+
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
69
|
+
|
70
|
+
ctx = xmlCreatePushParserCtxt(
|
71
|
+
sax,
|
72
|
+
NULL,
|
73
|
+
NULL,
|
74
|
+
0,
|
75
|
+
filename
|
76
|
+
);
|
77
|
+
if (ctx == NULL) {
|
78
|
+
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
79
|
+
}
|
80
|
+
|
81
|
+
ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
|
82
|
+
|
83
|
+
ctx->sax2 = 1;
|
84
|
+
DATA_PTR(self) = ctx;
|
85
|
+
return self;
|
86
|
+
}
|
87
|
+
|
88
|
+
static VALUE
|
89
|
+
get_options(VALUE self)
|
90
|
+
{
|
91
|
+
xmlParserCtxtPtr ctx;
|
92
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
93
|
+
|
94
|
+
return INT2NUM(ctx->options);
|
95
|
+
}
|
96
|
+
|
97
|
+
static VALUE
|
98
|
+
set_options(VALUE self, VALUE options)
|
99
|
+
{
|
100
|
+
xmlParserCtxtPtr ctx;
|
101
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
102
|
+
|
103
|
+
if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
|
104
|
+
rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
|
105
|
+
}
|
106
|
+
|
107
|
+
return Qnil;
|
108
|
+
}
|
109
|
+
|
110
|
+
/*
|
111
|
+
* call-seq:
|
112
|
+
* replace_entities
|
113
|
+
*
|
114
|
+
* Should this parser replace entities? & will get converted to '&' if
|
115
|
+
* set to true
|
116
|
+
*/
|
117
|
+
static VALUE
|
118
|
+
get_replace_entities(VALUE self)
|
119
|
+
{
|
120
|
+
xmlParserCtxtPtr ctx;
|
121
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
122
|
+
|
123
|
+
if (0 == ctx->replaceEntities) {
|
124
|
+
return Qfalse;
|
125
|
+
} else {
|
126
|
+
return Qtrue;
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
/*
|
131
|
+
* call-seq:
|
132
|
+
* replace_entities=(boolean)
|
133
|
+
*
|
134
|
+
* Should this parser replace entities? & will get converted to '&' if
|
135
|
+
* set to true
|
136
|
+
*/
|
137
|
+
static VALUE
|
138
|
+
set_replace_entities(VALUE self, VALUE value)
|
139
|
+
{
|
140
|
+
xmlParserCtxtPtr ctx;
|
141
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
142
|
+
|
143
|
+
if (Qfalse == value) {
|
144
|
+
ctx->replaceEntities = 0;
|
145
|
+
} else {
|
146
|
+
ctx->replaceEntities = 1;
|
147
|
+
}
|
148
|
+
|
149
|
+
return value;
|
150
|
+
}
|
151
|
+
|
152
|
+
void
|
153
|
+
noko_init_xml_sax_push_parser()
|
154
|
+
{
|
155
|
+
cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
|
156
|
+
|
157
|
+
rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);
|
158
|
+
|
159
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
|
160
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
|
161
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0);
|
162
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=", set_replace_entities, 1);
|
163
|
+
|
164
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
|
165
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);
|
166
|
+
}
|
@@ -0,0 +1,282 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlSchema;
|
4
|
+
|
5
|
+
static void
|
6
|
+
dealloc(xmlSchemaPtr schema)
|
7
|
+
{
|
8
|
+
xmlSchemaFree(schema);
|
9
|
+
}
|
10
|
+
|
11
|
+
/*
|
12
|
+
* call-seq:
|
13
|
+
* validate_document(document)
|
14
|
+
*
|
15
|
+
* Validate a Nokogiri::XML::Document against this Schema.
|
16
|
+
*/
|
17
|
+
static VALUE
|
18
|
+
validate_document(VALUE self, VALUE document)
|
19
|
+
{
|
20
|
+
xmlDocPtr doc;
|
21
|
+
xmlSchemaPtr schema;
|
22
|
+
xmlSchemaValidCtxtPtr valid_ctxt;
|
23
|
+
VALUE errors;
|
24
|
+
|
25
|
+
Data_Get_Struct(self, xmlSchema, schema);
|
26
|
+
Noko_Node_Get_Struct(document, xmlDoc, doc);
|
27
|
+
|
28
|
+
errors = rb_ary_new();
|
29
|
+
|
30
|
+
valid_ctxt = xmlSchemaNewValidCtxt(schema);
|
31
|
+
|
32
|
+
if (NULL == valid_ctxt) {
|
33
|
+
/* we have a problem */
|
34
|
+
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
35
|
+
}
|
36
|
+
|
37
|
+
#ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS
|
38
|
+
xmlSchemaSetValidStructuredErrors(
|
39
|
+
valid_ctxt,
|
40
|
+
Nokogiri_error_array_pusher,
|
41
|
+
(void *)errors
|
42
|
+
);
|
43
|
+
#endif
|
44
|
+
|
45
|
+
xmlSchemaValidateDoc(valid_ctxt, doc);
|
46
|
+
|
47
|
+
xmlSchemaFreeValidCtxt(valid_ctxt);
|
48
|
+
|
49
|
+
return errors;
|
50
|
+
}
|
51
|
+
|
52
|
+
/*
|
53
|
+
* call-seq:
|
54
|
+
* validate_file(filename)
|
55
|
+
*
|
56
|
+
* Validate a file against this Schema.
|
57
|
+
*/
|
58
|
+
static VALUE
|
59
|
+
validate_file(VALUE self, VALUE rb_filename)
|
60
|
+
{
|
61
|
+
xmlSchemaPtr schema;
|
62
|
+
xmlSchemaValidCtxtPtr valid_ctxt;
|
63
|
+
const char *filename ;
|
64
|
+
VALUE errors;
|
65
|
+
|
66
|
+
Data_Get_Struct(self, xmlSchema, schema);
|
67
|
+
filename = (const char *)StringValueCStr(rb_filename) ;
|
68
|
+
|
69
|
+
errors = rb_ary_new();
|
70
|
+
|
71
|
+
valid_ctxt = xmlSchemaNewValidCtxt(schema);
|
72
|
+
|
73
|
+
if (NULL == valid_ctxt) {
|
74
|
+
/* we have a problem */
|
75
|
+
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
76
|
+
}
|
77
|
+
|
78
|
+
#ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS
|
79
|
+
xmlSchemaSetValidStructuredErrors(
|
80
|
+
valid_ctxt,
|
81
|
+
Nokogiri_error_array_pusher,
|
82
|
+
(void *)errors
|
83
|
+
);
|
84
|
+
#endif
|
85
|
+
|
86
|
+
xmlSchemaValidateFile(valid_ctxt, filename, 0);
|
87
|
+
|
88
|
+
xmlSchemaFreeValidCtxt(valid_ctxt);
|
89
|
+
|
90
|
+
return errors;
|
91
|
+
}
|
92
|
+
|
93
|
+
/*
|
94
|
+
* call-seq:
|
95
|
+
* read_memory(string)
|
96
|
+
*
|
97
|
+
* Create a new Schema from the contents of +string+
|
98
|
+
*/
|
99
|
+
static VALUE
|
100
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
101
|
+
{
|
102
|
+
VALUE content;
|
103
|
+
VALUE parse_options;
|
104
|
+
int parse_options_int;
|
105
|
+
xmlSchemaParserCtxtPtr ctx;
|
106
|
+
xmlSchemaPtr schema;
|
107
|
+
VALUE errors;
|
108
|
+
VALUE rb_schema;
|
109
|
+
int scanned_args = 0;
|
110
|
+
xmlExternalEntityLoader old_loader = 0;
|
111
|
+
|
112
|
+
scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
|
113
|
+
if (scanned_args == 1) {
|
114
|
+
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
115
|
+
}
|
116
|
+
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
117
|
+
|
118
|
+
ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
|
119
|
+
|
120
|
+
errors = rb_ary_new();
|
121
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
122
|
+
|
123
|
+
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
|
124
|
+
xmlSchemaSetParserStructuredErrors(
|
125
|
+
ctx,
|
126
|
+
Nokogiri_error_array_pusher,
|
127
|
+
(void *)errors
|
128
|
+
);
|
129
|
+
#endif
|
130
|
+
|
131
|
+
if (parse_options_int & XML_PARSE_NONET) {
|
132
|
+
old_loader = xmlGetExternalEntityLoader();
|
133
|
+
xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
|
134
|
+
}
|
135
|
+
|
136
|
+
schema = xmlSchemaParse(ctx);
|
137
|
+
|
138
|
+
if (old_loader) {
|
139
|
+
xmlSetExternalEntityLoader(old_loader);
|
140
|
+
}
|
141
|
+
|
142
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
143
|
+
xmlSchemaFreeParserCtxt(ctx);
|
144
|
+
|
145
|
+
if (NULL == schema) {
|
146
|
+
xmlErrorPtr error = xmlGetLastError();
|
147
|
+
if (error) {
|
148
|
+
Nokogiri_error_raise(NULL, error);
|
149
|
+
} else {
|
150
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
151
|
+
}
|
152
|
+
|
153
|
+
return Qnil;
|
154
|
+
}
|
155
|
+
|
156
|
+
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
157
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
158
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
159
|
+
|
160
|
+
return rb_schema;
|
161
|
+
}
|
162
|
+
|
163
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
164
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
165
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
166
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
167
|
+
*/
|
168
|
+
static int
|
169
|
+
has_blank_nodes_p(VALUE cache)
|
170
|
+
{
|
171
|
+
long i;
|
172
|
+
|
173
|
+
if (NIL_P(cache)) {
|
174
|
+
return 0;
|
175
|
+
}
|
176
|
+
|
177
|
+
for (i = 0; i < RARRAY_LEN(cache); i++) {
|
178
|
+
xmlNodePtr node;
|
179
|
+
VALUE element = rb_ary_entry(cache, i);
|
180
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
181
|
+
if (xmlIsBlankNode(node)) {
|
182
|
+
return 1;
|
183
|
+
}
|
184
|
+
}
|
185
|
+
|
186
|
+
return 0;
|
187
|
+
}
|
188
|
+
|
189
|
+
/*
|
190
|
+
* call-seq:
|
191
|
+
* from_document(doc)
|
192
|
+
*
|
193
|
+
* Create a new Schema from the Nokogiri::XML::Document +doc+
|
194
|
+
*/
|
195
|
+
static VALUE
|
196
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
197
|
+
{
|
198
|
+
VALUE document;
|
199
|
+
VALUE parse_options;
|
200
|
+
int parse_options_int;
|
201
|
+
xmlDocPtr doc;
|
202
|
+
xmlSchemaParserCtxtPtr ctx;
|
203
|
+
xmlSchemaPtr schema;
|
204
|
+
VALUE errors;
|
205
|
+
VALUE rb_schema;
|
206
|
+
int scanned_args = 0;
|
207
|
+
xmlExternalEntityLoader old_loader = 0;
|
208
|
+
|
209
|
+
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
210
|
+
|
211
|
+
Noko_Node_Get_Struct(document, xmlDoc, doc);
|
212
|
+
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
213
|
+
|
214
|
+
if (scanned_args == 1) {
|
215
|
+
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
216
|
+
}
|
217
|
+
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
218
|
+
|
219
|
+
if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
|
220
|
+
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
|
221
|
+
}
|
222
|
+
|
223
|
+
ctx = xmlSchemaNewDocParserCtxt(doc);
|
224
|
+
|
225
|
+
errors = rb_ary_new();
|
226
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
227
|
+
|
228
|
+
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
|
229
|
+
xmlSchemaSetParserStructuredErrors(
|
230
|
+
ctx,
|
231
|
+
Nokogiri_error_array_pusher,
|
232
|
+
(void *)errors
|
233
|
+
);
|
234
|
+
#endif
|
235
|
+
|
236
|
+
if (parse_options_int & XML_PARSE_NONET) {
|
237
|
+
old_loader = xmlGetExternalEntityLoader();
|
238
|
+
xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
|
239
|
+
}
|
240
|
+
|
241
|
+
schema = xmlSchemaParse(ctx);
|
242
|
+
|
243
|
+
if (old_loader) {
|
244
|
+
xmlSetExternalEntityLoader(old_loader);
|
245
|
+
}
|
246
|
+
|
247
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
248
|
+
xmlSchemaFreeParserCtxt(ctx);
|
249
|
+
|
250
|
+
if (NULL == schema) {
|
251
|
+
xmlErrorPtr error = xmlGetLastError();
|
252
|
+
if (error) {
|
253
|
+
Nokogiri_error_raise(NULL, error);
|
254
|
+
} else {
|
255
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
256
|
+
}
|
257
|
+
|
258
|
+
return Qnil;
|
259
|
+
}
|
260
|
+
|
261
|
+
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
262
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
263
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
264
|
+
|
265
|
+
return rb_schema;
|
266
|
+
|
267
|
+
return Qnil;
|
268
|
+
}
|
269
|
+
|
270
|
+
void
|
271
|
+
noko_init_xml_schema()
|
272
|
+
{
|
273
|
+
cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
|
274
|
+
|
275
|
+
rb_undef_alloc_func(cNokogiriXmlSchema);
|
276
|
+
|
277
|
+
rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1);
|
278
|
+
rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1);
|
279
|
+
|
280
|
+
rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1);
|
281
|
+
rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1);
|
282
|
+
}
|