nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,44 +1,64 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlSaxPushParser ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
xml_sax_push_parser_free(void *data)
|
4
7
|
{
|
5
|
-
|
6
|
-
if (ctx
|
7
|
-
|
8
|
+
xmlParserCtxtPtr ctx = data;
|
9
|
+
if (ctx->myDoc) {
|
10
|
+
xmlFreeDoc(ctx->myDoc);
|
11
|
+
}
|
12
|
+
if (ctx) {
|
8
13
|
xmlFreeParserCtxt(ctx);
|
9
14
|
}
|
10
|
-
NOKOGIRI_DEBUG_END(ctx);
|
11
15
|
}
|
12
16
|
|
13
|
-
static
|
17
|
+
static const rb_data_type_t xml_sax_push_parser_type = {
|
18
|
+
.wrap_struct_name = "xmlParserCtxt",
|
19
|
+
.function = {
|
20
|
+
.dfree = xml_sax_push_parser_free,
|
21
|
+
},
|
22
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
23
|
+
};
|
24
|
+
|
25
|
+
static VALUE
|
26
|
+
xml_sax_push_parser_allocate(VALUE klass)
|
27
|
+
{
|
28
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
|
29
|
+
}
|
30
|
+
|
31
|
+
xmlParserCtxtPtr
|
32
|
+
noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
|
14
33
|
{
|
15
|
-
|
34
|
+
xmlParserCtxtPtr c_parser;
|
35
|
+
TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
|
36
|
+
return c_parser;
|
16
37
|
}
|
17
38
|
|
18
39
|
/*
|
19
|
-
* call-seq:
|
20
|
-
* native_write(chunk, last_chunk)
|
21
|
-
*
|
22
40
|
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
|
23
41
|
*/
|
24
|
-
static VALUE
|
42
|
+
static VALUE
|
43
|
+
noko_xml_sax_push_parser__native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
25
44
|
{
|
26
45
|
xmlParserCtxtPtr ctx;
|
27
|
-
const char *
|
46
|
+
const char *chunk = NULL;
|
28
47
|
int size = 0;
|
29
48
|
|
30
|
-
|
31
|
-
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
49
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
32
50
|
|
33
51
|
if (Qnil != _chunk) {
|
34
52
|
chunk = StringValuePtr(_chunk);
|
35
53
|
size = (int)RSTRING_LEN(_chunk);
|
36
54
|
}
|
37
55
|
|
56
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
57
|
+
|
38
58
|
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
39
|
-
if (!(ctx
|
40
|
-
|
41
|
-
|
59
|
+
if (!(xmlCtxtGetOptions(ctx) & XML_PARSE_RECOVER)) {
|
60
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
61
|
+
noko__error_raise(NULL, e);
|
42
62
|
}
|
43
63
|
}
|
44
64
|
|
@@ -51,13 +71,14 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
51
71
|
*
|
52
72
|
* Initialize the push parser with +xml_sax+ using +filename+
|
53
73
|
*/
|
54
|
-
static VALUE
|
74
|
+
static VALUE
|
75
|
+
noko_xml_sax_push_parser__initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
55
76
|
{
|
56
77
|
xmlSAXHandlerPtr sax;
|
57
|
-
const char *
|
78
|
+
const char *filename = NULL;
|
58
79
|
xmlParserCtxtPtr ctx;
|
59
80
|
|
60
|
-
|
81
|
+
sax = noko_xml_sax_parser_unwrap(_xml_sax);
|
61
82
|
|
62
83
|
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
63
84
|
|
@@ -72,28 +93,34 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
|
72
93
|
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
73
94
|
}
|
74
95
|
|
75
|
-
ctx->userData =
|
96
|
+
ctx->userData = ctx;
|
97
|
+
ctx->_private = (void *)_xml_sax;
|
76
98
|
|
77
|
-
ctx->sax2 = 1;
|
78
99
|
DATA_PTR(self) = ctx;
|
79
100
|
return self;
|
80
101
|
}
|
81
102
|
|
82
|
-
static VALUE
|
103
|
+
static VALUE
|
104
|
+
noko_xml_sax_push_parser__options_get(VALUE self)
|
83
105
|
{
|
84
106
|
xmlParserCtxtPtr ctx;
|
85
|
-
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
86
107
|
|
87
|
-
|
108
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
109
|
+
|
110
|
+
return INT2NUM(xmlCtxtGetOptions(ctx));
|
88
111
|
}
|
89
112
|
|
90
|
-
static VALUE
|
113
|
+
static VALUE
|
114
|
+
noko_xml_sax_push_parser__options_set(VALUE self, VALUE options)
|
91
115
|
{
|
116
|
+
int error;
|
92
117
|
xmlParserCtxtPtr ctx;
|
93
|
-
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
94
118
|
|
95
|
-
|
96
|
-
|
119
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
120
|
+
|
121
|
+
error = xmlCtxtSetOptions(ctx, (int)NUM2INT(options));
|
122
|
+
if (error) {
|
123
|
+
rb_raise(rb_eRuntimeError, "Cannot set XML parser context options (%x)", error);
|
97
124
|
}
|
98
125
|
|
99
126
|
return Qnil;
|
@@ -101,59 +128,79 @@ static VALUE set_options(VALUE self, VALUE options)
|
|
101
128
|
|
102
129
|
/*
|
103
130
|
* call-seq:
|
104
|
-
*
|
131
|
+
* replace_entities
|
132
|
+
*
|
133
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
105
134
|
*
|
106
|
-
*
|
107
|
-
*
|
135
|
+
* [Returns] (Boolean) Value of the parse option. (Default +false+)
|
136
|
+
*
|
137
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
138
|
+
* replacement.
|
108
139
|
*/
|
109
|
-
static VALUE
|
140
|
+
static VALUE
|
141
|
+
noko_xml_sax_push_parser__replace_entities_get(VALUE self)
|
110
142
|
{
|
111
|
-
xmlParserCtxtPtr
|
112
|
-
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
143
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
|
113
144
|
|
114
|
-
if (
|
115
|
-
return Qfalse;
|
116
|
-
} else {
|
145
|
+
if (xmlCtxtGetOptions(ctxt) & XML_PARSE_NOENT) {
|
117
146
|
return Qtrue;
|
147
|
+
} else {
|
148
|
+
return Qfalse;
|
118
149
|
}
|
119
150
|
}
|
120
151
|
|
121
152
|
/*
|
122
153
|
* call-seq:
|
123
|
-
*
|
154
|
+
* replace_entities=(value)
|
155
|
+
*
|
156
|
+
* See Document@Entity+Handling for an explanation of the behavior controlled by this flag.
|
124
157
|
*
|
125
|
-
*
|
126
|
-
*
|
158
|
+
* [Parameters]
|
159
|
+
* - +value+ (Boolean) Whether external parsed entities will be resolved.
|
160
|
+
*
|
161
|
+
* ⚠ <b>It is UNSAFE to set this option to +true+</b> when parsing untrusted documents. The option
|
162
|
+
* defaults to +false+ for this reason.
|
163
|
+
*
|
164
|
+
* This option is perhaps misnamed by the libxml2 author, since it controls resolution and not
|
165
|
+
* replacement.
|
127
166
|
*/
|
128
|
-
static VALUE
|
167
|
+
static VALUE
|
168
|
+
noko_xml_sax_push_parser__replace_entities_set(VALUE self, VALUE value)
|
129
169
|
{
|
130
|
-
|
131
|
-
|
170
|
+
int error;
|
171
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_push_parser_unwrap(self);
|
132
172
|
|
133
|
-
if (
|
134
|
-
|
173
|
+
if (RB_TEST(value)) {
|
174
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) | XML_PARSE_NOENT);
|
135
175
|
} else {
|
136
|
-
|
176
|
+
error = xmlCtxtSetOptions(ctxt, xmlCtxtGetOptions(ctxt) & ~XML_PARSE_NOENT);
|
177
|
+
}
|
178
|
+
|
179
|
+
if (error) {
|
180
|
+
rb_raise(rb_eRuntimeError, "failed to set parser context options (%x)", error);
|
137
181
|
}
|
138
182
|
|
139
183
|
return value;
|
140
184
|
}
|
141
185
|
|
142
|
-
|
143
|
-
void
|
186
|
+
void
|
187
|
+
noko_init_xml_sax_push_parser(void)
|
144
188
|
{
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
rb_define_method(
|
156
|
-
|
157
|
-
|
158
|
-
|
189
|
+
cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
|
190
|
+
|
191
|
+
rb_define_alloc_func(cNokogiriXmlSaxPushParser, xml_sax_push_parser_allocate);
|
192
|
+
|
193
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options",
|
194
|
+
noko_xml_sax_push_parser__options_get, 0);
|
195
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options=",
|
196
|
+
noko_xml_sax_push_parser__options_set, 1);
|
197
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities",
|
198
|
+
noko_xml_sax_push_parser__replace_entities_get, 0);
|
199
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=",
|
200
|
+
noko_xml_sax_push_parser__replace_entities_set, 1);
|
201
|
+
|
202
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native",
|
203
|
+
noko_xml_sax_push_parser__initialize_native, 2);
|
204
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write",
|
205
|
+
noko_xml_sax_push_parser__native_write, 2);
|
159
206
|
}
|
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -1,234 +1,226 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlSchema;
|
4
|
+
|
5
|
+
static void
|
6
|
+
xml_schema_deallocate(void *data)
|
4
7
|
{
|
5
|
-
|
8
|
+
xmlSchemaPtr schema = data;
|
6
9
|
xmlSchemaFree(schema);
|
7
|
-
NOKOGIRI_DEBUG_END(schema);
|
8
10
|
}
|
9
11
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
static const rb_data_type_t xml_schema_type = {
|
13
|
+
.wrap_struct_name = "xmlSchema",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_schema_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
20
|
+
static VALUE
|
21
|
+
noko_xml_schema__validate_document(VALUE self, VALUE document)
|
17
22
|
{
|
18
23
|
xmlDocPtr doc;
|
19
24
|
xmlSchemaPtr schema;
|
20
25
|
xmlSchemaValidCtxtPtr valid_ctxt;
|
21
26
|
VALUE errors;
|
22
27
|
|
23
|
-
|
24
|
-
|
28
|
+
TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
|
29
|
+
doc = noko_xml_document_unwrap(document);
|
25
30
|
|
26
31
|
errors = rb_ary_new();
|
27
32
|
|
28
33
|
valid_ctxt = xmlSchemaNewValidCtxt(schema);
|
29
34
|
|
30
|
-
if(NULL == valid_ctxt) {
|
35
|
+
if (NULL == valid_ctxt) {
|
31
36
|
/* we have a problem */
|
32
37
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
33
38
|
}
|
34
39
|
|
35
|
-
#ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS
|
36
40
|
xmlSchemaSetValidStructuredErrors(
|
37
41
|
valid_ctxt,
|
38
|
-
|
42
|
+
noko__error_array_pusher,
|
39
43
|
(void *)errors
|
40
44
|
);
|
41
|
-
#endif
|
42
45
|
|
43
|
-
xmlSchemaValidateDoc(valid_ctxt, doc);
|
46
|
+
int status = xmlSchemaValidateDoc(valid_ctxt, doc);
|
44
47
|
|
45
48
|
xmlSchemaFreeValidCtxt(valid_ctxt);
|
46
49
|
|
50
|
+
if (status != 0) {
|
51
|
+
if (RARRAY_LEN(errors) == 0) {
|
52
|
+
rb_ary_push(errors, rb_str_new2("Could not validate document"));
|
53
|
+
}
|
54
|
+
}
|
55
|
+
|
47
56
|
return errors;
|
48
57
|
}
|
49
58
|
|
50
|
-
|
51
|
-
|
52
|
-
* validate_file(filename)
|
53
|
-
*
|
54
|
-
* Validate a file against this Schema.
|
55
|
-
*/
|
56
|
-
static VALUE validate_file(VALUE self, VALUE rb_filename)
|
59
|
+
static VALUE
|
60
|
+
noko_xml_schema__validate_file(VALUE self, VALUE rb_filename)
|
57
61
|
{
|
58
62
|
xmlSchemaPtr schema;
|
59
63
|
xmlSchemaValidCtxtPtr valid_ctxt;
|
60
64
|
const char *filename ;
|
61
65
|
VALUE errors;
|
62
66
|
|
63
|
-
|
64
|
-
filename = (const char*)StringValueCStr(rb_filename) ;
|
67
|
+
TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
|
68
|
+
filename = (const char *)StringValueCStr(rb_filename) ;
|
65
69
|
|
66
70
|
errors = rb_ary_new();
|
67
71
|
|
68
72
|
valid_ctxt = xmlSchemaNewValidCtxt(schema);
|
69
73
|
|
70
|
-
if(NULL == valid_ctxt) {
|
74
|
+
if (NULL == valid_ctxt) {
|
71
75
|
/* we have a problem */
|
72
76
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
73
77
|
}
|
74
78
|
|
75
|
-
#ifdef HAVE_XMLSCHEMASETVALIDSTRUCTUREDERRORS
|
76
79
|
xmlSchemaSetValidStructuredErrors(
|
77
80
|
valid_ctxt,
|
78
|
-
|
81
|
+
noko__error_array_pusher,
|
79
82
|
(void *)errors
|
80
83
|
);
|
81
|
-
#endif
|
82
84
|
|
83
|
-
xmlSchemaValidateFile(valid_ctxt, filename, 0);
|
85
|
+
int status = xmlSchemaValidateFile(valid_ctxt, filename, 0);
|
84
86
|
|
85
87
|
xmlSchemaFreeValidCtxt(valid_ctxt);
|
86
88
|
|
89
|
+
if (status != 0) {
|
90
|
+
if (RARRAY_LEN(errors) == 0) {
|
91
|
+
rb_ary_push(errors, rb_str_new2("Could not validate file."));
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
87
95
|
return errors;
|
88
96
|
}
|
89
97
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
static VALUE read_memory(VALUE klass, VALUE content)
|
98
|
+
static VALUE
|
99
|
+
xml_schema_parse_schema(
|
100
|
+
VALUE rb_class,
|
101
|
+
xmlSchemaParserCtxtPtr c_parser_context,
|
102
|
+
VALUE rb_parse_options
|
103
|
+
)
|
97
104
|
{
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
105
|
+
xmlExternalEntityLoader saved_loader = 0;
|
106
|
+
libxmlStructuredErrorHandlerState handler_state;
|
107
|
+
|
108
|
+
if (NIL_P(rb_parse_options)) {
|
109
|
+
rb_parse_options = rb_const_get_at(
|
110
|
+
rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
|
111
|
+
rb_intern("DEFAULT_SCHEMA")
|
112
|
+
);
|
113
|
+
}
|
114
|
+
int c_parse_options = (int)NUM2INT(rb_funcall(rb_parse_options, rb_intern("to_i"), 0));
|
115
|
+
|
116
|
+
VALUE rb_errors = rb_ary_new();
|
117
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
106
118
|
|
107
|
-
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
|
108
119
|
xmlSchemaSetParserStructuredErrors(
|
109
|
-
|
110
|
-
|
111
|
-
(void *)
|
120
|
+
c_parser_context,
|
121
|
+
noko__error_array_pusher,
|
122
|
+
(void *)rb_errors
|
112
123
|
);
|
113
|
-
#endif
|
114
|
-
|
115
|
-
schema = xmlSchemaParse(ctx);
|
116
|
-
|
117
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
118
|
-
xmlSchemaFreeParserCtxt(ctx);
|
119
|
-
|
120
|
-
if(NULL == schema) {
|
121
|
-
xmlErrorPtr error = xmlGetLastError();
|
122
|
-
if(error)
|
123
|
-
Nokogiri_error_raise(NULL, error);
|
124
|
-
else
|
125
|
-
rb_raise(rb_eRuntimeError, "Could not parse document");
|
126
124
|
|
127
|
-
|
125
|
+
if (c_parse_options & XML_PARSE_NONET) {
|
126
|
+
saved_loader = xmlGetExternalEntityLoader();
|
127
|
+
xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
|
128
128
|
}
|
129
129
|
|
130
|
-
|
131
|
-
rb_iv_set(rb_schema, "@errors", errors);
|
130
|
+
xmlSchemaPtr c_schema = xmlSchemaParse(c_parser_context);
|
132
131
|
|
133
|
-
|
134
|
-
|
132
|
+
if (saved_loader) {
|
133
|
+
xmlSetExternalEntityLoader(saved_loader);
|
134
|
+
}
|
135
135
|
|
136
|
-
|
137
|
-
|
138
|
-
* out from under the VALUE pointer. This function checks to see if any of
|
139
|
-
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
140
|
-
*/
|
141
|
-
static int has_blank_nodes_p(VALUE cache)
|
142
|
-
{
|
143
|
-
long i;
|
136
|
+
xmlSchemaFreeParserCtxt(c_parser_context);
|
137
|
+
noko__structured_error_func_restore(&handler_state);
|
144
138
|
|
145
|
-
|
146
|
-
|
139
|
+
if (NULL == c_schema) {
|
140
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
141
|
+
if (RB_TEST(exception)) {
|
142
|
+
rb_exc_raise(exception);
|
143
|
+
} else {
|
144
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
147
145
|
}
|
146
|
+
}
|
148
147
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
Data_Get_Struct(element, xmlNode, node);
|
153
|
-
if (xmlIsBlankNode(node)) {
|
154
|
-
return 1;
|
155
|
-
}
|
156
|
-
}
|
148
|
+
VALUE rb_schema = TypedData_Wrap_Struct(rb_class, &xml_schema_type, c_schema);
|
149
|
+
rb_iv_set(rb_schema, "@errors", rb_errors);
|
150
|
+
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
157
151
|
|
158
|
-
|
152
|
+
return rb_schema;
|
159
153
|
}
|
160
154
|
|
161
155
|
/*
|
162
|
-
* call-seq:
|
163
|
-
*
|
156
|
+
* :call-seq:
|
157
|
+
* from_document(input) → Nokogiri::XML::Schema
|
158
|
+
* from_document(input, parse_options) → Nokogiri::XML::Schema
|
159
|
+
*
|
160
|
+
* Parse an \XSD schema definition from a Document to create a new Nokogiri::XML::Schema
|
161
|
+
*
|
162
|
+
* [Parameters]
|
163
|
+
* - +input+ (XML::Document) A document containing the \XSD schema definition
|
164
|
+
* - +parse_options+ (Nokogiri::XML::ParseOptions)
|
165
|
+
* Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
|
164
166
|
*
|
165
|
-
*
|
167
|
+
* [Returns] Nokogiri::XML::Schema
|
166
168
|
*/
|
167
|
-
static VALUE
|
169
|
+
static VALUE
|
170
|
+
noko_xml_schema_s_from_document(int argc, VALUE *argv, VALUE rb_class)
|
168
171
|
{
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
VALUE
|
172
|
+
/* TODO: deprecate this method and put file-or-string logic into .new so that becomes the
|
173
|
+
* preferred entry point, and this can become a private method */
|
174
|
+
VALUE rb_document;
|
175
|
+
VALUE rb_parse_options;
|
173
176
|
VALUE rb_schema;
|
177
|
+
xmlDocPtr c_document;
|
178
|
+
xmlSchemaParserCtxtPtr c_parser_context;
|
179
|
+
int defensive_copy_p = 0;
|
174
180
|
|
175
|
-
|
181
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
|
176
182
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
|
183
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
|
184
|
+
rb_raise(rb_eTypeError,
|
185
|
+
"expected parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
|
186
|
+
rb_obj_class(rb_document));
|
182
187
|
}
|
183
188
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
Nokogiri_error_array_pusher,
|
193
|
-
(void *)errors
|
194
|
-
);
|
195
|
-
#endif
|
196
|
-
|
197
|
-
schema = xmlSchemaParse(ctx);
|
189
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
190
|
+
xmlNodePtr deprecated_node_type_arg;
|
191
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
192
|
+
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
193
|
+
c_document = deprecated_node_type_arg->doc;
|
194
|
+
} else {
|
195
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
196
|
+
}
|
198
197
|
|
199
|
-
|
200
|
-
|
198
|
+
if (noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
|
199
|
+
// see https://github.com/sparklemotion/nokogiri/pull/2001
|
200
|
+
c_document = xmlCopyDoc(c_document, 1);
|
201
|
+
defensive_copy_p = 1;
|
202
|
+
}
|
201
203
|
|
202
|
-
|
203
|
-
|
204
|
-
if(error)
|
205
|
-
Nokogiri_error_raise(NULL, error);
|
206
|
-
else
|
207
|
-
rb_raise(rb_eRuntimeError, "Could not parse document");
|
204
|
+
c_parser_context = xmlSchemaNewDocParserCtxt(c_document);
|
205
|
+
rb_schema = xml_schema_parse_schema(rb_class, c_parser_context, rb_parse_options);
|
208
206
|
|
209
|
-
|
207
|
+
if (defensive_copy_p) {
|
208
|
+
xmlFreeDoc(c_document);
|
209
|
+
c_document = NULL;
|
210
210
|
}
|
211
211
|
|
212
|
-
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
213
|
-
rb_iv_set(rb_schema, "@errors", errors);
|
214
|
-
|
215
212
|
return rb_schema;
|
216
|
-
|
217
|
-
return Qnil;
|
218
213
|
}
|
219
214
|
|
220
|
-
|
221
|
-
void
|
215
|
+
void
|
216
|
+
noko_init_xml_schema(void)
|
222
217
|
{
|
223
|
-
|
224
|
-
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
225
|
-
VALUE klass = rb_define_class_under(xml, "Schema", rb_cObject);
|
218
|
+
cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
|
226
219
|
|
227
|
-
cNokogiriXmlSchema
|
220
|
+
rb_undef_alloc_func(cNokogiriXmlSchema);
|
228
221
|
|
229
|
-
rb_define_singleton_method(
|
230
|
-
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
222
|
+
rb_define_singleton_method(cNokogiriXmlSchema, "from_document", noko_xml_schema_s_from_document, -1);
|
231
223
|
|
232
|
-
rb_define_private_method(
|
233
|
-
rb_define_private_method(
|
224
|
+
rb_define_private_method(cNokogiriXmlSchema, "validate_document", noko_xml_schema__validate_document, 1);
|
225
|
+
rb_define_private_method(cNokogiriXmlSchema, "validate_file", noko_xml_schema__validate_file, 1);
|
234
226
|
}
|