nokogiri 1.12.5 → 1.13.8
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -64
- data/ext/nokogiri/extconf.rb +66 -44
- data/ext/nokogiri/gumbo.c +1 -1
- data/ext/nokogiri/html4_sax_parser_context.c +2 -3
- data/ext/nokogiri/nokogiri.h +8 -0
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_attribute_decl.c +3 -3
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +36 -36
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +10 -10
- data/ext/nokogiri/xml_element_decl.c +3 -3
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_entity_decl.c +5 -5
- data/ext/nokogiri/xml_node.c +707 -381
- data/ext/nokogiri/xml_node_set.c +4 -4
- data/ext/nokogiri/xml_reader.c +88 -11
- data/ext/nokogiri/xml_sax_parser_context.c +10 -3
- data/ext/nokogiri/xml_schema.c +3 -3
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +73 -50
- data/ext/nokogiri/xslt_stylesheet.c +107 -9
- data/gumbo-parser/src/parser.c +0 -11
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +88 -77
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +5 -2
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +27 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +10 -3
- data/lib/nokogiri/html5.rb +69 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +20 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +34 -32
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +144 -103
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +521 -351
- data/lib/nokogiri/xml/node_set.rb +50 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +20 -24
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +37 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +4 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0004-use-glibc-strlen.patch +3 -3
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2443 -1914
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +104 -32
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
data/ext/nokogiri/xml_node_set.c
CHANGED
@@ -156,7 +156,7 @@ push(VALUE self, VALUE rb_node)
|
|
156
156
|
Check_Node_Set_Node_Type(rb_node);
|
157
157
|
|
158
158
|
Data_Get_Struct(self, xmlNodeSet, node_set);
|
159
|
-
|
159
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, node);
|
160
160
|
|
161
161
|
xmlXPathNodeSetAdd(node_set, node);
|
162
162
|
|
@@ -179,7 +179,7 @@ delete (VALUE self, VALUE rb_node)
|
|
179
179
|
Check_Node_Set_Node_Type(rb_node);
|
180
180
|
|
181
181
|
Data_Get_Struct(self, xmlNodeSet, node_set);
|
182
|
-
|
182
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, node);
|
183
183
|
|
184
184
|
if (xmlXPathNodeSetContains(node_set, node)) {
|
185
185
|
xpath_node_set_del(node_set, node);
|
@@ -228,7 +228,7 @@ include_eh(VALUE self, VALUE rb_node)
|
|
228
228
|
Check_Node_Set_Node_Type(rb_node);
|
229
229
|
|
230
230
|
Data_Get_Struct(self, xmlNodeSet, node_set);
|
231
|
-
|
231
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, node);
|
232
232
|
|
233
233
|
return (xmlXPathNodeSetContains(node_set, node) ? Qtrue : Qfalse);
|
234
234
|
}
|
@@ -430,7 +430,7 @@ unlink_nodeset(VALUE self)
|
|
430
430
|
xmlNodePtr node_ptr;
|
431
431
|
node = noko_xml_node_wrap(Qnil, node_set->nodeTab[j]);
|
432
432
|
rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */
|
433
|
-
|
433
|
+
Noko_Node_Get_Struct(node, xmlNode, node_ptr);
|
434
434
|
node_set->nodeTab[j] = node_ptr ;
|
435
435
|
}
|
436
436
|
}
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader)
|
|
31
31
|
return (0);
|
32
32
|
}
|
33
33
|
|
34
|
+
// TODO: merge this function into the `namespaces` method implementation
|
34
35
|
static void
|
35
36
|
Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
36
37
|
{
|
@@ -148,9 +149,13 @@ namespaces(VALUE self)
|
|
148
149
|
}
|
149
150
|
|
150
151
|
/*
|
151
|
-
|
152
|
-
|
153
|
-
|
152
|
+
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
153
|
+
|
154
|
+
Get the attributes of the current node as an Array of XML::Attr
|
155
|
+
|
156
|
+
⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
|
157
|
+
|
158
|
+
See related: #attribute_hash, #attributes
|
154
159
|
*/
|
155
160
|
static VALUE
|
156
161
|
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
@@ -160,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
|
160
165
|
VALUE attr_nodes;
|
161
166
|
int j;
|
162
167
|
|
168
|
+
// TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
|
169
|
+
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
170
|
+
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
171
|
+
|
163
172
|
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
164
173
|
|
165
174
|
if (! has_attributes(c_reader)) {
|
@@ -181,6 +190,47 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
|
181
190
|
return attr_nodes;
|
182
191
|
}
|
183
192
|
|
193
|
+
/*
|
194
|
+
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
195
|
+
|
196
|
+
Get the attributes of the current node as a Hash of names and values.
|
197
|
+
|
198
|
+
See related: #attributes and #namespaces
|
199
|
+
*/
|
200
|
+
static VALUE
|
201
|
+
rb_xml_reader_attribute_hash(VALUE rb_reader)
|
202
|
+
{
|
203
|
+
VALUE rb_attributes = rb_hash_new();
|
204
|
+
xmlTextReaderPtr c_reader;
|
205
|
+
xmlNodePtr c_node;
|
206
|
+
xmlAttrPtr c_property;
|
207
|
+
|
208
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
209
|
+
|
210
|
+
if (!has_attributes(c_reader)) {
|
211
|
+
return rb_attributes;
|
212
|
+
}
|
213
|
+
|
214
|
+
c_node = xmlTextReaderExpand(c_reader);
|
215
|
+
c_property = c_node->properties;
|
216
|
+
while (c_property != NULL) {
|
217
|
+
VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
|
218
|
+
VALUE rb_value = Qnil;
|
219
|
+
xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
|
220
|
+
|
221
|
+
if (c_value) {
|
222
|
+
rb_value = NOKOGIRI_STR_NEW2(c_value);
|
223
|
+
xmlFree(c_value);
|
224
|
+
}
|
225
|
+
|
226
|
+
rb_hash_aset(rb_attributes, rb_name, rb_value);
|
227
|
+
|
228
|
+
c_property = c_property->next;
|
229
|
+
}
|
230
|
+
|
231
|
+
return rb_attributes;
|
232
|
+
}
|
233
|
+
|
184
234
|
/*
|
185
235
|
* call-seq:
|
186
236
|
* attribute_at(index)
|
@@ -414,16 +464,23 @@ name(VALUE self)
|
|
414
464
|
* Get the xml:base of the node
|
415
465
|
*/
|
416
466
|
static VALUE
|
417
|
-
|
467
|
+
rb_xml_reader_base_uri(VALUE rb_reader)
|
418
468
|
{
|
419
|
-
|
420
|
-
|
469
|
+
VALUE rb_base_uri;
|
470
|
+
xmlTextReaderPtr c_reader;
|
471
|
+
xmlChar *c_base_uri;
|
421
472
|
|
422
|
-
Data_Get_Struct(
|
423
|
-
|
424
|
-
|
473
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
474
|
+
|
475
|
+
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
476
|
+
if (c_base_uri == NULL) {
|
477
|
+
return Qnil;
|
478
|
+
}
|
425
479
|
|
426
|
-
|
480
|
+
rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
|
481
|
+
xmlFree(c_base_uri);
|
482
|
+
|
483
|
+
return rb_base_uri;
|
427
484
|
}
|
428
485
|
|
429
486
|
/*
|
@@ -652,6 +709,24 @@ empty_element_p(VALUE self)
|
|
652
709
|
return Qfalse;
|
653
710
|
}
|
654
711
|
|
712
|
+
static VALUE
|
713
|
+
rb_xml_reader_encoding(VALUE rb_reader)
|
714
|
+
{
|
715
|
+
xmlTextReaderPtr c_reader;
|
716
|
+
const char *parser_encoding;
|
717
|
+
VALUE constructor_encoding;
|
718
|
+
|
719
|
+
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
720
|
+
if (RTEST(constructor_encoding)) {
|
721
|
+
return constructor_encoding;
|
722
|
+
}
|
723
|
+
|
724
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
725
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
726
|
+
if (parser_encoding == NULL) { return Qnil; }
|
727
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
728
|
+
}
|
729
|
+
|
655
730
|
void
|
656
731
|
noko_init_xml_reader()
|
657
732
|
{
|
@@ -671,11 +746,13 @@ noko_init_xml_reader()
|
|
671
746
|
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
672
747
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
673
748
|
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
749
|
+
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
674
750
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
675
|
-
rb_define_method(cNokogiriXmlReader, "base_uri",
|
751
|
+
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
676
752
|
rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
|
677
753
|
rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
|
678
754
|
rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
|
755
|
+
rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
|
679
756
|
rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
|
680
757
|
rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
|
681
758
|
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
|
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
+
static ID id_read;
|
6
|
+
|
5
7
|
static void
|
6
8
|
deallocate(xmlParserCtxtPtr ctxt)
|
7
9
|
{
|
@@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
26
28
|
xmlParserCtxtPtr ctxt;
|
27
29
|
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
28
30
|
|
31
|
+
if (!rb_respond_to(io, id_read)) {
|
32
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
33
|
+
}
|
34
|
+
|
29
35
|
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
30
36
|
(xmlInputReadCallback)noko_io_read,
|
31
37
|
(xmlInputCloseCallback)noko_io_close,
|
@@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
|
|
62
68
|
{
|
63
69
|
xmlParserCtxtPtr ctxt;
|
64
70
|
|
65
|
-
|
66
|
-
|
67
|
-
}
|
71
|
+
Check_Type(data, T_STRING);
|
72
|
+
|
68
73
|
if (!(int)RSTRING_LEN(data)) {
|
69
74
|
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
70
75
|
}
|
@@ -278,4 +283,6 @@ noko_init_xml_sax_parser_context()
|
|
278
283
|
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
279
284
|
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
280
285
|
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
286
|
+
|
287
|
+
id_read = rb_intern("read");
|
281
288
|
}
|
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -25,7 +25,7 @@ validate_document(VALUE self, VALUE document)
|
|
25
25
|
VALUE errors;
|
26
26
|
|
27
27
|
Data_Get_Struct(self, xmlSchema, schema);
|
28
|
-
|
28
|
+
Noko_Node_Get_Struct(document, xmlDoc, doc);
|
29
29
|
|
30
30
|
errors = rb_ary_new();
|
31
31
|
|
@@ -179,7 +179,7 @@ has_blank_nodes_p(VALUE cache)
|
|
179
179
|
for (i = 0; i < RARRAY_LEN(cache); i++) {
|
180
180
|
xmlNodePtr node;
|
181
181
|
VALUE element = rb_ary_entry(cache, i);
|
182
|
-
|
182
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
183
183
|
if (xmlIsBlankNode(node)) {
|
184
184
|
return 1;
|
185
185
|
}
|
@@ -210,7 +210,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
|
|
210
210
|
|
211
211
|
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
212
212
|
|
213
|
-
|
213
|
+
Noko_Node_Get_Struct(document, xmlDoc, doc);
|
214
214
|
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
215
215
|
|
216
216
|
if (scanned_args == 1) {
|
data/ext/nokogiri/xml_text.c
CHANGED
@@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
20
20
|
|
21
21
|
rb_scan_args(argc, argv, "2*", &string, &document, &rest);
|
22
22
|
|
23
|
-
|
23
|
+
Noko_Node_Get_Struct(document, xmlDoc, doc);
|
24
24
|
|
25
25
|
node = xmlNewText((xmlChar *)StringValueCStr(string));
|
26
26
|
node->doc = doc->doc;
|
@@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
86
86
|
xmlXPathFreeObject(needle);
|
87
87
|
}
|
88
88
|
|
89
|
+
|
90
|
+
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
|
91
|
+
static void
|
92
|
+
xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
93
|
+
{
|
94
|
+
xmlXPathObjectPtr element_name;
|
95
|
+
|
96
|
+
assert(ctxt->context->node);
|
97
|
+
|
98
|
+
CHECK_ARITY(1);
|
99
|
+
CAST_TO_STRING;
|
100
|
+
CHECK_TYPE(XPATH_STRING);
|
101
|
+
element_name = valuePop(ctxt);
|
102
|
+
|
103
|
+
valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval)));
|
104
|
+
|
105
|
+
xmlXPathFreeObject(element_name);
|
106
|
+
}
|
107
|
+
|
108
|
+
|
89
109
|
/*
|
90
110
|
* call-seq:
|
91
111
|
* register_ns(prefix, uri)
|
@@ -128,11 +148,44 @@ register_variable(VALUE self, VALUE name, VALUE value)
|
|
128
148
|
return self;
|
129
149
|
}
|
130
150
|
|
151
|
+
|
152
|
+
/*
|
153
|
+
* convert an XPath object into a Ruby object of the appropriate type.
|
154
|
+
* returns Qundef if no conversion was possible.
|
155
|
+
*/
|
156
|
+
static VALUE
|
157
|
+
xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx)
|
158
|
+
{
|
159
|
+
VALUE retval;
|
160
|
+
|
161
|
+
assert(xctx->doc);
|
162
|
+
assert(DOC_RUBY_OBJECT_TEST(xctx->doc));
|
163
|
+
|
164
|
+
switch (xobj->type) {
|
165
|
+
case XPATH_STRING:
|
166
|
+
retval = NOKOGIRI_STR_NEW2(xobj->stringval);
|
167
|
+
xmlFree(xobj->stringval);
|
168
|
+
return retval;
|
169
|
+
|
170
|
+
case XPATH_NODESET:
|
171
|
+
return noko_xml_node_set_wrap(xobj->nodesetval,
|
172
|
+
DOC_RUBY_OBJECT(xctx->doc));
|
173
|
+
|
174
|
+
case XPATH_NUMBER:
|
175
|
+
return rb_float_new(xobj->floatval);
|
176
|
+
|
177
|
+
case XPATH_BOOLEAN:
|
178
|
+
return (xobj->boolval == 1) ? Qtrue : Qfalse;
|
179
|
+
|
180
|
+
default:
|
181
|
+
return Qundef;
|
182
|
+
}
|
183
|
+
}
|
184
|
+
|
131
185
|
void
|
132
186
|
Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
133
187
|
const char *function_name)
|
134
188
|
{
|
135
|
-
int i;
|
136
189
|
VALUE result, doc;
|
137
190
|
VALUE *argv;
|
138
191
|
VALUE node_set = Qnil;
|
@@ -143,40 +196,25 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
|
|
143
196
|
assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));
|
144
197
|
|
145
198
|
argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE));
|
146
|
-
for (
|
147
|
-
rb_gc_register_address(&argv[
|
199
|
+
for (int j = 0 ; j < nargs ; ++j) {
|
200
|
+
rb_gc_register_address(&argv[j]);
|
148
201
|
}
|
149
202
|
|
150
203
|
doc = DOC_RUBY_OBJECT(ctx->context->doc);
|
151
204
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
break;
|
160
|
-
case XPATH_BOOLEAN:
|
161
|
-
argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
|
162
|
-
break;
|
163
|
-
case XPATH_NUMBER:
|
164
|
-
argv[i] = rb_float_new(obj->floatval);
|
165
|
-
break;
|
166
|
-
case XPATH_NODESET:
|
167
|
-
argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
|
168
|
-
break;
|
169
|
-
default:
|
170
|
-
argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
171
|
-
}
|
172
|
-
xmlXPathFreeNodeSetList(obj);
|
173
|
-
} while (i-- > 0);
|
205
|
+
for (int j = nargs - 1 ; j >= 0 ; --j) {
|
206
|
+
obj = valuePop(ctx);
|
207
|
+
argv[j] = xpath2ruby(obj, ctx->context);
|
208
|
+
if (argv[j] == Qundef) {
|
209
|
+
argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
210
|
+
}
|
211
|
+
xmlXPathFreeNodeSetList(obj);
|
174
212
|
}
|
175
213
|
|
176
214
|
result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv);
|
177
215
|
|
178
|
-
for (
|
179
|
-
rb_gc_unregister_address(&argv[
|
216
|
+
for (int j = 0 ; j < nargs ; ++j) {
|
217
|
+
rb_gc_unregister_address(&argv[j]);
|
180
218
|
}
|
181
219
|
free(argv);
|
182
220
|
|
@@ -275,7 +313,7 @@ static VALUE
|
|
275
313
|
evaluate(int argc, VALUE *argv, VALUE self)
|
276
314
|
{
|
277
315
|
VALUE search_path, xpath_handler;
|
278
|
-
VALUE
|
316
|
+
VALUE retval = Qnil;
|
279
317
|
xmlXPathContextPtr ctx;
|
280
318
|
xmlXPathObjectPtr xpath;
|
281
319
|
xmlChar *query;
|
@@ -310,31 +348,14 @@ evaluate(int argc, VALUE *argv, VALUE self)
|
|
310
348
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
311
349
|
}
|
312
350
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
switch (xpath->type) {
|
317
|
-
case XPATH_STRING:
|
318
|
-
thing = NOKOGIRI_STR_NEW2(xpath->stringval);
|
319
|
-
xmlFree(xpath->stringval);
|
320
|
-
break;
|
321
|
-
case XPATH_NODESET:
|
322
|
-
thing = noko_xml_node_set_wrap(xpath->nodesetval,
|
323
|
-
DOC_RUBY_OBJECT(ctx->doc));
|
324
|
-
break;
|
325
|
-
case XPATH_NUMBER:
|
326
|
-
thing = rb_float_new(xpath->floatval);
|
327
|
-
break;
|
328
|
-
case XPATH_BOOLEAN:
|
329
|
-
thing = xpath->boolval == 1 ? Qtrue : Qfalse;
|
330
|
-
break;
|
331
|
-
default:
|
332
|
-
thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
351
|
+
retval = xpath2ruby(xpath, ctx);
|
352
|
+
if (retval == Qundef) {
|
353
|
+
retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
333
354
|
}
|
334
355
|
|
335
356
|
xmlXPathFreeNodeSetList(xpath);
|
336
357
|
|
337
|
-
return
|
358
|
+
return retval;
|
338
359
|
}
|
339
360
|
|
340
361
|
/*
|
@@ -350,7 +371,7 @@ new (VALUE klass, VALUE nodeobj)
|
|
350
371
|
xmlXPathContextPtr ctx;
|
351
372
|
VALUE self;
|
352
373
|
|
353
|
-
|
374
|
+
Noko_Node_Get_Struct(nodeobj, xmlNode, node);
|
354
375
|
|
355
376
|
xmlXPathInit();
|
356
377
|
|
@@ -360,6 +381,8 @@ new (VALUE klass, VALUE nodeobj)
|
|
360
381
|
xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
361
382
|
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
362
383
|
xpath_builtin_css_class);
|
384
|
+
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
385
|
+
xpath_builtin_local_name_is);
|
363
386
|
|
364
387
|
self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
|
365
388
|
return self;
|
@@ -107,19 +107,117 @@ serialize(VALUE self, VALUE xmlobj)
|
|
107
107
|
}
|
108
108
|
|
109
109
|
/*
|
110
|
-
*
|
111
|
-
*
|
110
|
+
* call-seq:
|
111
|
+
* transform(document)
|
112
|
+
* transform(document, params = {})
|
113
|
+
*
|
114
|
+
* Apply an XSLT stylesheet to an XML::Document.
|
115
|
+
*
|
116
|
+
* [Parameters]
|
117
|
+
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
118
|
+
* - +params+ (Hash, Array) strings used as XSLT parameters.
|
119
|
+
*
|
120
|
+
* [Returns] Nokogiri::XML::Document
|
121
|
+
*
|
122
|
+
* *Example* of basic transformation:
|
123
|
+
*
|
124
|
+
* xslt = <<~XSLT
|
125
|
+
* <xsl:stylesheet version="1.0"
|
126
|
+
* xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
127
|
+
*
|
128
|
+
* <xsl:param name="title"/>
|
129
|
+
*
|
130
|
+
* <xsl:template match="/">
|
131
|
+
* <html>
|
132
|
+
* <body>
|
133
|
+
* <h1><xsl:value-of select="$title"/></h1>
|
134
|
+
* <ol>
|
135
|
+
* <xsl:for-each select="staff/employee">
|
136
|
+
*           <li><xsl:value-of select="employeeId"/></li>
|
137
|
+
* </xsl:for-each>
|
138
|
+
* </ol>
|
139
|
+
* </body>
|
140
|
+
* </html>
|
141
|
+
* </xsl:stylesheet>
|
142
|
+
* XSLT
|
143
|
+
*
|
144
|
+
* xml = <<~XML
|
145
|
+
* <?xml version="1.0"?>
|
146
|
+
* <staff>
|
147
|
+
* <employee>
|
148
|
+
* <employeeId>EMP0001</employeeId>
|
149
|
+
* <position>Accountant</position>
|
150
|
+
* </employee>
|
151
|
+
* <employee>
|
152
|
+
* <employeeId>EMP0002</employeeId>
|
153
|
+
* <position>Developer</position>
|
154
|
+
* </employee>
|
155
|
+
* </staff>
|
156
|
+
* XML
|
157
|
+
*
|
158
|
+
* doc = Nokogiri::XML::Document.parse(xml)
|
159
|
+
* stylesheet = Nokogiri::XSLT.parse(xslt)
|
160
|
+
*
|
161
|
+
* ⚠ Note that the +h1+ element is empty because no param has been provided!
|
162
|
+
*
|
163
|
+
* stylesheet.transform(doc).to_xml
|
164
|
+
* # => "<html><body>\n" +
|
165
|
+
* # "<h1></h1>\n" +
|
166
|
+
* # "<ol>\n" +
|
167
|
+
* # "<li>EMP0001</li>\n" +
|
168
|
+
* # "<li>EMP0002</li>\n" +
|
169
|
+
* # "</ol>\n" +
|
170
|
+
* # "</body></html>\n"
|
171
|
+
*
|
172
|
+
* *Example* of using an input parameter hash:
|
173
|
+
*
|
174
|
+
* ⚠ The title is populated, but note how we need to quote-escape the value.
|
175
|
+
*
|
176
|
+
* stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml
|
177
|
+
* # => "<html><body>\n" +
|
178
|
+
* # "<h1>Employee List</h1>\n" +
|
179
|
+
* # "<ol>\n" +
|
180
|
+
* # "<li>EMP0001</li>\n" +
|
181
|
+
* # "<li>EMP0002</li>\n" +
|
182
|
+
* # "</ol>\n" +
|
183
|
+
* # "</body></html>\n"
|
184
|
+
*
|
185
|
+
* *Example* using the XSLT.quote_params helper method to safely quote-escape strings:
|
186
|
+
*
|
187
|
+
* stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml
|
188
|
+
* # => "<html><body>\n" +
|
189
|
+
* # "<h1>Aaron's List</h1>\n" +
|
190
|
+
* # "<ol>\n" +
|
191
|
+
* # "<li>EMP0001</li>\n" +
|
192
|
+
* # "<li>EMP0002</li>\n" +
|
193
|
+
* # "</ol>\n" +
|
194
|
+
* # "</body></html>\n"
|
195
|
+
*
|
196
|
+
* *Example* using an array of XSLT parameters
|
197
|
+
*
|
198
|
+
* You can also use an array if you want to.
|
112
199
|
*
|
113
|
-
*
|
114
|
-
*
|
115
|
-
*
|
200
|
+
* stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml
|
201
|
+
* # => "<html><body>\n" +
|
202
|
+
* # "<h1>Employee List</h1>\n" +
|
203
|
+
* # "<ol>\n" +
|
204
|
+
* # "<li>EMP0001</li>\n" +
|
205
|
+
* # "<li>EMP0002</li>\n" +
|
206
|
+
* # "</ol>\n" +
|
207
|
+
* # "</body></html>\n"
|
116
208
|
*
|
117
|
-
*
|
209
|
+
* Or pass an array to XSLT.quote_params:
|
118
210
|
*
|
119
|
-
*
|
120
|
-
*
|
121
|
-
*
|
211
|
+
* stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml
|
212
|
+
* # => "<html><body>\n" +
|
213
|
+
* # "<h1>Aaron's List</h1>\n" +
|
214
|
+
* # "<ol>\n" +
|
215
|
+
* # "<li>EMP0001</li>\n" +
|
216
|
+
* # "<li>EMP0002</li>\n" +
|
217
|
+
* # "</ol>\n" +
|
218
|
+
* # "</body></html>\n"
|
122
219
|
*
|
220
|
+
* See: Nokogiri::XSLT.quote_params
|
123
221
|
*/
|
124
222
|
static VALUE
|
125
223
|
transform(int argc, VALUE *argv, VALUE self)
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -4423,11 +4423,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4423
4423
|
/* Parse error */
|
4424
4424
|
parser_add_parse_error(parser, token);
|
4425
4425
|
|
4426
|
-
/*
|
4427
|
-
* Fragment case: If the parser was originally created for the HTML
|
4428
|
-
* fragment parsing algorithm, then act as described in the "any other
|
4429
|
-
* start tag" entry below.
|
4430
|
-
*/
|
4431
4426
|
while (
|
4432
4427
|
!(
|
4433
4428
|
is_mathml_integration_point(get_current_node(parser))
|
@@ -4437,12 +4432,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4437
4432
|
) {
|
4438
4433
|
pop_current_node(parser);
|
4439
4434
|
}
|
4440
|
-
// XXX: The spec currently says to handle this using the in body insertion
|
4441
|
-
// mode rules. That seems wrong. See
|
4442
|
-
// <https://github.com/whatwg/html/issues/6808>. Instead, use the current
|
4443
|
-
// insertion mode which seems like it works.
|
4444
|
-
//
|
4445
|
-
// handle_in_body(parser, token);
|
4446
4435
|
handle_html_content(parser, token);
|
4447
4436
|
return;
|
4448
4437
|
}
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module Nokogiri
  #
  #  Some classes in Nokogiri are namespaced as a group, for example
  #  Document, DocumentFragment, and Builder.
  #
  #  It's sometimes necessary to look up the related class, e.g.:
  #
  #    XML::Builder → XML::Document
  #    HTML4::Builder → HTML4::Document
  #    HTML5::Document → HTML5::DocumentFragment
  #
  #  This module is included into those key classes who need to do this.
  #
  module ClassResolver
    # #related_class restricts matching namespaces to those matching this set.
    VALID_NAMESPACES = Set.new(["HTML", "HTML4", "HTML5", "XML"])

    # :call-seq:
    #   related_class(class_name) → Class
    #
    # Find a class constant within the same namespace as the receiver's class,
    # or within the namespace of the nearest superclass that lives in one of
    # the VALID_NAMESPACES. Returns +nil+ if no such constant can be found.
    #
    # Some examples:
    #
    #   Nokogiri::XML::Document.new.related_class("DocumentFragment")
    #   # => Nokogiri::XML::DocumentFragment
    #   Nokogiri::HTML4::Document.new.related_class("DocumentFragment")
    #   # => Nokogiri::HTML4::DocumentFragment
    #
    # Note this will also work for subclasses that follow the same convention, e.g.:
    #
    #   Loofah::HTML::Document.new.related_class("DocumentFragment")
    #   # => Loofah::HTML::DocumentFragment
    #
    # And even if it's a subclass, this will iterate through the superclasses:
    #
    #   class ThisIsATopLevelClass < Nokogiri::HTML4::Builder ; end
    #   ThisIsATopLevelClass.new.related_class("Document")
    #   # => Nokogiri::HTML4::Document
    #
    # Anonymous classes (whose #name is +nil+) are skipped in the same way as
    # classes outside the valid namespaces:
    #
    #   Class.new(Nokogiri::XML::Document).new.related_class("DocumentFragment")
    #   # => Nokogiri::XML::DocumentFragment
    #
    def related_class(class_name)
      klass = nil
      inspecting = self.class

      while inspecting
        # Module#name is nil for anonymous classes; to_s makes that an empty
        # namespace path so the class is skipped instead of raising.
        namespace_path = inspecting.name.to_s.split("::")[0..-2]
        inspecting = inspecting.superclass

        next unless VALID_NAMESPACES.include?(namespace_path.last)

        related_class_name = (namespace_path << class_name).join("::")
        klass = begin
          Object.const_get(related_class_name)
        rescue NameError
          # no such constant in this namespace; keep walking up the hierarchy
          nil
        end
        break if klass
      end
      klass
    end
  end
end
|