nokogiri 1.12.5 → 1.13.6
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -64
- data/ext/nokogiri/extconf.rb +64 -44
- data/ext/nokogiri/html4_sax_parser_context.c +2 -3
- data/ext/nokogiri/xml_document.c +35 -35
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_node.c +638 -333
- data/ext/nokogiri/xml_reader.c +37 -11
- data/ext/nokogiri/xml_sax_parser_context.c +10 -3
- data/ext/nokogiri/xml_xpath_context.c +72 -49
- data/ext/nokogiri/xslt_stylesheet.c +107 -9
- data/gumbo-parser/src/parser.c +0 -11
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +88 -77
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +5 -2
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +27 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +10 -3
- data/lib/nokogiri/html5.rb +69 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +20 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +34 -32
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +144 -103
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +521 -351
- data/lib/nokogiri/xml/node_set.rb +50 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +37 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +4 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0004-use-glibc-strlen.patch +3 -3
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2443 -1914
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +109 -31
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -148,9 +148,9 @@ namespaces(VALUE self)
|
|
148
148
|
}
|
149
149
|
|
150
150
|
/*
|
151
|
-
|
152
|
-
|
153
|
-
|
151
|
+
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
152
|
+
|
153
|
+
Get the attributes of the current node as an Array of Attr
|
154
154
|
*/
|
155
155
|
static VALUE
|
156
156
|
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
@@ -414,16 +414,23 @@ name(VALUE self)
|
|
414
414
|
* Get the xml:base of the node
|
415
415
|
*/
|
416
416
|
static VALUE
|
417
|
-
|
417
|
+
rb_xml_reader_base_uri(VALUE rb_reader)
|
418
418
|
{
|
419
|
-
|
420
|
-
|
419
|
+
VALUE rb_base_uri;
|
420
|
+
xmlTextReaderPtr c_reader;
|
421
|
+
xmlChar *c_base_uri;
|
421
422
|
|
422
|
-
Data_Get_Struct(
|
423
|
-
|
424
|
-
|
423
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
424
|
+
|
425
|
+
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
426
|
+
if (c_base_uri == NULL) {
|
427
|
+
return Qnil;
|
428
|
+
}
|
429
|
+
|
430
|
+
rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
|
431
|
+
xmlFree(c_base_uri);
|
425
432
|
|
426
|
-
return
|
433
|
+
return rb_base_uri;
|
427
434
|
}
|
428
435
|
|
429
436
|
/*
|
@@ -652,6 +659,24 @@ empty_element_p(VALUE self)
|
|
652
659
|
return Qfalse;
|
653
660
|
}
|
654
661
|
|
662
|
+
static VALUE
|
663
|
+
rb_xml_reader_encoding(VALUE rb_reader)
|
664
|
+
{
|
665
|
+
xmlTextReaderPtr c_reader;
|
666
|
+
const char *parser_encoding;
|
667
|
+
VALUE constructor_encoding;
|
668
|
+
|
669
|
+
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
670
|
+
if (RTEST(constructor_encoding)) {
|
671
|
+
return constructor_encoding;
|
672
|
+
}
|
673
|
+
|
674
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
675
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
676
|
+
if (parser_encoding == NULL) { return Qnil; }
|
677
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
678
|
+
}
|
679
|
+
|
655
680
|
void
|
656
681
|
noko_init_xml_reader()
|
657
682
|
{
|
@@ -672,10 +697,11 @@ noko_init_xml_reader()
|
|
672
697
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
673
698
|
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
674
699
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
675
|
-
rb_define_method(cNokogiriXmlReader, "base_uri",
|
700
|
+
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
676
701
|
rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
|
677
702
|
rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
|
678
703
|
rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
|
704
|
+
rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
|
679
705
|
rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
|
680
706
|
rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
|
681
707
|
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
|
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
+
static ID id_read;
|
6
|
+
|
5
7
|
static void
|
6
8
|
deallocate(xmlParserCtxtPtr ctxt)
|
7
9
|
{
|
@@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
26
28
|
xmlParserCtxtPtr ctxt;
|
27
29
|
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
28
30
|
|
31
|
+
if (!rb_respond_to(io, id_read)) {
|
32
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
33
|
+
}
|
34
|
+
|
29
35
|
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
30
36
|
(xmlInputReadCallback)noko_io_read,
|
31
37
|
(xmlInputCloseCallback)noko_io_close,
|
@@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
|
|
62
68
|
{
|
63
69
|
xmlParserCtxtPtr ctxt;
|
64
70
|
|
65
|
-
|
66
|
-
|
67
|
-
}
|
71
|
+
Check_Type(data, T_STRING);
|
72
|
+
|
68
73
|
if (!(int)RSTRING_LEN(data)) {
|
69
74
|
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
70
75
|
}
|
@@ -278,4 +283,6 @@ noko_init_xml_sax_parser_context()
|
|
278
283
|
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
279
284
|
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
280
285
|
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
286
|
+
|
287
|
+
id_read = rb_intern("read");
|
281
288
|
}
|
@@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
86
86
|
xmlXPathFreeObject(needle);
|
87
87
|
}
|
88
88
|
|
89
|
+
|
90
|
+
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
|
91
|
+
static void
|
92
|
+
xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
93
|
+
{
|
94
|
+
xmlXPathObjectPtr element_name;
|
95
|
+
|
96
|
+
assert(ctxt->context->node);
|
97
|
+
|
98
|
+
CHECK_ARITY(1);
|
99
|
+
CAST_TO_STRING;
|
100
|
+
CHECK_TYPE(XPATH_STRING);
|
101
|
+
element_name = valuePop(ctxt);
|
102
|
+
|
103
|
+
valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval)));
|
104
|
+
|
105
|
+
xmlXPathFreeObject(element_name);
|
106
|
+
}
|
107
|
+
|
108
|
+
|
89
109
|
/*
|
90
110
|
* call-seq:
|
91
111
|
* register_ns(prefix, uri)
|
@@ -128,11 +148,44 @@ register_variable(VALUE self, VALUE name, VALUE value)
|
|
128
148
|
return self;
|
129
149
|
}
|
130
150
|
|
151
|
+
|
152
|
+
/*
|
153
|
+
* convert an XPath object into a Ruby object of the appropriate type.
|
154
|
+
* returns Qundef if no conversion was possible.
|
155
|
+
*/
|
156
|
+
static VALUE
|
157
|
+
xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx)
|
158
|
+
{
|
159
|
+
VALUE retval;
|
160
|
+
|
161
|
+
assert(xctx->doc);
|
162
|
+
assert(DOC_RUBY_OBJECT_TEST(xctx->doc));
|
163
|
+
|
164
|
+
switch (xobj->type) {
|
165
|
+
case XPATH_STRING:
|
166
|
+
retval = NOKOGIRI_STR_NEW2(xobj->stringval);
|
167
|
+
xmlFree(xobj->stringval);
|
168
|
+
return retval;
|
169
|
+
|
170
|
+
case XPATH_NODESET:
|
171
|
+
return noko_xml_node_set_wrap(xobj->nodesetval,
|
172
|
+
DOC_RUBY_OBJECT(xctx->doc));
|
173
|
+
|
174
|
+
case XPATH_NUMBER:
|
175
|
+
return rb_float_new(xobj->floatval);
|
176
|
+
|
177
|
+
case XPATH_BOOLEAN:
|
178
|
+
return (xobj->boolval == 1) ? Qtrue : Qfalse;
|
179
|
+
|
180
|
+
default:
|
181
|
+
return Qundef;
|
182
|
+
}
|
183
|
+
}
|
184
|
+
|
131
185
|
void
|
132
186
|
Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
133
187
|
const char *function_name)
|
134
188
|
{
|
135
|
-
int i;
|
136
189
|
VALUE result, doc;
|
137
190
|
VALUE *argv;
|
138
191
|
VALUE node_set = Qnil;
|
@@ -143,40 +196,25 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
|
|
143
196
|
assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));
|
144
197
|
|
145
198
|
argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE));
|
146
|
-
for (
|
147
|
-
rb_gc_register_address(&argv[
|
199
|
+
for (int j = 0 ; j < nargs ; ++j) {
|
200
|
+
rb_gc_register_address(&argv[j]);
|
148
201
|
}
|
149
202
|
|
150
203
|
doc = DOC_RUBY_OBJECT(ctx->context->doc);
|
151
204
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
break;
|
160
|
-
case XPATH_BOOLEAN:
|
161
|
-
argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
|
162
|
-
break;
|
163
|
-
case XPATH_NUMBER:
|
164
|
-
argv[i] = rb_float_new(obj->floatval);
|
165
|
-
break;
|
166
|
-
case XPATH_NODESET:
|
167
|
-
argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
|
168
|
-
break;
|
169
|
-
default:
|
170
|
-
argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
171
|
-
}
|
172
|
-
xmlXPathFreeNodeSetList(obj);
|
173
|
-
} while (i-- > 0);
|
205
|
+
for (int j = nargs - 1 ; j >= 0 ; --j) {
|
206
|
+
obj = valuePop(ctx);
|
207
|
+
argv[j] = xpath2ruby(obj, ctx->context);
|
208
|
+
if (argv[j] == Qundef) {
|
209
|
+
argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
210
|
+
}
|
211
|
+
xmlXPathFreeNodeSetList(obj);
|
174
212
|
}
|
175
213
|
|
176
214
|
result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv);
|
177
215
|
|
178
|
-
for (
|
179
|
-
rb_gc_unregister_address(&argv[
|
216
|
+
for (int j = 0 ; j < nargs ; ++j) {
|
217
|
+
rb_gc_unregister_address(&argv[j]);
|
180
218
|
}
|
181
219
|
free(argv);
|
182
220
|
|
@@ -275,7 +313,7 @@ static VALUE
|
|
275
313
|
evaluate(int argc, VALUE *argv, VALUE self)
|
276
314
|
{
|
277
315
|
VALUE search_path, xpath_handler;
|
278
|
-
VALUE
|
316
|
+
VALUE retval = Qnil;
|
279
317
|
xmlXPathContextPtr ctx;
|
280
318
|
xmlXPathObjectPtr xpath;
|
281
319
|
xmlChar *query;
|
@@ -310,31 +348,14 @@ evaluate(int argc, VALUE *argv, VALUE self)
|
|
310
348
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
311
349
|
}
|
312
350
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
switch (xpath->type) {
|
317
|
-
case XPATH_STRING:
|
318
|
-
thing = NOKOGIRI_STR_NEW2(xpath->stringval);
|
319
|
-
xmlFree(xpath->stringval);
|
320
|
-
break;
|
321
|
-
case XPATH_NODESET:
|
322
|
-
thing = noko_xml_node_set_wrap(xpath->nodesetval,
|
323
|
-
DOC_RUBY_OBJECT(ctx->doc));
|
324
|
-
break;
|
325
|
-
case XPATH_NUMBER:
|
326
|
-
thing = rb_float_new(xpath->floatval);
|
327
|
-
break;
|
328
|
-
case XPATH_BOOLEAN:
|
329
|
-
thing = xpath->boolval == 1 ? Qtrue : Qfalse;
|
330
|
-
break;
|
331
|
-
default:
|
332
|
-
thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
351
|
+
retval = xpath2ruby(xpath, ctx);
|
352
|
+
if (retval == Qundef) {
|
353
|
+
retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
333
354
|
}
|
334
355
|
|
335
356
|
xmlXPathFreeNodeSetList(xpath);
|
336
357
|
|
337
|
-
return
|
358
|
+
return retval;
|
338
359
|
}
|
339
360
|
|
340
361
|
/*
|
@@ -360,6 +381,8 @@ new (VALUE klass, VALUE nodeobj)
|
|
360
381
|
xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
361
382
|
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
362
383
|
xpath_builtin_css_class);
|
384
|
+
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
385
|
+
xpath_builtin_local_name_is);
|
363
386
|
|
364
387
|
self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
|
365
388
|
return self;
|
@@ -107,19 +107,117 @@ serialize(VALUE self, VALUE xmlobj)
|
|
107
107
|
}
|
108
108
|
|
109
109
|
/*
|
110
|
-
*
|
111
|
-
*
|
110
|
+
* call-seq:
|
111
|
+
* transform(document)
|
112
|
+
* transform(document, params = {})
|
113
|
+
*
|
114
|
+
* Apply an XSLT stylesheet to an XML::Document.
|
115
|
+
*
|
116
|
+
* [Parameters]
|
117
|
+
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
118
|
+
* - +params+ (Hash, Array) strings used as XSLT parameters.
|
119
|
+
*
|
120
|
+
* [Returns] Nokogiri::XML::Document
|
121
|
+
*
|
122
|
+
* *Example* of basic transformation:
|
123
|
+
*
|
124
|
+
* xslt = <<~XSLT
|
125
|
+
* <xsl:stylesheet version="1.0"
|
126
|
+
* xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
127
|
+
*
|
128
|
+
* <xsl:param name="title"/>
|
129
|
+
*
|
130
|
+
* <xsl:template match="/">
|
131
|
+
* <html>
|
132
|
+
* <body>
|
133
|
+
* <h1><xsl:value-of select="$title"/></h1>
|
134
|
+
* <ol>
|
135
|
+
* <xsl:for-each select="staff/employee">
|
136
|
+
* <li><xsl:value-of select="employeeId"></li>
|
137
|
+
* </xsl:for-each>
|
138
|
+
* </ol>
|
139
|
+
* </body>
|
140
|
+
* </html>
|
141
|
+
* </xsl:stylesheet>
|
142
|
+
* XSLT
|
143
|
+
*
|
144
|
+
* xml = <<~XML
|
145
|
+
* <?xml version="1.0"?>
|
146
|
+
* <staff>
|
147
|
+
* <employee>
|
148
|
+
* <employeeId>EMP0001</employeeId>
|
149
|
+
* <position>Accountant</position>
|
150
|
+
* </employee>
|
151
|
+
* <employee>
|
152
|
+
* <employeeId>EMP0002</employeeId>
|
153
|
+
* <position>Developer</position>
|
154
|
+
* </employee>
|
155
|
+
* </staff>
|
156
|
+
* XML
|
157
|
+
*
|
158
|
+
* doc = Nokogiri::XML::Document.parse(xml)
|
159
|
+
* stylesheet = Nokogiri::XSLT.parse(xslt)
|
160
|
+
*
|
161
|
+
* ⚠ Note that the +h1+ element is empty because no param has been provided!
|
162
|
+
*
|
163
|
+
* stylesheet.transform(doc).to_xml
|
164
|
+
* # => "<html><body>\n" +
|
165
|
+
* # "<h1></h1>\n" +
|
166
|
+
* # "<ol>\n" +
|
167
|
+
* # "<li>EMP0001</li>\n" +
|
168
|
+
* # "<li>EMP0002</li>\n" +
|
169
|
+
* # "</ol>\n" +
|
170
|
+
* # "</body></html>\n"
|
171
|
+
*
|
172
|
+
* *Example* of using an input parameter hash:
|
173
|
+
*
|
174
|
+
* ⚠ The title is populated, but note how we need to quote-escape the value.
|
175
|
+
*
|
176
|
+
* stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml
|
177
|
+
* # => "<html><body>\n" +
|
178
|
+
* # "<h1>Employee List</h1>\n" +
|
179
|
+
* # "<ol>\n" +
|
180
|
+
* # "<li>EMP0001</li>\n" +
|
181
|
+
* # "<li>EMP0002</li>\n" +
|
182
|
+
* # "</ol>\n" +
|
183
|
+
* # "</body></html>\n"
|
184
|
+
*
|
185
|
+
* *Example* using the XSLT.quote_params helper method to safely quote-escape strings:
|
186
|
+
*
|
187
|
+
* stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml
|
188
|
+
* # => "<html><body>\n" +
|
189
|
+
* # "<h1>Aaron's List</h1>\n" +
|
190
|
+
* # "<ol>\n" +
|
191
|
+
* # "<li>EMP0001</li>\n" +
|
192
|
+
* # "<li>EMP0002</li>\n" +
|
193
|
+
* # "</ol>\n" +
|
194
|
+
* # "</body></html>\n"
|
195
|
+
*
|
196
|
+
* *Example* using an array of XSLT parameters
|
197
|
+
*
|
198
|
+
* You can also use an array if you want to.
|
112
199
|
*
|
113
|
-
*
|
114
|
-
*
|
115
|
-
*
|
200
|
+
* stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml
|
201
|
+
* # => "<html><body>\n" +
|
202
|
+
* # "<h1>Employee List</h1>\n" +
|
203
|
+
* # "<ol>\n" +
|
204
|
+
* # "<li>EMP0001</li>\n" +
|
205
|
+
* # "<li>EMP0002</li>\n" +
|
206
|
+
* # "</ol>\n" +
|
207
|
+
* # "</body></html>\n"
|
116
208
|
*
|
117
|
-
*
|
209
|
+
* Or pass an array to XSLT.quote_params:
|
118
210
|
*
|
119
|
-
*
|
120
|
-
*
|
121
|
-
*
|
211
|
+
* stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml
|
212
|
+
* # => "<html><body>\n" +
|
213
|
+
* # "<h1>Aaron's List</h1>\n" +
|
214
|
+
* # "<ol>\n" +
|
215
|
+
* # "<li>EMP0001</li>\n" +
|
216
|
+
* # "<li>EMP0002</li>\n" +
|
217
|
+
* # "</ol>\n" +
|
218
|
+
* # "</body></html>\n"
|
122
219
|
*
|
220
|
+
* See: Nokogiri::XSLT.quote_params
|
123
221
|
*/
|
124
222
|
static VALUE
|
125
223
|
transform(int argc, VALUE *argv, VALUE self)
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -4423,11 +4423,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4423
4423
|
/* Parse error */
|
4424
4424
|
parser_add_parse_error(parser, token);
|
4425
4425
|
|
4426
|
-
/*
|
4427
|
-
* Fragment case: If the parser was originally created for the HTML
|
4428
|
-
* fragment parsing algorithm, then act as described in the "any other
|
4429
|
-
* start tag" entry below.
|
4430
|
-
*/
|
4431
4426
|
while (
|
4432
4427
|
!(
|
4433
4428
|
is_mathml_integration_point(get_current_node(parser))
|
@@ -4437,12 +4432,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4437
4432
|
) {
|
4438
4433
|
pop_current_node(parser);
|
4439
4434
|
}
|
4440
|
-
// XXX: The spec currently says to handle this using the in body insertion
|
4441
|
-
// mode rules. That seems wrong. See
|
4442
|
-
// <https://github.com/whatwg/html/issues/6808>. Instead, use the current
|
4443
|
-
// insertion mode which seems like it works.
|
4444
|
-
//
|
4445
|
-
// handle_in_body(parser, token);
|
4446
4435
|
handle_html_content(parser, token);
|
4447
4436
|
return;
|
4448
4437
|
}
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module Nokogiri
|
7
|
+
#
|
8
|
+
# Some classes in Nokogiri are namespaced as a group, for example
|
9
|
+
# Document, DocumentFragment, and Builder.
|
10
|
+
#
|
11
|
+
# It's sometimes necessary to look up the related class, e.g.:
|
12
|
+
#
|
13
|
+
# XML::Builder → XML::Document
|
14
|
+
# HTML4::Builder → HTML4::Document
|
15
|
+
# HTML5::Document → HTML5::DocumentFragment
|
16
|
+
#
|
17
|
+
# This module is included into those key classes that need to do this.
|
18
|
+
#
|
19
|
+
module ClassResolver
|
20
|
+
# #related_class restricts matching namespaces to those matching this set.
|
21
|
+
VALID_NAMESPACES = Set.new(["HTML", "HTML4", "HTML5", "XML"])
|
22
|
+
|
23
|
+
# :call-seq:
|
24
|
+
# related_class(class_name) → Class
|
25
|
+
#
|
26
|
+
# Find a class constant within the same namespace as this object's class, checking each superclass in turn.
|
27
|
+
#
|
28
|
+
# Some examples:
|
29
|
+
#
|
30
|
+
# Nokogiri::XML::Document.new.related_class("DocumentFragment")
|
31
|
+
# # => Nokogiri::XML::DocumentFragment
|
32
|
+
# Nokogiri::HTML4::Document.new.related_class("DocumentFragment")
|
33
|
+
# # => Nokogiri::HTML4::DocumentFragment
|
34
|
+
#
|
35
|
+
# Note this will also work for subclasses that follow the same convention, e.g.:
|
36
|
+
#
|
37
|
+
# Loofah::HTML::Document.new.related_class("DocumentFragment")
|
38
|
+
# # => Loofah::HTML::DocumentFragment
|
39
|
+
#
|
40
|
+
# And even if it's a subclass, this will iterate through the superclasses:
|
41
|
+
#
|
42
|
+
# class ThisIsATopLevelClass < Nokogiri::HTML4::Builder ; end
|
43
|
+
# ThisIsATopLevelClass.new.related_class("Document")
|
44
|
+
# # => Nokogiri::HTML4::Document
|
45
|
+
#
|
46
|
+
def related_class(class_name)
|
47
|
+
klass = nil
|
48
|
+
inspecting = self.class
|
49
|
+
|
50
|
+
while inspecting
|
51
|
+
namespace_path = inspecting.name.split("::")[0..-2]
|
52
|
+
inspecting = inspecting.superclass
|
53
|
+
|
54
|
+
next unless VALID_NAMESPACES.include?(namespace_path.last)
|
55
|
+
|
56
|
+
related_class_name = (namespace_path << class_name).join("::")
|
57
|
+
klass = begin
|
58
|
+
Object.const_get(related_class_name)
|
59
|
+
rescue NameError
|
60
|
+
nil
|
61
|
+
end
|
62
|
+
break if klass
|
63
|
+
end
|
64
|
+
klass
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/nokogiri/css/node.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module CSS
|
4
|
-
class Node
|
5
|
+
class Node # :nodoc:
|
5
6
|
ALLOW_COMBINATOR_ON_SELF = [:DIRECT_ADJACENT_SELECTOR, :FOLLOWING_SELECTOR, :CHILD_SELECTOR]
|
6
7
|
|
7
8
|
# Get the type of this node
|
@@ -10,25 +11,25 @@ module Nokogiri
|
|
10
11
|
attr_accessor :value
|
11
12
|
|
12
13
|
# Create a new Node with +type+ and +value+
|
13
|
-
def initialize
|
14
|
+
def initialize(type, value)
|
14
15
|
@type = type
|
15
16
|
@value = value
|
16
17
|
end
|
17
18
|
|
18
19
|
# Accept +visitor+
|
19
|
-
def accept
|
20
|
+
def accept(visitor)
|
20
21
|
visitor.send(:"visit_#{type.to_s.downcase}", self)
|
21
22
|
end
|
22
23
|
|
23
24
|
###
|
24
25
|
# Convert this CSS node to xpath with +prefix+ using +visitor+
|
25
|
-
def to_xpath
|
26
|
-
prefix =
|
26
|
+
def to_xpath(prefix, visitor)
|
27
|
+
prefix = "." if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
|
27
28
|
prefix + visitor.accept(self)
|
28
29
|
end
|
29
30
|
|
30
31
|
# Find a node by type using +types+
|
31
|
-
def find_by_type
|
32
|
+
def find_by_type(types)
|
32
33
|
matches = []
|
33
34
|
matches << self if to_type == types
|
34
35
|
@value.each do |v|
|
@@ -39,9 +40,9 @@ module Nokogiri
|
|
39
40
|
|
40
41
|
# Convert to_type
|
41
42
|
def to_type
|
42
|
-
[@type] + @value.map
|
43
|
+
[@type] + @value.map do |n|
|
43
44
|
n.to_type if n.respond_to?(:to_type)
|
44
|
-
|
45
|
+
end.compact
|
45
46
|
end
|
46
47
|
|
47
48
|
# Convert to array
|