nokogiri 1.12.2 → 1.13.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +5 -6
- data/ext/nokogiri/extconf.rb +51 -35
- data/ext/nokogiri/gumbo.c +11 -11
- data/ext/nokogiri/html4_element_description.c +1 -1
- data/ext/nokogiri/html4_sax_parser_context.c +2 -1
- data/ext/nokogiri/nokogiri.c +1 -1
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +36 -36
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_namespace.c +2 -2
- data/ext/nokogiri/xml_node.c +647 -335
- data/ext/nokogiri/xml_reader.c +37 -11
- data/ext/nokogiri/xml_xpath_context.c +72 -49
- data/gumbo-parser/src/parser.c +0 -11
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +11 -3
- data/lib/nokogiri/css/parser.y +10 -2
- data/lib/nokogiri/css/parser_extras.rb +20 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rex +2 -1
- data/lib/nokogiri/css/xpath_visitor.rb +174 -75
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +84 -75
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +2 -1
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +24 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +6 -3
- data/lib/nokogiri/html5.rb +68 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +19 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +69 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +178 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +7 -4
- data/lib/nokogiri/xml/node.rb +512 -348
- data/lib/nokogiri/xml/node_set.rb +46 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +11 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +36 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +42 -22
- data/lib/nokogiri/xml/syntax_error.rb +4 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +3 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +3 -2
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- metadata +101 -27
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -148,9 +148,9 @@ namespaces(VALUE self)
|
|
148
148
|
}
|
149
149
|
|
150
150
|
/*
|
151
|
-
|
152
|
-
|
153
|
-
|
151
|
+
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
152
|
+
|
153
|
+
Get the attributes of the current node as an Array of Attr
|
154
154
|
*/
|
155
155
|
static VALUE
|
156
156
|
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
@@ -414,16 +414,23 @@ name(VALUE self)
|
|
414
414
|
* Get the xml:base of the node
|
415
415
|
*/
|
416
416
|
static VALUE
|
417
|
-
|
417
|
+
rb_xml_reader_base_uri(VALUE rb_reader)
|
418
418
|
{
|
419
|
-
|
420
|
-
|
419
|
+
VALUE rb_base_uri;
|
420
|
+
xmlTextReaderPtr c_reader;
|
421
|
+
xmlChar *c_base_uri;
|
421
422
|
|
422
|
-
Data_Get_Struct(
|
423
|
-
|
424
|
-
|
423
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
424
|
+
|
425
|
+
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
426
|
+
if (c_base_uri == NULL) {
|
427
|
+
return Qnil;
|
428
|
+
}
|
429
|
+
|
430
|
+
rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
|
431
|
+
xmlFree(c_base_uri);
|
425
432
|
|
426
|
-
return
|
433
|
+
return rb_base_uri;
|
427
434
|
}
|
428
435
|
|
429
436
|
/*
|
@@ -652,6 +659,24 @@ empty_element_p(VALUE self)
|
|
652
659
|
return Qfalse;
|
653
660
|
}
|
654
661
|
|
662
|
+
static VALUE
|
663
|
+
rb_xml_reader_encoding(VALUE rb_reader)
|
664
|
+
{
|
665
|
+
xmlTextReaderPtr c_reader;
|
666
|
+
const char *parser_encoding;
|
667
|
+
VALUE constructor_encoding;
|
668
|
+
|
669
|
+
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
670
|
+
if (RTEST(constructor_encoding)) {
|
671
|
+
return constructor_encoding;
|
672
|
+
}
|
673
|
+
|
674
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
675
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
676
|
+
if (parser_encoding == NULL) { return Qnil; }
|
677
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
678
|
+
}
|
679
|
+
|
655
680
|
void
|
656
681
|
noko_init_xml_reader()
|
657
682
|
{
|
@@ -672,10 +697,11 @@ noko_init_xml_reader()
|
|
672
697
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
673
698
|
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
674
699
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
675
|
-
rb_define_method(cNokogiriXmlReader, "base_uri",
|
700
|
+
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
676
701
|
rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
|
677
702
|
rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
|
678
703
|
rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
|
704
|
+
rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
|
679
705
|
rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
|
680
706
|
rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
|
681
707
|
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
|
data/ext/nokogiri/xml_xpath_context.c
CHANGED
@@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
86
86
|
xmlXPathFreeObject(needle);
|
87
87
|
}
|
88
88
|
|
89
|
+
|
90
|
+
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
|
91
|
+
static void
|
92
|
+
xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
93
|
+
{
|
94
|
+
xmlXPathObjectPtr element_name;
|
95
|
+
|
96
|
+
assert(ctxt->context->node);
|
97
|
+
|
98
|
+
CHECK_ARITY(1);
|
99
|
+
CAST_TO_STRING;
|
100
|
+
CHECK_TYPE(XPATH_STRING);
|
101
|
+
element_name = valuePop(ctxt);
|
102
|
+
|
103
|
+
valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval)));
|
104
|
+
|
105
|
+
xmlXPathFreeObject(element_name);
|
106
|
+
}
|
107
|
+
|
108
|
+
|
89
109
|
/*
|
90
110
|
* call-seq:
|
91
111
|
* register_ns(prefix, uri)
|
@@ -128,11 +148,44 @@ register_variable(VALUE self, VALUE name, VALUE value)
|
|
128
148
|
return self;
|
129
149
|
}
|
130
150
|
|
151
|
+
|
152
|
+
/*
|
153
|
+
* convert an XPath object into a Ruby object of the appropriate type.
|
154
|
+
* returns Qundef if no conversion was possible.
|
155
|
+
*/
|
156
|
+
static VALUE
|
157
|
+
xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx)
|
158
|
+
{
|
159
|
+
VALUE retval;
|
160
|
+
|
161
|
+
assert(xctx->doc);
|
162
|
+
assert(DOC_RUBY_OBJECT_TEST(xctx->doc));
|
163
|
+
|
164
|
+
switch (xobj->type) {
|
165
|
+
case XPATH_STRING:
|
166
|
+
retval = NOKOGIRI_STR_NEW2(xobj->stringval);
|
167
|
+
xmlFree(xobj->stringval);
|
168
|
+
return retval;
|
169
|
+
|
170
|
+
case XPATH_NODESET:
|
171
|
+
return noko_xml_node_set_wrap(xobj->nodesetval,
|
172
|
+
DOC_RUBY_OBJECT(xctx->doc));
|
173
|
+
|
174
|
+
case XPATH_NUMBER:
|
175
|
+
return rb_float_new(xobj->floatval);
|
176
|
+
|
177
|
+
case XPATH_BOOLEAN:
|
178
|
+
return (xobj->boolval == 1) ? Qtrue : Qfalse;
|
179
|
+
|
180
|
+
default:
|
181
|
+
return Qundef;
|
182
|
+
}
|
183
|
+
}
|
184
|
+
|
131
185
|
void
|
132
186
|
Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
133
187
|
const char *function_name)
|
134
188
|
{
|
135
|
-
int i;
|
136
189
|
VALUE result, doc;
|
137
190
|
VALUE *argv;
|
138
191
|
VALUE node_set = Qnil;
|
@@ -143,40 +196,25 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
|
|
143
196
|
assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));
|
144
197
|
|
145
198
|
argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE));
|
146
|
-
for (
|
147
|
-
rb_gc_register_address(&argv[
|
199
|
+
for (int j = 0 ; j < nargs ; ++j) {
|
200
|
+
rb_gc_register_address(&argv[j]);
|
148
201
|
}
|
149
202
|
|
150
203
|
doc = DOC_RUBY_OBJECT(ctx->context->doc);
|
151
204
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
break;
|
160
|
-
case XPATH_BOOLEAN:
|
161
|
-
argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
|
162
|
-
break;
|
163
|
-
case XPATH_NUMBER:
|
164
|
-
argv[i] = rb_float_new(obj->floatval);
|
165
|
-
break;
|
166
|
-
case XPATH_NODESET:
|
167
|
-
argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
|
168
|
-
break;
|
169
|
-
default:
|
170
|
-
argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
171
|
-
}
|
172
|
-
xmlXPathFreeNodeSetList(obj);
|
173
|
-
} while (i-- > 0);
|
205
|
+
for (int j = nargs - 1 ; j >= 0 ; --j) {
|
206
|
+
obj = valuePop(ctx);
|
207
|
+
argv[j] = xpath2ruby(obj, ctx->context);
|
208
|
+
if (argv[j] == Qundef) {
|
209
|
+
argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
|
210
|
+
}
|
211
|
+
xmlXPathFreeNodeSetList(obj);
|
174
212
|
}
|
175
213
|
|
176
214
|
result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv);
|
177
215
|
|
178
|
-
for (
|
179
|
-
rb_gc_unregister_address(&argv[
|
216
|
+
for (int j = 0 ; j < nargs ; ++j) {
|
217
|
+
rb_gc_unregister_address(&argv[j]);
|
180
218
|
}
|
181
219
|
free(argv);
|
182
220
|
|
@@ -275,7 +313,7 @@ static VALUE
|
|
275
313
|
evaluate(int argc, VALUE *argv, VALUE self)
|
276
314
|
{
|
277
315
|
VALUE search_path, xpath_handler;
|
278
|
-
VALUE
|
316
|
+
VALUE retval = Qnil;
|
279
317
|
xmlXPathContextPtr ctx;
|
280
318
|
xmlXPathObjectPtr xpath;
|
281
319
|
xmlChar *query;
|
@@ -310,31 +348,14 @@ evaluate(int argc, VALUE *argv, VALUE self)
|
|
310
348
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
311
349
|
}
|
312
350
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
switch (xpath->type) {
|
317
|
-
case XPATH_STRING:
|
318
|
-
thing = NOKOGIRI_STR_NEW2(xpath->stringval);
|
319
|
-
xmlFree(xpath->stringval);
|
320
|
-
break;
|
321
|
-
case XPATH_NODESET:
|
322
|
-
thing = noko_xml_node_set_wrap(xpath->nodesetval,
|
323
|
-
DOC_RUBY_OBJECT(ctx->doc));
|
324
|
-
break;
|
325
|
-
case XPATH_NUMBER:
|
326
|
-
thing = rb_float_new(xpath->floatval);
|
327
|
-
break;
|
328
|
-
case XPATH_BOOLEAN:
|
329
|
-
thing = xpath->boolval == 1 ? Qtrue : Qfalse;
|
330
|
-
break;
|
331
|
-
default:
|
332
|
-
thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
351
|
+
retval = xpath2ruby(xpath, ctx);
|
352
|
+
if (retval == Qundef) {
|
353
|
+
retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
|
333
354
|
}
|
334
355
|
|
335
356
|
xmlXPathFreeNodeSetList(xpath);
|
336
357
|
|
337
|
-
return
|
358
|
+
return retval;
|
338
359
|
}
|
339
360
|
|
340
361
|
/*
|
@@ -360,6 +381,8 @@ new (VALUE klass, VALUE nodeobj)
|
|
360
381
|
xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
361
382
|
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
362
383
|
xpath_builtin_css_class);
|
384
|
+
xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
385
|
+
xpath_builtin_local_name_is);
|
363
386
|
|
364
387
|
self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
|
365
388
|
return self;
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -4423,11 +4423,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4423
4423
|
/* Parse error */
|
4424
4424
|
parser_add_parse_error(parser, token);
|
4425
4425
|
|
4426
|
-
/*
|
4427
|
-
* Fragment case: If the parser was originally created for the HTML
|
4428
|
-
* fragment parsing algorithm, then act as described in the "any other
|
4429
|
-
* start tag" entry below.
|
4430
|
-
*/
|
4431
4426
|
while (
|
4432
4427
|
!(
|
4433
4428
|
is_mathml_integration_point(get_current_node(parser))
|
@@ -4437,12 +4432,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4437
4432
|
) {
|
4438
4433
|
pop_current_node(parser);
|
4439
4434
|
}
|
4440
|
-
// XXX: The spec currently says to handle this using the in body insertion
|
4441
|
-
// mode rules. That seems wrong. See
|
4442
|
-
// <https://github.com/whatwg/html/issues/6808>. Instead, use the current
|
4443
|
-
// insertion mode which seems like it works.
|
4444
|
-
//
|
4445
|
-
// handle_in_body(parser, token);
|
4446
4435
|
handle_html_content(parser, token);
|
4447
4436
|
return;
|
4448
4437
|
}
|
data/lib/nokogiri/class_resolver.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module Nokogiri
|
7
|
+
#
|
8
|
+
# Some classes in Nokogiri are namespaced as a group, for example
|
9
|
+
# Document, DocumentFragment, and Builder.
|
10
|
+
#
|
11
|
+
# It's sometimes necessary to look up the related class, e.g.:
|
12
|
+
#
|
13
|
+
# XML::Builder → XML::Document
|
14
|
+
# HTML4::Builder → HTML4::Document
|
15
|
+
# HTML5::Document → HTML5::DocumentFragment
|
16
|
+
#
|
17
|
+
# This module is included into those key classes that need to do this.
|
18
|
+
#
|
19
|
+
module ClassResolver
|
20
|
+
# #related_class restricts matching namespaces to those matching this set.
|
21
|
+
VALID_NAMESPACES = Set.new(["HTML", "HTML4", "HTML5", "XML"])
|
22
|
+
|
23
|
+
# :call-seq:
|
24
|
+
# related_class(class_name) → Class
|
25
|
+
#
|
26
|
+
# Find a class constant within the same namespace as this object's class (or that of one of its superclasses).
|
27
|
+
#
|
28
|
+
# Some examples:
|
29
|
+
#
|
30
|
+
# Nokogiri::XML::Document.new.related_class("DocumentFragment")
|
31
|
+
# # => Nokogiri::XML::DocumentFragment
|
32
|
+
# Nokogiri::HTML4::Document.new.related_class("DocumentFragment")
|
33
|
+
# # => Nokogiri::HTML4::DocumentFragment
|
34
|
+
#
|
35
|
+
# Note this will also work for subclasses that follow the same convention, e.g.:
|
36
|
+
#
|
37
|
+
# Loofah::HTML::Document.new.related_class("DocumentFragment")
|
38
|
+
# # => Loofah::HTML::DocumentFragment
|
39
|
+
#
|
40
|
+
# And even if it's a subclass, this will iterate through the superclasses:
|
41
|
+
#
|
42
|
+
# class ThisIsATopLevelClass < Nokogiri::HTML4::Builder ; end
|
43
|
+
# ThisIsATopLevelClass.new.related_class("Document")
|
44
|
+
# # => Nokogiri::HTML4::Document
|
45
|
+
#
|
46
|
+
def related_class(class_name)
|
47
|
+
klass = nil
|
48
|
+
inspecting = self.class
|
49
|
+
|
50
|
+
while inspecting
|
51
|
+
namespace_path = inspecting.name.split("::")[0..-2]
|
52
|
+
inspecting = inspecting.superclass
|
53
|
+
|
54
|
+
next unless VALID_NAMESPACES.include?(namespace_path.last)
|
55
|
+
|
56
|
+
related_class_name = (namespace_path << class_name).join("::")
|
57
|
+
klass = begin
|
58
|
+
Object.const_get(related_class_name)
|
59
|
+
rescue NameError
|
60
|
+
nil
|
61
|
+
end
|
62
|
+
break if klass
|
63
|
+
end
|
64
|
+
klass
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/nokogiri/css/node.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module CSS
|
4
|
-
class Node
|
5
|
+
class Node # :nodoc:
|
5
6
|
ALLOW_COMBINATOR_ON_SELF = [:DIRECT_ADJACENT_SELECTOR, :FOLLOWING_SELECTOR, :CHILD_SELECTOR]
|
6
7
|
|
7
8
|
# Get the type of this node
|
@@ -10,25 +11,25 @@ module Nokogiri
|
|
10
11
|
attr_accessor :value
|
11
12
|
|
12
13
|
# Create a new Node with +type+ and +value+
|
13
|
-
def initialize
|
14
|
+
def initialize(type, value)
|
14
15
|
@type = type
|
15
16
|
@value = value
|
16
17
|
end
|
17
18
|
|
18
19
|
# Accept +visitor+
|
19
|
-
def accept
|
20
|
+
def accept(visitor)
|
20
21
|
visitor.send(:"visit_#{type.to_s.downcase}", self)
|
21
22
|
end
|
22
23
|
|
23
24
|
###
|
24
25
|
# Convert this CSS node to xpath with +prefix+ using +visitor+
|
25
|
-
def to_xpath
|
26
|
-
prefix =
|
26
|
+
def to_xpath(prefix, visitor)
|
27
|
+
prefix = "." if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
|
27
28
|
prefix + visitor.accept(self)
|
28
29
|
end
|
29
30
|
|
30
31
|
# Find a node by type using +types+
|
31
|
-
def find_by_type
|
32
|
+
def find_by_type(types)
|
32
33
|
matches = []
|
33
34
|
matches << self if to_type == types
|
34
35
|
@value.each do |v|
|
@@ -39,9 +40,9 @@ module Nokogiri
|
|
39
40
|
|
40
41
|
# Convert to_type
|
41
42
|
def to_type
|
42
|
-
[@type] + @value.map
|
43
|
+
[@type] + @value.map do |n|
|
43
44
|
n.to_type if n.respond_to?(:to_type)
|
44
|
-
|
45
|
+
end.compact
|
45
46
|
end
|
46
47
|
|
47
48
|
# Convert to array
|
data/lib/nokogiri/css/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
#
|
3
3
|
# DO NOT MODIFY!!!!
|
4
|
-
# This file is automatically generated by Racc 1.
|
4
|
+
# This file is automatically generated by Racc 1.6.0
|
5
5
|
# from Racc grammar file "".
|
6
6
|
#
|
7
7
|
|
@@ -10,6 +10,14 @@ require 'racc/parser.rb'
|
|
10
10
|
|
11
11
|
require_relative "parser_extras"
|
12
12
|
|
13
|
+
module Nokogiri
|
14
|
+
module CSS
|
15
|
+
# :nodoc: all
|
16
|
+
class Parser < Racc::Parser
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
13
21
|
module Nokogiri
|
14
22
|
module CSS
|
15
23
|
class Parser < Racc::Parser
|
@@ -485,7 +493,7 @@ def _reduce_27(val, _values, result)
|
|
485
493
|
end
|
486
494
|
|
487
495
|
def _reduce_28(val, _values, result)
|
488
|
-
result = Node.new(:
|
496
|
+
result = Node.new(:ATTRIB_NAME,
|
489
497
|
[[val.first, val.last].compact.join(':')]
|
490
498
|
)
|
491
499
|
|
@@ -495,7 +503,7 @@ end
|
|
495
503
|
def _reduce_29(val, _values, result)
|
496
504
|
# Default namespace is not applied to attributes.
|
497
505
|
# So we don't add prefix "xmlns:" as in namespaced_ident.
|
498
|
-
result = Node.new(:
|
506
|
+
result = Node.new(:ATTRIB_NAME, [val.first])
|
499
507
|
|
500
508
|
result
|
501
509
|
end
|
data/lib/nokogiri/css/parser.y
CHANGED
@@ -96,14 +96,14 @@ rule
|
|
96
96
|
;
|
97
97
|
attrib_name
|
98
98
|
: namespace '|' IDENT {
|
99
|
-
result = Node.new(:
|
99
|
+
result = Node.new(:ATTRIB_NAME,
|
100
100
|
[[val.first, val.last].compact.join(':')]
|
101
101
|
)
|
102
102
|
}
|
103
103
|
| IDENT {
|
104
104
|
# Default namespace is not applied to attributes.
|
105
105
|
# So we don't add prefix "xmlns:" as in namespaced_ident.
|
106
|
-
result = Node.new(:
|
106
|
+
result = Node.new(:ATTRIB_NAME, [val.first])
|
107
107
|
}
|
108
108
|
;
|
109
109
|
function
|
@@ -255,6 +255,14 @@ end
|
|
255
255
|
|
256
256
|
require_relative "parser_extras"
|
257
257
|
|
258
|
+
module Nokogiri
|
259
|
+
module CSS
|
260
|
+
# :nodoc: all
|
261
|
+
class Parser < Racc::Parser
|
262
|
+
end
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
258
266
|
---- inner
|
259
267
|
|
260
268
|
def unescape_css_identifier(identifier)
|
data/lib/nokogiri/css/parser_extras.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require "thread"
|
3
4
|
|
4
5
|
module Nokogiri
|
5
6
|
module CSS
|
6
|
-
class Parser < Racc::Parser
|
7
|
+
class Parser < Racc::Parser # :nodoc:
|
7
8
|
CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
|
8
9
|
|
9
10
|
@cache = {}
|
@@ -16,13 +17,13 @@ module Nokogiri
|
|
16
17
|
end
|
17
18
|
|
18
19
|
# Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
|
19
|
-
def set_cache(value)
|
20
|
+
def set_cache(value) # rubocop:disable Naming/AccessorMethodName
|
20
21
|
Thread.current[CACHE_SWITCH_NAME] = !value
|
21
22
|
end
|
22
23
|
|
23
24
|
# Get the css selector in +string+ from the cache
|
24
25
|
def [](string)
|
25
|
-
return unless cache_on?
|
26
|
+
return nil unless cache_on?
|
26
27
|
@mutex.synchronize { @cache[string] }
|
27
28
|
end
|
28
29
|
|
@@ -46,10 +47,10 @@ module Nokogiri
|
|
46
47
|
# Execute +block+ without cache
|
47
48
|
def without_cache(&block)
|
48
49
|
original_cache_setting = cache_on?
|
49
|
-
set_cache
|
50
|
-
|
50
|
+
set_cache(false)
|
51
|
+
yield
|
51
52
|
ensure
|
52
|
-
set_cache
|
53
|
+
set_cache(original_cache_setting)
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
@@ -61,7 +62,7 @@ module Nokogiri
|
|
61
62
|
end
|
62
63
|
|
63
64
|
def parse(string)
|
64
|
-
@tokenizer.scan_setup
|
65
|
+
@tokenizer.scan_setup(string)
|
65
66
|
do_parse
|
66
67
|
end
|
67
68
|
|
@@ -70,24 +71,23 @@ module Nokogiri
|
|
70
71
|
end
|
71
72
|
|
72
73
|
# Get the xpath for +string+ using +options+
|
73
|
-
def xpath_for(string,
|
74
|
-
key =
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
args = [
|
79
|
-
options[:prefix] || "//",
|
80
|
-
options[:visitor] || XPathVisitor.new,
|
81
|
-
]
|
82
|
-
self.class[key] = parse(string).map { |ast|
|
83
|
-
ast.to_xpath(*args)
|
84
|
-
}
|
74
|
+
def xpath_for(string, prefix, visitor)
|
75
|
+
key = cache_key(string, prefix, visitor)
|
76
|
+
self.class[key] ||= parse(string).map do |ast|
|
77
|
+
ast.to_xpath(prefix, visitor)
|
78
|
+
end
|
85
79
|
end
|
86
80
|
|
87
81
|
# On CSS parser error, raise an exception
|
88
82
|
def on_error(error_token_id, error_value, value_stack)
|
89
83
|
after = value_stack.compact.last
|
90
|
-
raise SyntaxError
|
84
|
+
raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
|
85
|
+
end
|
86
|
+
|
87
|
+
def cache_key(query, prefix, visitor)
|
88
|
+
if self.class.cache_on?
|
89
|
+
[query, prefix, @namespaces, visitor.config]
|
90
|
+
end
|
91
91
|
end
|
92
92
|
end
|
93
93
|
end
|