nokogiri 1.12.2 → 1.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (97)
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/README.md +9 -7
  4. data/bin/nokogiri +63 -50
  5. data/dependencies.yml +5 -6
  6. data/ext/nokogiri/extconf.rb +51 -35
  7. data/ext/nokogiri/gumbo.c +11 -11
  8. data/ext/nokogiri/html4_element_description.c +1 -1
  9. data/ext/nokogiri/html4_sax_parser_context.c +2 -1
  10. data/ext/nokogiri/nokogiri.c +1 -1
  11. data/ext/nokogiri/nokogiri.h +3 -0
  12. data/ext/nokogiri/xml_document.c +36 -36
  13. data/ext/nokogiri/xml_document_fragment.c +0 -2
  14. data/ext/nokogiri/xml_dtd.c +2 -2
  15. data/ext/nokogiri/xml_encoding_handler.c +25 -11
  16. data/ext/nokogiri/xml_namespace.c +2 -2
  17. data/ext/nokogiri/xml_node.c +647 -335
  18. data/ext/nokogiri/xml_reader.c +37 -11
  19. data/ext/nokogiri/xml_xpath_context.c +72 -49
  20. data/gumbo-parser/src/parser.c +0 -11
  21. data/lib/nokogiri/class_resolver.rb +67 -0
  22. data/lib/nokogiri/css/node.rb +9 -8
  23. data/lib/nokogiri/css/parser.rb +11 -3
  24. data/lib/nokogiri/css/parser.y +10 -2
  25. data/lib/nokogiri/css/parser_extras.rb +20 -20
  26. data/lib/nokogiri/css/syntax_error.rb +1 -0
  27. data/lib/nokogiri/css/tokenizer.rb +2 -1
  28. data/lib/nokogiri/css/tokenizer.rex +2 -1
  29. data/lib/nokogiri/css/xpath_visitor.rb +174 -75
  30. data/lib/nokogiri/css.rb +38 -6
  31. data/lib/nokogiri/decorators/slop.rb +8 -7
  32. data/lib/nokogiri/extension.rb +1 -1
  33. data/lib/nokogiri/gumbo.rb +1 -0
  34. data/lib/nokogiri/html.rb +16 -10
  35. data/lib/nokogiri/html4/builder.rb +1 -0
  36. data/lib/nokogiri/html4/document.rb +84 -75
  37. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  38. data/lib/nokogiri/html4/element_description.rb +1 -0
  39. data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
  40. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  41. data/lib/nokogiri/html4/sax/parser.rb +2 -1
  42. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  43. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  44. data/lib/nokogiri/html4.rb +11 -5
  45. data/lib/nokogiri/html5/document.rb +24 -10
  46. data/lib/nokogiri/html5/document_fragment.rb +5 -2
  47. data/lib/nokogiri/html5/node.rb +6 -3
  48. data/lib/nokogiri/html5.rb +68 -64
  49. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  50. data/lib/nokogiri/syntax_error.rb +1 -0
  51. data/lib/nokogiri/version/constant.rb +2 -1
  52. data/lib/nokogiri/version/info.rb +19 -13
  53. data/lib/nokogiri/version.rb +1 -0
  54. data/lib/nokogiri/xml/attr.rb +5 -3
  55. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  56. data/lib/nokogiri/xml/builder.rb +69 -31
  57. data/lib/nokogiri/xml/cdata.rb +2 -1
  58. data/lib/nokogiri/xml/character_data.rb +1 -0
  59. data/lib/nokogiri/xml/document.rb +178 -96
  60. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  61. data/lib/nokogiri/xml/dtd.rb +3 -2
  62. data/lib/nokogiri/xml/element_content.rb +1 -0
  63. data/lib/nokogiri/xml/element_decl.rb +2 -1
  64. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  65. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  66. data/lib/nokogiri/xml/namespace.rb +2 -0
  67. data/lib/nokogiri/xml/node/save_options.rb +7 -4
  68. data/lib/nokogiri/xml/node.rb +512 -348
  69. data/lib/nokogiri/xml/node_set.rb +46 -54
  70. data/lib/nokogiri/xml/notation.rb +12 -0
  71. data/lib/nokogiri/xml/parse_options.rb +11 -7
  72. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  73. data/lib/nokogiri/xml/pp/node.rb +24 -26
  74. data/lib/nokogiri/xml/pp.rb +1 -0
  75. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  76. data/lib/nokogiri/xml/reader.rb +17 -19
  77. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  78. data/lib/nokogiri/xml/sax/document.rb +20 -19
  79. data/lib/nokogiri/xml/sax/parser.rb +36 -34
  80. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  81. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  82. data/lib/nokogiri/xml/sax.rb +1 -0
  83. data/lib/nokogiri/xml/schema.rb +7 -6
  84. data/lib/nokogiri/xml/searchable.rb +42 -22
  85. data/lib/nokogiri/xml/syntax_error.rb +4 -4
  86. data/lib/nokogiri/xml/text.rb +1 -0
  87. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  88. data/lib/nokogiri/xml/xpath.rb +12 -0
  89. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  90. data/lib/nokogiri/xml.rb +3 -3
  91. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  92. data/lib/nokogiri/xslt.rb +3 -2
  93. data/lib/nokogiri.rb +19 -16
  94. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  95. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  96. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  97. metadata +101 -27
@@ -148,9 +148,9 @@ namespaces(VALUE self)
148
148
  }
149
149
 
150
150
  /*
151
- * @overload attribute_nodes()
152
- * Get the attributes of the current node as an Array of Attr
153
- * @return [Array<Nokogiri::XML::Attr>]
151
+ :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
152
+
153
+ Get the attributes of the current node as an Array of Attr
154
154
  */
155
155
  static VALUE
156
156
  rb_xml_reader_attribute_nodes(VALUE rb_reader)
@@ -414,16 +414,23 @@ name(VALUE self)
414
414
  * Get the xml:base of the node
415
415
  */
416
416
  static VALUE
417
- base_uri(VALUE self)
417
+ rb_xml_reader_base_uri(VALUE rb_reader)
418
418
  {
419
- xmlTextReaderPtr reader;
420
- const char *base_uri;
419
+ VALUE rb_base_uri;
420
+ xmlTextReaderPtr c_reader;
421
+ xmlChar *c_base_uri;
421
422
 
422
- Data_Get_Struct(self, xmlTextReader, reader);
423
- base_uri = (const char *)xmlTextReaderBaseUri(reader);
424
- if (base_uri == NULL) { return Qnil; }
423
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
424
+
425
+ c_base_uri = xmlTextReaderBaseUri(c_reader);
426
+ if (c_base_uri == NULL) {
427
+ return Qnil;
428
+ }
429
+
430
+ rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
431
+ xmlFree(c_base_uri);
425
432
 
426
- return NOKOGIRI_STR_NEW2(base_uri);
433
+ return rb_base_uri;
427
434
  }
428
435
 
429
436
  /*
@@ -652,6 +659,24 @@ empty_element_p(VALUE self)
652
659
  return Qfalse;
653
660
  }
654
661
 
662
+ static VALUE
663
+ rb_xml_reader_encoding(VALUE rb_reader)
664
+ {
665
+ xmlTextReaderPtr c_reader;
666
+ const char *parser_encoding;
667
+ VALUE constructor_encoding;
668
+
669
+ constructor_encoding = rb_iv_get(rb_reader, "@encoding");
670
+ if (RTEST(constructor_encoding)) {
671
+ return constructor_encoding;
672
+ }
673
+
674
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
675
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
676
+ if (parser_encoding == NULL) { return Qnil; }
677
+ return NOKOGIRI_STR_NEW2(parser_encoding);
678
+ }
679
+
655
680
  void
656
681
  noko_init_xml_reader()
657
682
  {
@@ -672,10 +697,11 @@ noko_init_xml_reader()
672
697
  rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
673
698
  rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
674
699
  rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
675
- rb_define_method(cNokogiriXmlReader, "base_uri", base_uri, 0);
700
+ rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
676
701
  rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
677
702
  rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
678
703
  rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
704
+ rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
679
705
  rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
680
706
  rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
681
707
  rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
@@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
86
86
  xmlXPathFreeObject(needle);
87
87
  }
88
88
 
89
+
90
+ /* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
91
+ static void
92
+ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
93
+ {
94
+ xmlXPathObjectPtr element_name;
95
+
96
+ assert(ctxt->context->node);
97
+
98
+ CHECK_ARITY(1);
99
+ CAST_TO_STRING;
100
+ CHECK_TYPE(XPATH_STRING);
101
+ element_name = valuePop(ctxt);
102
+
103
+ valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval)));
104
+
105
+ xmlXPathFreeObject(element_name);
106
+ }
107
+
108
+
89
109
  /*
90
110
  * call-seq:
91
111
  * register_ns(prefix, uri)
@@ -128,11 +148,44 @@ register_variable(VALUE self, VALUE name, VALUE value)
128
148
  return self;
129
149
  }
130
150
 
151
+
152
+ /*
153
+ * convert an XPath object into a Ruby object of the appropriate type.
154
+ * returns Qundef if no conversion was possible.
155
+ */
156
+ static VALUE
157
+ xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx)
158
+ {
159
+ VALUE retval;
160
+
161
+ assert(xctx->doc);
162
+ assert(DOC_RUBY_OBJECT_TEST(xctx->doc));
163
+
164
+ switch (xobj->type) {
165
+ case XPATH_STRING:
166
+ retval = NOKOGIRI_STR_NEW2(xobj->stringval);
167
+ xmlFree(xobj->stringval);
168
+ return retval;
169
+
170
+ case XPATH_NODESET:
171
+ return noko_xml_node_set_wrap(xobj->nodesetval,
172
+ DOC_RUBY_OBJECT(xctx->doc));
173
+
174
+ case XPATH_NUMBER:
175
+ return rb_float_new(xobj->floatval);
176
+
177
+ case XPATH_BOOLEAN:
178
+ return (xobj->boolval == 1) ? Qtrue : Qfalse;
179
+
180
+ default:
181
+ return Qundef;
182
+ }
183
+ }
184
+
131
185
  void
132
186
  Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
133
187
  const char *function_name)
134
188
  {
135
- int i;
136
189
  VALUE result, doc;
137
190
  VALUE *argv;
138
191
  VALUE node_set = Qnil;
@@ -143,40 +196,25 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
143
196
  assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));
144
197
 
145
198
  argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE));
146
- for (i = 0 ; i < nargs ; ++i) {
147
- rb_gc_register_address(&argv[i]);
199
+ for (int j = 0 ; j < nargs ; ++j) {
200
+ rb_gc_register_address(&argv[j]);
148
201
  }
149
202
 
150
203
  doc = DOC_RUBY_OBJECT(ctx->context->doc);
151
204
 
152
- if (nargs > 0) {
153
- i = nargs - 1;
154
- do {
155
- obj = valuePop(ctx);
156
- switch (obj->type) {
157
- case XPATH_STRING:
158
- argv[i] = NOKOGIRI_STR_NEW2(obj->stringval);
159
- break;
160
- case XPATH_BOOLEAN:
161
- argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
162
- break;
163
- case XPATH_NUMBER:
164
- argv[i] = rb_float_new(obj->floatval);
165
- break;
166
- case XPATH_NODESET:
167
- argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
168
- break;
169
- default:
170
- argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
171
- }
172
- xmlXPathFreeNodeSetList(obj);
173
- } while (i-- > 0);
205
+ for (int j = nargs - 1 ; j >= 0 ; --j) {
206
+ obj = valuePop(ctx);
207
+ argv[j] = xpath2ruby(obj, ctx->context);
208
+ if (argv[j] == Qundef) {
209
+ argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
210
+ }
211
+ xmlXPathFreeNodeSetList(obj);
174
212
  }
175
213
 
176
214
  result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv);
177
215
 
178
- for (i = 0 ; i < nargs ; ++i) {
179
- rb_gc_unregister_address(&argv[i]);
216
+ for (int j = 0 ; j < nargs ; ++j) {
217
+ rb_gc_unregister_address(&argv[j]);
180
218
  }
181
219
  free(argv);
182
220
 
@@ -275,7 +313,7 @@ static VALUE
275
313
  evaluate(int argc, VALUE *argv, VALUE self)
276
314
  {
277
315
  VALUE search_path, xpath_handler;
278
- VALUE thing = Qnil;
316
+ VALUE retval = Qnil;
279
317
  xmlXPathContextPtr ctx;
280
318
  xmlXPathObjectPtr xpath;
281
319
  xmlChar *query;
@@ -310,31 +348,14 @@ evaluate(int argc, VALUE *argv, VALUE self)
310
348
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
311
349
  }
312
350
 
313
- assert(ctx->doc);
314
- assert(DOC_RUBY_OBJECT_TEST(ctx->doc));
315
-
316
- switch (xpath->type) {
317
- case XPATH_STRING:
318
- thing = NOKOGIRI_STR_NEW2(xpath->stringval);
319
- xmlFree(xpath->stringval);
320
- break;
321
- case XPATH_NODESET:
322
- thing = noko_xml_node_set_wrap(xpath->nodesetval,
323
- DOC_RUBY_OBJECT(ctx->doc));
324
- break;
325
- case XPATH_NUMBER:
326
- thing = rb_float_new(xpath->floatval);
327
- break;
328
- case XPATH_BOOLEAN:
329
- thing = xpath->boolval == 1 ? Qtrue : Qfalse;
330
- break;
331
- default:
332
- thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
351
+ retval = xpath2ruby(xpath, ctx);
352
+ if (retval == Qundef) {
353
+ retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
333
354
  }
334
355
 
335
356
  xmlXPathFreeNodeSetList(xpath);
336
357
 
337
- return thing;
358
+ return retval;
338
359
  }
339
360
 
340
361
  /*
@@ -360,6 +381,8 @@ new (VALUE klass, VALUE nodeobj)
360
381
  xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
361
382
  xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
362
383
  xpath_builtin_css_class);
384
+ xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
385
+ xpath_builtin_local_name_is);
363
386
 
364
387
  self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
365
388
  return self;
@@ -4423,11 +4423,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
4423
4423
  /* Parse error */
4424
4424
  parser_add_parse_error(parser, token);
4425
4425
 
4426
- /*
4427
- * Fragment case: If the parser was originally created for the HTML
4428
- * fragment parsing algorithm, then act as described in the "any other
4429
- * start tag" entry below.
4430
- */
4431
4426
  while (
4432
4427
  !(
4433
4428
  is_mathml_integration_point(get_current_node(parser))
@@ -4437,12 +4432,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
4437
4432
  ) {
4438
4433
  pop_current_node(parser);
4439
4434
  }
4440
- // XXX: The spec currently says to handle this using the in body insertion
4441
- // mode rules. That seems wrong. See
4442
- // <https://github.com/whatwg/html/issues/6808>. Instead, use the current
4443
- // insertion mode which seems like it works.
4444
- //
4445
- // handle_in_body(parser, token);
4446
4435
  handle_html_content(parser, token);
4447
4436
  return;
4448
4437
  }
@@ -0,0 +1,67 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "set"
5
+
6
+ module Nokogiri
7
+ #
8
+ # Some classes in Nokogiri are namespaced as a group, for example
9
+ # Document, DocumentFragment, and Builder.
10
+ #
11
+ # It's sometimes necessary to look up the related class, e.g.:
12
+ #
13
+ # XML::Builder → XML::Document
14
+ # HTML4::Builder → HTML4::Document
15
+ # HTML5::Document → HTML5::DocumentFragment
16
+ #
17
+ # This module is included into those key classes that need to do this.
18
+ #
19
+ module ClassResolver
20
+ # #related_class restricts matching namespaces to those matching this set.
21
+ VALID_NAMESPACES = Set.new(["HTML", "HTML4", "HTML5", "XML"])
22
+
23
+ # :call-seq:
24
+ # related_class(class_name) → Class
25
+ #
26
+ # Find a class constant within the same namespace as this object's class or one of its superclasses.
27
+ #
28
+ # Some examples:
29
+ #
30
+ # Nokogiri::XML::Document.new.related_class("DocumentFragment")
31
+ # # => Nokogiri::XML::DocumentFragment
32
+ # Nokogiri::HTML4::Document.new.related_class("DocumentFragment")
33
+ # # => Nokogiri::HTML4::DocumentFragment
34
+ #
35
+ # Note this will also work for subclasses that follow the same convention, e.g.:
36
+ #
37
+ # Loofah::HTML::Document.new.related_class("DocumentFragment")
38
+ # # => Loofah::HTML::DocumentFragment
39
+ #
40
+ # And even if it's a subclass, this will iterate through the superclasses:
41
+ #
42
+ # class ThisIsATopLevelClass < Nokogiri::HTML4::Builder ; end
43
+ # ThisIsATopLevelClass.new.related_class("Document")
44
+ # # => Nokogiri::HTML4::Document
45
+ #
46
+ def related_class(class_name)
47
+ klass = nil
48
+ inspecting = self.class
49
+
50
+ while inspecting
51
+ namespace_path = inspecting.name.split("::")[0..-2]
52
+ inspecting = inspecting.superclass
53
+
54
+ next unless VALID_NAMESPACES.include?(namespace_path.last)
55
+
56
+ related_class_name = (namespace_path << class_name).join("::")
57
+ klass = begin
58
+ Object.const_get(related_class_name)
59
+ rescue NameError
60
+ nil
61
+ end
62
+ break if klass
63
+ end
64
+ klass
65
+ end
66
+ end
67
+ end
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module CSS
4
- class Node
5
+ class Node # :nodoc:
5
6
  ALLOW_COMBINATOR_ON_SELF = [:DIRECT_ADJACENT_SELECTOR, :FOLLOWING_SELECTOR, :CHILD_SELECTOR]
6
7
 
7
8
  # Get the type of this node
@@ -10,25 +11,25 @@ module Nokogiri
10
11
  attr_accessor :value
11
12
 
12
13
  # Create a new Node with +type+ and +value+
13
- def initialize type, value
14
+ def initialize(type, value)
14
15
  @type = type
15
16
  @value = value
16
17
  end
17
18
 
18
19
  # Accept +visitor+
19
- def accept visitor
20
+ def accept(visitor)
20
21
  visitor.send(:"visit_#{type.to_s.downcase}", self)
21
22
  end
22
23
 
23
24
  ###
24
25
  # Convert this CSS node to xpath with +prefix+ using +visitor+
25
- def to_xpath prefix = '//', visitor = XPathVisitor.new
26
- prefix = '.' if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
26
+ def to_xpath(prefix, visitor)
27
+ prefix = "." if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
27
28
  prefix + visitor.accept(self)
28
29
  end
29
30
 
30
31
  # Find a node by type using +types+
31
- def find_by_type types
32
+ def find_by_type(types)
32
33
  matches = []
33
34
  matches << self if to_type == types
34
35
  @value.each do |v|
@@ -39,9 +40,9 @@ module Nokogiri
39
40
 
40
41
  # Convert to_type
41
42
  def to_type
42
- [@type] + @value.map { |n|
43
+ [@type] + @value.map do |n|
43
44
  n.to_type if n.respond_to?(:to_type)
44
- }.compact
45
+ end.compact
45
46
  end
46
47
 
47
48
  # Convert to array
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  #
3
3
  # DO NOT MODIFY!!!!
4
- # This file is automatically generated by Racc 1.5.2
4
+ # This file is automatically generated by Racc 1.6.0
5
5
  # from Racc grammar file "".
6
6
  #
7
7
 
@@ -10,6 +10,14 @@ require 'racc/parser.rb'
10
10
 
11
11
  require_relative "parser_extras"
12
12
 
13
+ module Nokogiri
14
+ module CSS
15
+ # :nodoc: all
16
+ class Parser < Racc::Parser
17
+ end
18
+ end
19
+ end
20
+
13
21
  module Nokogiri
14
22
  module CSS
15
23
  class Parser < Racc::Parser
@@ -485,7 +493,7 @@ def _reduce_27(val, _values, result)
485
493
  end
486
494
 
487
495
  def _reduce_28(val, _values, result)
488
- result = Node.new(:ELEMENT_NAME,
496
+ result = Node.new(:ATTRIB_NAME,
489
497
  [[val.first, val.last].compact.join(':')]
490
498
  )
491
499
 
@@ -495,7 +503,7 @@ end
495
503
  def _reduce_29(val, _values, result)
496
504
  # Default namespace is not applied to attributes.
497
505
  # So we don't add prefix "xmlns:" as in namespaced_ident.
498
- result = Node.new(:ELEMENT_NAME, [val.first])
506
+ result = Node.new(:ATTRIB_NAME, [val.first])
499
507
 
500
508
  result
501
509
  end
@@ -96,14 +96,14 @@ rule
96
96
  ;
97
97
  attrib_name
98
98
  : namespace '|' IDENT {
99
- result = Node.new(:ELEMENT_NAME,
99
+ result = Node.new(:ATTRIB_NAME,
100
100
  [[val.first, val.last].compact.join(':')]
101
101
  )
102
102
  }
103
103
  | IDENT {
104
104
  # Default namespace is not applied to attributes.
105
105
  # So we don't add prefix "xmlns:" as in namespaced_ident.
106
- result = Node.new(:ELEMENT_NAME, [val.first])
106
+ result = Node.new(:ATTRIB_NAME, [val.first])
107
107
  }
108
108
  ;
109
109
  function
@@ -255,6 +255,14 @@ end
255
255
 
256
256
  require_relative "parser_extras"
257
257
 
258
+ module Nokogiri
259
+ module CSS
260
+ # :nodoc: all
261
+ class Parser < Racc::Parser
262
+ end
263
+ end
264
+ end
265
+
258
266
  ---- inner
259
267
 
260
268
  def unescape_css_identifier(identifier)
@@ -1,9 +1,10 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "thread"
3
4
 
4
5
  module Nokogiri
5
6
  module CSS
6
- class Parser < Racc::Parser
7
+ class Parser < Racc::Parser # :nodoc:
7
8
  CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
8
9
 
9
10
  @cache = {}
@@ -16,13 +17,13 @@ module Nokogiri
16
17
  end
17
18
 
18
19
  # Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
19
- def set_cache(value)
20
+ def set_cache(value) # rubocop:disable Naming/AccessorMethodName
20
21
  Thread.current[CACHE_SWITCH_NAME] = !value
21
22
  end
22
23
 
23
24
  # Get the css selector in +string+ from the cache
24
25
  def [](string)
25
- return unless cache_on?
26
+ return nil unless cache_on?
26
27
  @mutex.synchronize { @cache[string] }
27
28
  end
28
29
 
@@ -46,10 +47,10 @@ module Nokogiri
46
47
  # Execute +block+ without cache
47
48
  def without_cache(&block)
48
49
  original_cache_setting = cache_on?
49
- set_cache false
50
- block.call
50
+ set_cache(false)
51
+ yield
51
52
  ensure
52
- set_cache original_cache_setting
53
+ set_cache(original_cache_setting)
53
54
  end
54
55
  end
55
56
 
@@ -61,7 +62,7 @@ module Nokogiri
61
62
  end
62
63
 
63
64
  def parse(string)
64
- @tokenizer.scan_setup string
65
+ @tokenizer.scan_setup(string)
65
66
  do_parse
66
67
  end
67
68
 
@@ -70,24 +71,23 @@ module Nokogiri
70
71
  end
71
72
 
72
73
  # Get the xpath for +string+ using +options+
73
- def xpath_for(string, options = {})
74
- key = "#{string}#{options[:ns]}#{options[:prefix]}"
75
- v = self.class[key]
76
- return v if v
77
-
78
- args = [
79
- options[:prefix] || "//",
80
- options[:visitor] || XPathVisitor.new,
81
- ]
82
- self.class[key] = parse(string).map { |ast|
83
- ast.to_xpath(*args)
84
- }
74
+ def xpath_for(string, prefix, visitor)
75
+ key = cache_key(string, prefix, visitor)
76
+ self.class[key] ||= parse(string).map do |ast|
77
+ ast.to_xpath(prefix, visitor)
78
+ end
85
79
  end
86
80
 
87
81
  # On CSS parser error, raise an exception
88
82
  def on_error(error_token_id, error_value, value_stack)
89
83
  after = value_stack.compact.last
90
- raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
84
+ raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
85
+ end
86
+
87
+ def cache_key(query, prefix, visitor)
88
+ if self.class.cache_on?
89
+ [query, prefix, @namespaces, visitor.config]
90
+ end
91
91
  end
92
92
  end
93
93
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require_relative "../syntax_error"
3
4
  module Nokogiri
4
5
  module CSS
@@ -7,7 +7,8 @@
7
7
 
8
8
  module Nokogiri
9
9
  module CSS
10
- class Tokenizer # :nodoc:
10
+ # :nodoc: all
11
+ class Tokenizer
11
12
  require 'strscan'
12
13
 
13
14
  class ScanError < StandardError ; end
@@ -1,6 +1,7 @@
1
1
  module Nokogiri
2
2
  module CSS
3
- class Tokenizer # :nodoc:
3
+ # :nodoc: all
4
+ class Tokenizer
4
5
 
5
6
  macro
6
7
  nl \n|\r\n|\r|\f