nokogiri 1.12.5 → 1.13.10

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/README.md +9 -7
  4. data/bin/nokogiri +63 -50
  5. data/dependencies.yml +13 -64
  6. data/ext/nokogiri/extconf.rb +81 -46
  7. data/ext/nokogiri/gumbo.c +1 -1
  8. data/ext/nokogiri/html4_sax_parser_context.c +2 -3
  9. data/ext/nokogiri/nokogiri.h +9 -0
  10. data/ext/nokogiri/xml_attr.c +2 -2
  11. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  12. data/ext/nokogiri/xml_cdata.c +1 -1
  13. data/ext/nokogiri/xml_document.c +41 -37
  14. data/ext/nokogiri/xml_document_fragment.c +0 -2
  15. data/ext/nokogiri/xml_dtd.c +10 -10
  16. data/ext/nokogiri/xml_element_decl.c +3 -3
  17. data/ext/nokogiri/xml_encoding_handler.c +25 -11
  18. data/ext/nokogiri/xml_entity_decl.c +5 -5
  19. data/ext/nokogiri/xml_namespace.c +41 -5
  20. data/ext/nokogiri/xml_node.c +708 -382
  21. data/ext/nokogiri/xml_node_set.c +4 -4
  22. data/ext/nokogiri/xml_reader.c +92 -11
  23. data/ext/nokogiri/xml_sax_parser_context.c +10 -3
  24. data/ext/nokogiri/xml_schema.c +3 -3
  25. data/ext/nokogiri/xml_text.c +1 -1
  26. data/ext/nokogiri/xml_xpath_context.c +76 -50
  27. data/ext/nokogiri/xslt_stylesheet.c +107 -9
  28. data/gumbo-parser/src/parser.c +0 -11
  29. data/lib/nokogiri/class_resolver.rb +67 -0
  30. data/lib/nokogiri/css/node.rb +9 -8
  31. data/lib/nokogiri/css/parser.rb +360 -341
  32. data/lib/nokogiri/css/parser.y +249 -244
  33. data/lib/nokogiri/css/parser_extras.rb +22 -20
  34. data/lib/nokogiri/css/syntax_error.rb +1 -0
  35. data/lib/nokogiri/css/tokenizer.rb +4 -3
  36. data/lib/nokogiri/css/tokenizer.rex +3 -2
  37. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  38. data/lib/nokogiri/css.rb +38 -6
  39. data/lib/nokogiri/decorators/slop.rb +8 -7
  40. data/lib/nokogiri/extension.rb +1 -1
  41. data/lib/nokogiri/gumbo.rb +1 -0
  42. data/lib/nokogiri/html.rb +16 -10
  43. data/lib/nokogiri/html4/builder.rb +1 -0
  44. data/lib/nokogiri/html4/document.rb +88 -77
  45. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  46. data/lib/nokogiri/html4/element_description.rb +1 -0
  47. data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
  48. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  49. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  50. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  51. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  52. data/lib/nokogiri/html4.rb +11 -5
  53. data/lib/nokogiri/html5/document.rb +27 -10
  54. data/lib/nokogiri/html5/document_fragment.rb +5 -2
  55. data/lib/nokogiri/html5/node.rb +10 -3
  56. data/lib/nokogiri/html5.rb +69 -64
  57. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  58. data/lib/nokogiri/syntax_error.rb +1 -0
  59. data/lib/nokogiri/version/constant.rb +2 -1
  60. data/lib/nokogiri/version/info.rb +20 -13
  61. data/lib/nokogiri/version.rb +1 -0
  62. data/lib/nokogiri/xml/attr.rb +5 -3
  63. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  64. data/lib/nokogiri/xml/builder.rb +34 -32
  65. data/lib/nokogiri/xml/cdata.rb +2 -1
  66. data/lib/nokogiri/xml/character_data.rb +1 -0
  67. data/lib/nokogiri/xml/document.rb +144 -103
  68. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  69. data/lib/nokogiri/xml/dtd.rb +3 -2
  70. data/lib/nokogiri/xml/element_content.rb +1 -0
  71. data/lib/nokogiri/xml/element_decl.rb +2 -1
  72. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  73. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  74. data/lib/nokogiri/xml/namespace.rb +2 -0
  75. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  76. data/lib/nokogiri/xml/node.rb +521 -351
  77. data/lib/nokogiri/xml/node_set.rb +50 -54
  78. data/lib/nokogiri/xml/notation.rb +12 -0
  79. data/lib/nokogiri/xml/parse_options.rb +12 -7
  80. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  81. data/lib/nokogiri/xml/pp/node.rb +24 -26
  82. data/lib/nokogiri/xml/pp.rb +1 -0
  83. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  84. data/lib/nokogiri/xml/reader.rb +20 -24
  85. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  86. data/lib/nokogiri/xml/sax/document.rb +20 -19
  87. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  88. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  89. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  90. data/lib/nokogiri/xml/sax.rb +1 -0
  91. data/lib/nokogiri/xml/schema.rb +7 -6
  92. data/lib/nokogiri/xml/searchable.rb +93 -62
  93. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  94. data/lib/nokogiri/xml/text.rb +1 -0
  95. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  96. data/lib/nokogiri/xml/xpath.rb +12 -0
  97. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  98. data/lib/nokogiri/xml.rb +4 -3
  99. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  100. data/lib/nokogiri/xslt.rb +21 -13
  101. data/lib/nokogiri.rb +19 -16
  102. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  103. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  104. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  105. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  106. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  107. metadata +117 -34
  108. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  109. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
  110. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  111. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  112. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  113. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -156,7 +156,7 @@ push(VALUE self, VALUE rb_node)
156
156
  Check_Node_Set_Node_Type(rb_node);
157
157
 
158
158
  Data_Get_Struct(self, xmlNodeSet, node_set);
159
- Data_Get_Struct(rb_node, xmlNode, node);
159
+ Noko_Node_Get_Struct(rb_node, xmlNode, node);
160
160
 
161
161
  xmlXPathNodeSetAdd(node_set, node);
162
162
 
@@ -179,7 +179,7 @@ delete (VALUE self, VALUE rb_node)
179
179
  Check_Node_Set_Node_Type(rb_node);
180
180
 
181
181
  Data_Get_Struct(self, xmlNodeSet, node_set);
182
- Data_Get_Struct(rb_node, xmlNode, node);
182
+ Noko_Node_Get_Struct(rb_node, xmlNode, node);
183
183
 
184
184
  if (xmlXPathNodeSetContains(node_set, node)) {
185
185
  xpath_node_set_del(node_set, node);
@@ -228,7 +228,7 @@ include_eh(VALUE self, VALUE rb_node)
228
228
  Check_Node_Set_Node_Type(rb_node);
229
229
 
230
230
  Data_Get_Struct(self, xmlNodeSet, node_set);
231
- Data_Get_Struct(rb_node, xmlNode, node);
231
+ Noko_Node_Get_Struct(rb_node, xmlNode, node);
232
232
 
233
233
  return (xmlXPathNodeSetContains(node_set, node) ? Qtrue : Qfalse);
234
234
  }
@@ -430,7 +430,7 @@ unlink_nodeset(VALUE self)
430
430
  xmlNodePtr node_ptr;
431
431
  node = noko_xml_node_wrap(Qnil, node_set->nodeTab[j]);
432
432
  rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */
433
- Data_Get_Struct(node, xmlNode, node_ptr);
433
+ Noko_Node_Get_Struct(node, xmlNode, node_ptr);
434
434
  node_set->nodeTab[j] = node_ptr ;
435
435
  }
436
436
  }
@@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader)
31
31
  return (0);
32
32
  }
33
33
 
34
+ // TODO: merge this function into the `namespaces` method implementation
34
35
  static void
35
36
  Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
36
37
  {
@@ -148,9 +149,13 @@ namespaces(VALUE self)
148
149
  }
149
150
 
150
151
  /*
151
- * @overload attribute_nodes()
152
- * Get the attributes of the current node as an Array of Attr
153
- * @return [Array<Nokogiri::XML::Attr>]
152
+ :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
153
+
154
+ Get the attributes of the current node as an Array of XML::Attr
155
+
156
+ ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
157
+
158
+ See related: #attribute_hash, #attributes
154
159
  */
155
160
  static VALUE
156
161
  rb_xml_reader_attribute_nodes(VALUE rb_reader)
@@ -160,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
160
165
  VALUE attr_nodes;
161
166
  int j;
162
167
 
168
+ // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
169
+ // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
170
+ NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
171
+
163
172
  Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
164
173
 
165
174
  if (! has_attributes(c_reader)) {
@@ -181,6 +190,51 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
181
190
  return attr_nodes;
182
191
  }
183
192
 
193
+ /*
194
+ :call-seq: attribute_hash() → Hash<String ⇒ String>
195
+
196
+ Get the attributes of the current node as a Hash of names and values.
197
+
198
+ See related: #attributes and #namespaces
199
+ */
200
+ static VALUE
201
+ rb_xml_reader_attribute_hash(VALUE rb_reader)
202
+ {
203
+ VALUE rb_attributes = rb_hash_new();
204
+ xmlTextReaderPtr c_reader;
205
+ xmlNodePtr c_node;
206
+ xmlAttrPtr c_property;
207
+
208
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
209
+
210
+ if (!has_attributes(c_reader)) {
211
+ return rb_attributes;
212
+ }
213
+
214
+ c_node = xmlTextReaderExpand(c_reader);
215
+ if (c_node == NULL) {
216
+ return Qnil;
217
+ }
218
+
219
+ c_property = c_node->properties;
220
+ while (c_property != NULL) {
221
+ VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
222
+ VALUE rb_value = Qnil;
223
+ xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
224
+
225
+ if (c_value) {
226
+ rb_value = NOKOGIRI_STR_NEW2(c_value);
227
+ xmlFree(c_value);
228
+ }
229
+
230
+ rb_hash_aset(rb_attributes, rb_name, rb_value);
231
+
232
+ c_property = c_property->next;
233
+ }
234
+
235
+ return rb_attributes;
236
+ }
237
+
184
238
  /*
185
239
  * call-seq:
186
240
  * attribute_at(index)
@@ -414,16 +468,23 @@ name(VALUE self)
414
468
  * Get the xml:base of the node
415
469
  */
416
470
  static VALUE
417
- base_uri(VALUE self)
471
+ rb_xml_reader_base_uri(VALUE rb_reader)
418
472
  {
419
- xmlTextReaderPtr reader;
420
- const char *base_uri;
473
+ VALUE rb_base_uri;
474
+ xmlTextReaderPtr c_reader;
475
+ xmlChar *c_base_uri;
421
476
 
422
- Data_Get_Struct(self, xmlTextReader, reader);
423
- base_uri = (const char *)xmlTextReaderBaseUri(reader);
424
- if (base_uri == NULL) { return Qnil; }
477
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
425
478
 
426
- return NOKOGIRI_STR_NEW2(base_uri);
479
+ c_base_uri = xmlTextReaderBaseUri(c_reader);
480
+ if (c_base_uri == NULL) {
481
+ return Qnil;
482
+ }
483
+
484
+ rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
485
+ xmlFree(c_base_uri);
486
+
487
+ return rb_base_uri;
427
488
  }
428
489
 
429
490
  /*
@@ -652,6 +713,24 @@ empty_element_p(VALUE self)
652
713
  return Qfalse;
653
714
  }
654
715
 
716
+ static VALUE
717
+ rb_xml_reader_encoding(VALUE rb_reader)
718
+ {
719
+ xmlTextReaderPtr c_reader;
720
+ const char *parser_encoding;
721
+ VALUE constructor_encoding;
722
+
723
+ constructor_encoding = rb_iv_get(rb_reader, "@encoding");
724
+ if (RTEST(constructor_encoding)) {
725
+ return constructor_encoding;
726
+ }
727
+
728
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
729
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
730
+ if (parser_encoding == NULL) { return Qnil; }
731
+ return NOKOGIRI_STR_NEW2(parser_encoding);
732
+ }
733
+
655
734
  void
656
735
  noko_init_xml_reader()
657
736
  {
@@ -671,11 +750,13 @@ noko_init_xml_reader()
671
750
  rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
672
751
  rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
673
752
  rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
753
+ rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
674
754
  rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
675
- rb_define_method(cNokogiriXmlReader, "base_uri", base_uri, 0);
755
+ rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
676
756
  rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
677
757
  rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
678
758
  rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
759
+ rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
679
760
  rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
680
761
  rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
681
762
  rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
@@ -2,6 +2,8 @@
2
2
 
3
3
  VALUE cNokogiriXmlSaxParserContext ;
4
4
 
5
+ static ID id_read;
6
+
5
7
  static void
6
8
  deallocate(xmlParserCtxtPtr ctxt)
7
9
  {
@@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
26
28
  xmlParserCtxtPtr ctxt;
27
29
  xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
28
30
 
31
+ if (!rb_respond_to(io, id_read)) {
32
+ rb_raise(rb_eTypeError, "argument expected to respond to :read");
33
+ }
34
+
29
35
  ctxt = xmlCreateIOParserCtxt(NULL, NULL,
30
36
  (xmlInputReadCallback)noko_io_read,
31
37
  (xmlInputCloseCallback)noko_io_close,
@@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
62
68
  {
63
69
  xmlParserCtxtPtr ctxt;
64
70
 
65
- if (NIL_P(data)) {
66
- rb_raise(rb_eArgError, "data cannot be nil");
67
- }
71
+ Check_Type(data, T_STRING);
72
+
68
73
  if (!(int)RSTRING_LEN(data)) {
69
74
  rb_raise(rb_eRuntimeError, "data cannot be empty");
70
75
  }
@@ -278,4 +283,6 @@ noko_init_xml_sax_parser_context()
278
283
  rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
279
284
  rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
280
285
  rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
286
+
287
+ id_read = rb_intern("read");
281
288
  }
@@ -25,7 +25,7 @@ validate_document(VALUE self, VALUE document)
25
25
  VALUE errors;
26
26
 
27
27
  Data_Get_Struct(self, xmlSchema, schema);
28
- Data_Get_Struct(document, xmlDoc, doc);
28
+ Noko_Node_Get_Struct(document, xmlDoc, doc);
29
29
 
30
30
  errors = rb_ary_new();
31
31
 
@@ -179,7 +179,7 @@ has_blank_nodes_p(VALUE cache)
179
179
  for (i = 0; i < RARRAY_LEN(cache); i++) {
180
180
  xmlNodePtr node;
181
181
  VALUE element = rb_ary_entry(cache, i);
182
- Data_Get_Struct(element, xmlNode, node);
182
+ Noko_Node_Get_Struct(element, xmlNode, node);
183
183
  if (xmlIsBlankNode(node)) {
184
184
  return 1;
185
185
  }
@@ -210,7 +210,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
210
210
 
211
211
  scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
212
212
 
213
- Data_Get_Struct(document, xmlDoc, doc);
213
+ Noko_Node_Get_Struct(document, xmlDoc, doc);
214
214
  doc = doc->doc; /* In case someone passes us a node. ugh. */
215
215
 
216
216
  if (scanned_args == 1) {
@@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass)
20
20
 
21
21
  rb_scan_args(argc, argv, "2*", &string, &document, &rest);
22
22
 
23
- Data_Get_Struct(document, xmlDoc, doc);
23
+ Noko_Node_Get_Struct(document, xmlDoc, doc);
24
24
 
25
25
  node = xmlNewText((xmlChar *)StringValueCStr(string));
26
26
  node->doc = doc->doc;
@@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
86
86
  xmlXPathFreeObject(needle);
87
87
  }
88
88
 
89
+
90
+ /* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
91
+ static void
92
+ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
93
+ {
94
+ xmlXPathObjectPtr element_name;
95
+
96
+ assert(ctxt->context->node);
97
+
98
+ CHECK_ARITY(1);
99
+ CAST_TO_STRING;
100
+ CHECK_TYPE(XPATH_STRING);
101
+ element_name = valuePop(ctxt);
102
+
103
+ valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval)));
104
+
105
+ xmlXPathFreeObject(element_name);
106
+ }
107
+
108
+
89
109
  /*
90
110
  * call-seq:
91
111
  * register_ns(prefix, uri)
@@ -128,11 +148,44 @@ register_variable(VALUE self, VALUE name, VALUE value)
128
148
  return self;
129
149
  }
130
150
 
151
+
152
+ /*
153
+ * convert an XPath object into a Ruby object of the appropriate type.
154
+ * returns Qundef if no conversion was possible.
155
+ */
156
+ static VALUE
157
+ xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx)
158
+ {
159
+ VALUE retval;
160
+
161
+ assert(xctx->doc);
162
+ assert(DOC_RUBY_OBJECT_TEST(xctx->doc));
163
+
164
+ switch (xobj->type) {
165
+ case XPATH_STRING:
166
+ retval = NOKOGIRI_STR_NEW2(xobj->stringval);
167
+ xmlFree(xobj->stringval);
168
+ return retval;
169
+
170
+ case XPATH_NODESET:
171
+ return noko_xml_node_set_wrap(xobj->nodesetval,
172
+ DOC_RUBY_OBJECT(xctx->doc));
173
+
174
+ case XPATH_NUMBER:
175
+ return rb_float_new(xobj->floatval);
176
+
177
+ case XPATH_BOOLEAN:
178
+ return (xobj->boolval == 1) ? Qtrue : Qfalse;
179
+
180
+ default:
181
+ return Qundef;
182
+ }
183
+ }
184
+
131
185
  void
132
186
  Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
133
187
  const char *function_name)
134
188
  {
135
- int i;
136
189
  VALUE result, doc;
137
190
  VALUE *argv;
138
191
  VALUE node_set = Qnil;
@@ -143,40 +196,25 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
143
196
  assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));
144
197
 
145
198
  argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE));
146
- for (i = 0 ; i < nargs ; ++i) {
147
- rb_gc_register_address(&argv[i]);
199
+ for (int j = 0 ; j < nargs ; ++j) {
200
+ rb_gc_register_address(&argv[j]);
148
201
  }
149
202
 
150
203
  doc = DOC_RUBY_OBJECT(ctx->context->doc);
151
204
 
152
- if (nargs > 0) {
153
- i = nargs - 1;
154
- do {
155
- obj = valuePop(ctx);
156
- switch (obj->type) {
157
- case XPATH_STRING:
158
- argv[i] = NOKOGIRI_STR_NEW2(obj->stringval);
159
- break;
160
- case XPATH_BOOLEAN:
161
- argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
162
- break;
163
- case XPATH_NUMBER:
164
- argv[i] = rb_float_new(obj->floatval);
165
- break;
166
- case XPATH_NODESET:
167
- argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
168
- break;
169
- default:
170
- argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
171
- }
172
- xmlXPathFreeNodeSetList(obj);
173
- } while (i-- > 0);
205
+ for (int j = nargs - 1 ; j >= 0 ; --j) {
206
+ obj = valuePop(ctx);
207
+ argv[j] = xpath2ruby(obj, ctx->context);
208
+ if (argv[j] == Qundef) {
209
+ argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
210
+ }
211
+ xmlXPathFreeNodeSetList(obj);
174
212
  }
175
213
 
176
214
  result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv);
177
215
 
178
- for (i = 0 ; i < nargs ; ++i) {
179
- rb_gc_unregister_address(&argv[i]);
216
+ for (int j = 0 ; j < nargs ; ++j) {
217
+ rb_gc_unregister_address(&argv[j]);
180
218
  }
181
219
  free(argv);
182
220
 
@@ -275,7 +313,7 @@ static VALUE
275
313
  evaluate(int argc, VALUE *argv, VALUE self)
276
314
  {
277
315
  VALUE search_path, xpath_handler;
278
- VALUE thing = Qnil;
316
+ VALUE retval = Qnil;
279
317
  xmlXPathContextPtr ctx;
280
318
  xmlXPathObjectPtr xpath;
281
319
  xmlChar *query;
@@ -310,31 +348,14 @@ evaluate(int argc, VALUE *argv, VALUE self)
310
348
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
311
349
  }
312
350
 
313
- assert(ctx->doc);
314
- assert(DOC_RUBY_OBJECT_TEST(ctx->doc));
315
-
316
- switch (xpath->type) {
317
- case XPATH_STRING:
318
- thing = NOKOGIRI_STR_NEW2(xpath->stringval);
319
- xmlFree(xpath->stringval);
320
- break;
321
- case XPATH_NODESET:
322
- thing = noko_xml_node_set_wrap(xpath->nodesetval,
323
- DOC_RUBY_OBJECT(ctx->doc));
324
- break;
325
- case XPATH_NUMBER:
326
- thing = rb_float_new(xpath->floatval);
327
- break;
328
- case XPATH_BOOLEAN:
329
- thing = xpath->boolval == 1 ? Qtrue : Qfalse;
330
- break;
331
- default:
332
- thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
351
+ retval = xpath2ruby(xpath, ctx);
352
+ if (retval == Qundef) {
353
+ retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
333
354
  }
334
355
 
335
356
  xmlXPathFreeNodeSetList(xpath);
336
357
 
337
- return thing;
358
+ return retval;
338
359
  }
339
360
 
340
361
  /*
@@ -350,9 +371,12 @@ new (VALUE klass, VALUE nodeobj)
350
371
  xmlXPathContextPtr ctx;
351
372
  VALUE self;
352
373
 
353
- Data_Get_Struct(nodeobj, xmlNode, node);
374
+ Noko_Node_Get_Struct(nodeobj, xmlNode, node);
354
375
 
376
+ #if LIBXML_VERSION < 21000
377
+ /* deprecated in 40483d0 */
355
378
  xmlXPathInit();
379
+ #endif
356
380
 
357
381
  ctx = xmlXPathNewContext(node->doc);
358
382
  ctx->node = node;
@@ -360,6 +384,8 @@ new (VALUE klass, VALUE nodeobj)
360
384
  xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
361
385
  xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
362
386
  xpath_builtin_css_class);
387
+ xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
388
+ xpath_builtin_local_name_is);
363
389
 
364
390
  self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
365
391
  return self;
@@ -107,19 +107,117 @@ serialize(VALUE self, VALUE xmlobj)
107
107
  }
108
108
 
109
109
  /*
110
- * call-seq:
111
- * transform(document, params = [])
110
+ * call-seq:
111
+ * transform(document)
112
+ * transform(document, params = {})
113
+ *
114
+ * Apply an XSLT stylesheet to an XML::Document.
115
+ *
116
+ * [Parameters]
117
+ * - +document+ (Nokogiri::XML::Document) the document to be transformed.
118
+ * - +params+ (Hash, Array) strings used as XSLT parameters.
119
+ *
120
+ * [Returns] Nokogiri::XML::Document
121
+ *
122
+ * *Example* of basic transformation:
123
+ *
124
+ * xslt = <<~XSLT
125
+ * <xsl:stylesheet version="1.0"
126
+ * xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
127
+ *
128
+ * <xsl:param name="title"/>
129
+ *
130
+ * <xsl:template match="/">
131
+ * <html>
132
+ * <body>
133
+ * <h1><xsl:value-of select="$title"/></h1>
134
+ * <ol>
135
+ * <xsl:for-each select="staff/employee">
136
+ * <li><xsl:value-of select="employeeId"/></li>
137
+ * </xsl:for-each>
138
+ * </ol>
139
+ * </body>
140
+ * </html>
141
+ * </xsl:stylesheet>
142
+ * XSLT
143
+ *
144
+ * xml = <<~XML
145
+ * <?xml version="1.0"?>
146
+ * <staff>
147
+ * <employee>
148
+ * <employeeId>EMP0001</employeeId>
149
+ * <position>Accountant</position>
150
+ * </employee>
151
+ * <employee>
152
+ * <employeeId>EMP0002</employeeId>
153
+ * <position>Developer</position>
154
+ * </employee>
155
+ * </staff>
156
+ * XML
157
+ *
158
+ * doc = Nokogiri::XML::Document.parse(xml)
159
+ * stylesheet = Nokogiri::XSLT.parse(xslt)
160
+ *
161
+ * ⚠ Note that the +h1+ element is empty because no param has been provided!
162
+ *
163
+ * stylesheet.transform(doc).to_xml
164
+ * # => "<html><body>\n" +
165
+ * # "<h1></h1>\n" +
166
+ * # "<ol>\n" +
167
+ * # "<li>EMP0001</li>\n" +
168
+ * # "<li>EMP0002</li>\n" +
169
+ * # "</ol>\n" +
170
+ * # "</body></html>\n"
171
+ *
172
+ * *Example* of using an input parameter hash:
173
+ *
174
+ * ⚠ The title is populated, but note how we need to quote-escape the value.
175
+ *
176
+ * stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml
177
+ * # => "<html><body>\n" +
178
+ * # "<h1>Employee List</h1>\n" +
179
+ * # "<ol>\n" +
180
+ * # "<li>EMP0001</li>\n" +
181
+ * # "<li>EMP0002</li>\n" +
182
+ * # "</ol>\n" +
183
+ * # "</body></html>\n"
184
+ *
185
+ * *Example* using the XSLT.quote_params helper method to safely quote-escape strings:
186
+ *
187
+ * stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml
188
+ * # => "<html><body>\n" +
189
+ * # "<h1>Aaron's List</h1>\n" +
190
+ * # "<ol>\n" +
191
+ * # "<li>EMP0001</li>\n" +
192
+ * # "<li>EMP0002</li>\n" +
193
+ * # "</ol>\n" +
194
+ * # "</body></html>\n"
195
+ *
196
+ * *Example* using an array of XSLT parameters
197
+ *
198
+ * You can also use an array if you want to.
112
199
  *
113
- * Apply an XSLT stylesheet to an XML::Document.
114
- * +params+ is an array of strings used as XSLT parameters.
115
- * returns Nokogiri::XML::Document
200
+ * stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml
201
+ * # => "<html><body>\n" +
202
+ * # "<h1>Employee List</h1>\n" +
203
+ * # "<ol>\n" +
204
+ * # "<li>EMP0001</li>\n" +
205
+ * # "<li>EMP0002</li>\n" +
206
+ * # "</ol>\n" +
207
+ * # "</body></html>\n"
116
208
  *
117
- * Example:
209
+ * Or pass an array to XSLT.quote_params:
118
210
  *
119
- * doc = Nokogiri::XML(File.read(ARGV[0]))
120
- * xslt = Nokogiri::XSLT(File.read(ARGV[1]))
121
- * puts xslt.transform(doc, ['key', 'value'])
211
+ * stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml
212
+ * # => "<html><body>\n" +
213
+ * # "<h1>Aaron's List</h1>\n" +
214
+ * # "<ol>\n" +
215
+ * # "<li>EMP0001</li>\n" +
216
+ * # "<li>EMP0002</li>\n" +
217
+ * # "</ol>\n" +
218
+ * # "</body></html>\n"
122
219
  *
220
+ * See: Nokogiri::XSLT.quote_params
123
221
  */
124
222
  static VALUE
125
223
  transform(int argc, VALUE *argv, VALUE self)
@@ -4423,11 +4423,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
4423
4423
  /* Parse error */
4424
4424
  parser_add_parse_error(parser, token);
4425
4425
 
4426
- /*
4427
- * Fragment case: If the parser was originally created for the HTML
4428
- * fragment parsing algorithm, then act as described in the "any other
4429
- * start tag" entry below.
4430
- */
4431
4426
  while (
4432
4427
  !(
4433
4428
  is_mathml_integration_point(get_current_node(parser))
@@ -4437,12 +4432,6 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
4437
4432
  ) {
4438
4433
  pop_current_node(parser);
4439
4434
  }
4440
- // XXX: The spec currently says to handle this using the in body insertion
4441
- // mode rules. That seems wrong. See
4442
- // <https://github.com/whatwg/html/issues/6808>. Instead, use the current
4443
- // insertion mode which seems like it works.
4444
- //
4445
- // handle_in_body(parser, token);
4446
4435
  handle_html_content(parser, token);
4447
4436
  return;
4448
4437
  }
@@ -0,0 +1,67 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "set"
5
+
6
+ module Nokogiri
7
+ #
8
+ # Some classes in Nokogiri are namespaced as a group, for example
9
+ # Document, DocumentFragment, and Builder.
10
+ #
11
+ # It's sometimes necessary to look up the related class, e.g.:
12
+ #
13
+ # XML::Builder → XML::Document
14
+ # HTML4::Builder → HTML4::Document
15
+ # HTML5::Document → HTML5::DocumentFragment
16
+ #
17
+ # This module is included into the key classes that need to do this lookup.
18
+ #
19
+ module ClassResolver
20
+ # #related_class restricts matching namespaces to those matching this set.
21
+ VALID_NAMESPACES = Set.new(["HTML", "HTML4", "HTML5", "XML"])
22
+
23
+ # :call-seq:
24
+ # related_class(class_name) → Class
25
+ #
26
+ # Find a class constant within the namespace of the receiver's class, or of one of its superclasses.
27
+ #
28
+ # Some examples:
29
+ #
30
+ # Nokogiri::XML::Document.new.related_class("DocumentFragment")
31
+ # # => Nokogiri::XML::DocumentFragment
32
+ # Nokogiri::HTML4::Document.new.related_class("DocumentFragment")
33
+ # # => Nokogiri::HTML4::DocumentFragment
34
+ #
35
+ # Note this will also work for subclasses that follow the same convention, e.g.:
36
+ #
37
+ # Loofah::HTML::Document.new.related_class("DocumentFragment")
38
+ # # => Loofah::HTML::DocumentFragment
39
+ #
40
+ # And even if it's a subclass, this will iterate through the superclasses:
41
+ #
42
+ # class ThisIsATopLevelClass < Nokogiri::HTML4::Builder ; end
43
+ # ThisIsATopLevelClass.new.related_class("Document")
44
+ # # => Nokogiri::HTML4::Document
45
+ #
46
+ def related_class(class_name)
47
+ klass = nil
48
+ inspecting = self.class
49
+
50
+ while inspecting
51
+ namespace_path = inspecting.name.split("::")[0..-2]
52
+ inspecting = inspecting.superclass
53
+
54
+ next unless VALID_NAMESPACES.include?(namespace_path.last)
55
+
56
+ related_class_name = (namespace_path << class_name).join("::")
57
+ klass = begin
58
+ Object.const_get(related_class_name)
59
+ rescue NameError
60
+ nil
61
+ end
62
+ break if klass
63
+ end
64
+ klass
65
+ end
66
+ end
67
+ end