nokogiri 1.11.3 → 1.13.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (179)
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE-DEPENDENCIES.md +243 -22
  4. data/LICENSE.md +1 -1
  5. data/README.md +14 -11
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -64
  8. data/ext/nokogiri/depend +35 -34
  9. data/ext/nokogiri/extconf.rb +237 -133
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  12. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  13. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  14. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +8 -8
  15. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  16. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  17. data/ext/nokogiri/nokogiri.c +70 -38
  18. data/ext/nokogiri/nokogiri.h +27 -9
  19. data/ext/nokogiri/xml_attr.c +2 -2
  20. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  21. data/ext/nokogiri/xml_cdata.c +1 -1
  22. data/ext/nokogiri/xml_document.c +50 -50
  23. data/ext/nokogiri/xml_document_fragment.c +0 -2
  24. data/ext/nokogiri/xml_dtd.c +10 -10
  25. data/ext/nokogiri/xml_element_content.c +2 -0
  26. data/ext/nokogiri/xml_element_decl.c +3 -3
  27. data/ext/nokogiri/xml_encoding_handler.c +31 -12
  28. data/ext/nokogiri/xml_entity_decl.c +5 -5
  29. data/ext/nokogiri/xml_namespace.c +4 -2
  30. data/ext/nokogiri/xml_node.c +833 -492
  31. data/ext/nokogiri/xml_node_set.c +24 -24
  32. data/ext/nokogiri/xml_reader.c +90 -11
  33. data/ext/nokogiri/xml_sax_parser.c +6 -6
  34. data/ext/nokogiri/xml_sax_parser_context.c +12 -3
  35. data/ext/nokogiri/xml_schema.c +5 -3
  36. data/ext/nokogiri/xml_text.c +1 -1
  37. data/ext/nokogiri/xml_xpath_context.c +110 -85
  38. data/ext/nokogiri/xslt_stylesheet.c +109 -10
  39. data/gumbo-parser/CHANGES.md +63 -0
  40. data/gumbo-parser/Makefile +101 -0
  41. data/gumbo-parser/THANKS +27 -0
  42. data/gumbo-parser/src/Makefile +34 -0
  43. data/gumbo-parser/src/README.md +41 -0
  44. data/gumbo-parser/src/ascii.c +75 -0
  45. data/gumbo-parser/src/ascii.h +115 -0
  46. data/gumbo-parser/src/attribute.c +42 -0
  47. data/gumbo-parser/src/attribute.h +17 -0
  48. data/gumbo-parser/src/char_ref.c +22225 -0
  49. data/gumbo-parser/src/char_ref.h +29 -0
  50. data/gumbo-parser/src/char_ref.rl +2154 -0
  51. data/gumbo-parser/src/error.c +626 -0
  52. data/gumbo-parser/src/error.h +148 -0
  53. data/gumbo-parser/src/foreign_attrs.c +104 -0
  54. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  55. data/gumbo-parser/src/gumbo.h +943 -0
  56. data/gumbo-parser/src/insertion_mode.h +33 -0
  57. data/gumbo-parser/src/macros.h +91 -0
  58. data/gumbo-parser/src/parser.c +4875 -0
  59. data/gumbo-parser/src/parser.h +41 -0
  60. data/gumbo-parser/src/replacement.h +33 -0
  61. data/gumbo-parser/src/string_buffer.c +103 -0
  62. data/gumbo-parser/src/string_buffer.h +68 -0
  63. data/gumbo-parser/src/string_piece.c +48 -0
  64. data/gumbo-parser/src/svg_attrs.c +174 -0
  65. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  66. data/gumbo-parser/src/svg_tags.c +137 -0
  67. data/gumbo-parser/src/svg_tags.gperf +55 -0
  68. data/gumbo-parser/src/tag.c +222 -0
  69. data/gumbo-parser/src/tag_lookup.c +382 -0
  70. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  71. data/gumbo-parser/src/tag_lookup.h +13 -0
  72. data/gumbo-parser/src/token_buffer.c +79 -0
  73. data/gumbo-parser/src/token_buffer.h +71 -0
  74. data/gumbo-parser/src/token_type.h +17 -0
  75. data/gumbo-parser/src/tokenizer.c +3463 -0
  76. data/gumbo-parser/src/tokenizer.h +112 -0
  77. data/gumbo-parser/src/tokenizer_states.h +339 -0
  78. data/gumbo-parser/src/utf8.c +245 -0
  79. data/gumbo-parser/src/utf8.h +164 -0
  80. data/gumbo-parser/src/util.c +68 -0
  81. data/gumbo-parser/src/util.h +30 -0
  82. data/gumbo-parser/src/vector.c +111 -0
  83. data/gumbo-parser/src/vector.h +45 -0
  84. data/lib/nokogiri/class_resolver.rb +67 -0
  85. data/lib/nokogiri/css/node.rb +9 -8
  86. data/lib/nokogiri/css/parser.rb +361 -342
  87. data/lib/nokogiri/css/parser.y +250 -245
  88. data/lib/nokogiri/css/parser_extras.rb +22 -20
  89. data/lib/nokogiri/css/syntax_error.rb +2 -1
  90. data/lib/nokogiri/css/tokenizer.rb +4 -3
  91. data/lib/nokogiri/css/tokenizer.rex +3 -2
  92. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  93. data/lib/nokogiri/css.rb +49 -17
  94. data/lib/nokogiri/decorators/slop.rb +8 -7
  95. data/lib/nokogiri/extension.rb +8 -3
  96. data/lib/nokogiri/gumbo.rb +15 -0
  97. data/lib/nokogiri/html.rb +37 -27
  98. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  99. data/lib/nokogiri/{html → html4}/document.rb +92 -81
  100. data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
  101. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  102. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  103. data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
  104. data/lib/nokogiri/{html → html4}/sax/parser.rb +16 -16
  105. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  106. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
  107. data/lib/nokogiri/html4.rb +46 -0
  108. data/lib/nokogiri/html5/document.rb +91 -0
  109. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  110. data/lib/nokogiri/html5/node.rb +100 -0
  111. data/lib/nokogiri/html5.rb +478 -0
  112. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  113. data/lib/nokogiri/syntax_error.rb +1 -0
  114. data/lib/nokogiri/version/constant.rb +2 -1
  115. data/lib/nokogiri/version/info.rb +31 -14
  116. data/lib/nokogiri/version.rb +1 -0
  117. data/lib/nokogiri/xml/attr.rb +5 -3
  118. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  119. data/lib/nokogiri/xml/builder.rb +71 -31
  120. data/lib/nokogiri/xml/cdata.rb +2 -1
  121. data/lib/nokogiri/xml/character_data.rb +1 -0
  122. data/lib/nokogiri/xml/document.rb +183 -96
  123. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  124. data/lib/nokogiri/xml/dtd.rb +3 -2
  125. data/lib/nokogiri/xml/element_content.rb +1 -0
  126. data/lib/nokogiri/xml/element_decl.rb +2 -1
  127. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  128. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  129. data/lib/nokogiri/xml/namespace.rb +2 -0
  130. data/lib/nokogiri/xml/node/save_options.rb +9 -5
  131. data/lib/nokogiri/xml/node.rb +525 -354
  132. data/lib/nokogiri/xml/node_set.rb +50 -54
  133. data/lib/nokogiri/xml/notation.rb +12 -0
  134. data/lib/nokogiri/xml/parse_options.rb +13 -6
  135. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  136. data/lib/nokogiri/xml/pp/node.rb +24 -26
  137. data/lib/nokogiri/xml/pp.rb +3 -2
  138. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  139. data/lib/nokogiri/xml/reader.rb +20 -24
  140. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  141. data/lib/nokogiri/xml/sax/document.rb +44 -49
  142. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  143. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  144. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  145. data/lib/nokogiri/xml/sax.rb +5 -4
  146. data/lib/nokogiri/xml/schema.rb +7 -6
  147. data/lib/nokogiri/xml/searchable.rb +93 -62
  148. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  149. data/lib/nokogiri/xml/text.rb +1 -0
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  151. data/lib/nokogiri/xml/xpath.rb +13 -1
  152. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  153. data/lib/nokogiri/xml.rb +37 -37
  154. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  155. data/lib/nokogiri/xslt.rb +28 -20
  156. data/lib/nokogiri.rb +48 -43
  157. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  158. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  159. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  160. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  161. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +3 -3
  162. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  163. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  164. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  165. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  166. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  167. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  168. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  169. metadata +204 -93
  170. data/lib/nokogiri/html/element_description_defaults.rb +0 -672
  171. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  172. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  173. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  174. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  175. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  176. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
  177. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +0 -2511
  178. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  179. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -20,20 +20,20 @@ ruby_object_get(xmlNodePtr c_node)
20
20
  {
21
21
  /* see xmlElementType in libxml2 tree.h */
22
22
  switch (c_node->type) {
23
- case XML_NAMESPACE_DECL:
24
- /* _private is later in the namespace struct */
25
- return (VALUE)(((xmlNsPtr)c_node)->_private);
26
-
27
- case XML_DOCUMENT_NODE:
28
- case XML_HTML_DOCUMENT_NODE:
29
- /* in documents we use _private to store a tuple */
30
- if (DOC_RUBY_OBJECT_TEST(((xmlDocPtr)c_node))) {
31
- return DOC_RUBY_OBJECT((xmlDocPtr)c_node);
32
- }
33
- return (VALUE)NULL;
34
-
35
- default:
36
- return (VALUE)(c_node->_private);
23
+ case XML_NAMESPACE_DECL:
24
+ /* _private is later in the namespace struct */
25
+ return (VALUE)(((xmlNsPtr)c_node)->_private);
26
+
27
+ case XML_DOCUMENT_NODE:
28
+ case XML_HTML_DOCUMENT_NODE:
29
+ /* in documents we use _private to store a tuple */
30
+ if (DOC_RUBY_OBJECT_TEST(((xmlDocPtr)c_node))) {
31
+ return DOC_RUBY_OBJECT((xmlDocPtr)c_node);
32
+ }
33
+ return (VALUE)NULL;
34
+
35
+ default:
36
+ return (VALUE)(c_node->_private);
37
37
  }
38
38
  }
39
39
 
@@ -156,7 +156,7 @@ push(VALUE self, VALUE rb_node)
156
156
  Check_Node_Set_Node_Type(rb_node);
157
157
 
158
158
  Data_Get_Struct(self, xmlNodeSet, node_set);
159
- Data_Get_Struct(rb_node, xmlNode, node);
159
+ Noko_Node_Get_Struct(rb_node, xmlNode, node);
160
160
 
161
161
  xmlXPathNodeSetAdd(node_set, node);
162
162
 
@@ -179,7 +179,7 @@ delete (VALUE self, VALUE rb_node)
179
179
  Check_Node_Set_Node_Type(rb_node);
180
180
 
181
181
  Data_Get_Struct(self, xmlNodeSet, node_set);
182
- Data_Get_Struct(rb_node, xmlNode, node);
182
+ Noko_Node_Get_Struct(rb_node, xmlNode, node);
183
183
 
184
184
  if (xmlXPathNodeSetContains(node_set, node)) {
185
185
  xpath_node_set_del(node_set, node);
@@ -228,7 +228,7 @@ include_eh(VALUE self, VALUE rb_node)
228
228
  Check_Node_Set_Node_Type(rb_node);
229
229
 
230
230
  Data_Get_Struct(self, xmlNodeSet, node_set);
231
- Data_Get_Struct(rb_node, xmlNode, node);
231
+ Noko_Node_Get_Struct(rb_node, xmlNode, node);
232
232
 
233
233
  return (xmlXPathNodeSetContains(node_set, node) ? Qtrue : Qfalse);
234
234
  }
@@ -373,12 +373,12 @@ slice(int argc, VALUE *argv, VALUE self)
373
373
 
374
374
  /* if arg is Range */
375
375
  switch (rb_range_beg_len(arg, &beg, &len, (long)node_set->nodeNr, 0)) {
376
- case Qfalse:
377
- break;
378
- case Qnil:
379
- return Qnil;
380
- default:
381
- return subseq(self, beg, len);
376
+ case Qfalse:
377
+ break;
378
+ case Qnil:
379
+ return Qnil;
380
+ default:
381
+ return subseq(self, beg, len);
382
382
  }
383
383
 
384
384
  return index_at(self, NUM2LONG(arg));
@@ -430,7 +430,7 @@ unlink_nodeset(VALUE self)
430
430
  xmlNodePtr node_ptr;
431
431
  node = noko_xml_node_wrap(Qnil, node_set->nodeTab[j]);
432
432
  rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */
433
- Data_Get_Struct(node, xmlNode, node_ptr);
433
+ Noko_Node_Get_Struct(node, xmlNode, node_ptr);
434
434
  node_set->nodeTab[j] = node_ptr ;
435
435
  }
436
436
  }
@@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader)
31
31
  return (0);
32
32
  }
33
33
 
34
+ // TODO: merge this function into the `namespaces` method implementation
34
35
  static void
35
36
  Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
36
37
  {
@@ -148,9 +149,13 @@ namespaces(VALUE self)
148
149
  }
149
150
 
150
151
  /*
151
- * @overload attribute_nodes()
152
- * Get the attributes of the current node as an Array of Attr
153
- * @return [Array<Nokogiri::XML::Attr>]
152
+ :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
153
+
154
+ Get the attributes of the current node as an Array of XML::Attr
155
+
156
+ ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
157
+
158
+ See related: #attribute_hash, #attributes
154
159
  */
155
160
  static VALUE
156
161
  rb_xml_reader_attribute_nodes(VALUE rb_reader)
@@ -160,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
160
165
  VALUE attr_nodes;
161
166
  int j;
162
167
 
168
+ // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
169
+ // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
170
+ NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
171
+
163
172
  Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
164
173
 
165
174
  if (! has_attributes(c_reader)) {
@@ -181,6 +190,47 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
181
190
  return attr_nodes;
182
191
  }
183
192
 
193
+ /*
194
+ :call-seq: attribute_hash() → Hash<String ⇒ String>
195
+
196
+ Get the attributes of the current node as a Hash of names and values.
197
+
198
+ See related: #attributes and #namespaces
199
+ */
200
+ static VALUE
201
+ rb_xml_reader_attribute_hash(VALUE rb_reader)
202
+ {
203
+ VALUE rb_attributes = rb_hash_new();
204
+ xmlTextReaderPtr c_reader;
205
+ xmlNodePtr c_node;
206
+ xmlAttrPtr c_property;
207
+
208
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
209
+
210
+ if (!has_attributes(c_reader)) {
211
+ return rb_attributes;
212
+ }
213
+
214
+ c_node = xmlTextReaderExpand(c_reader);
215
+ c_property = c_node->properties;
216
+ while (c_property != NULL) {
217
+ VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
218
+ VALUE rb_value = Qnil;
219
+ xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);
220
+
221
+ if (c_value) {
222
+ rb_value = NOKOGIRI_STR_NEW2(c_value);
223
+ xmlFree(c_value);
224
+ }
225
+
226
+ rb_hash_aset(rb_attributes, rb_name, rb_value);
227
+
228
+ c_property = c_property->next;
229
+ }
230
+
231
+ return rb_attributes;
232
+ }
233
+
184
234
  /*
185
235
  * call-seq:
186
236
  * attribute_at(index)
@@ -414,16 +464,23 @@ name(VALUE self)
414
464
  * Get the xml:base of the node
415
465
  */
416
466
  static VALUE
417
- base_uri(VALUE self)
467
+ rb_xml_reader_base_uri(VALUE rb_reader)
418
468
  {
419
- xmlTextReaderPtr reader;
420
- const char *base_uri;
469
+ VALUE rb_base_uri;
470
+ xmlTextReaderPtr c_reader;
471
+ xmlChar *c_base_uri;
421
472
 
422
- Data_Get_Struct(self, xmlTextReader, reader);
423
- base_uri = (const char *)xmlTextReaderBaseUri(reader);
424
- if (base_uri == NULL) { return Qnil; }
473
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
474
+
475
+ c_base_uri = xmlTextReaderBaseUri(c_reader);
476
+ if (c_base_uri == NULL) {
477
+ return Qnil;
478
+ }
479
+
480
+ rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri);
481
+ xmlFree(c_base_uri);
425
482
 
426
- return NOKOGIRI_STR_NEW2(base_uri);
483
+ return rb_base_uri;
427
484
  }
428
485
 
429
486
  /*
@@ -652,6 +709,24 @@ empty_element_p(VALUE self)
652
709
  return Qfalse;
653
710
  }
654
711
 
712
+ static VALUE
713
+ rb_xml_reader_encoding(VALUE rb_reader)
714
+ {
715
+ xmlTextReaderPtr c_reader;
716
+ const char *parser_encoding;
717
+ VALUE constructor_encoding;
718
+
719
+ constructor_encoding = rb_iv_get(rb_reader, "@encoding");
720
+ if (RTEST(constructor_encoding)) {
721
+ return constructor_encoding;
722
+ }
723
+
724
+ Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
725
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
726
+ if (parser_encoding == NULL) { return Qnil; }
727
+ return NOKOGIRI_STR_NEW2(parser_encoding);
728
+ }
729
+
655
730
  void
656
731
  noko_init_xml_reader()
657
732
  {
@@ -662,6 +737,8 @@ noko_init_xml_reader()
662
737
  */
663
738
  cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
664
739
 
740
+ rb_undef_alloc_func(cNokogiriXmlReader);
741
+
665
742
  rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1);
666
743
  rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1);
667
744
 
@@ -669,11 +746,13 @@ noko_init_xml_reader()
669
746
  rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
670
747
  rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
671
748
  rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
749
+ rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
672
750
  rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
673
- rb_define_method(cNokogiriXmlReader, "base_uri", base_uri, 0);
751
+ rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
674
752
  rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
675
753
  rb_define_method(cNokogiriXmlReader, "depth", depth, 0);
676
754
  rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0);
755
+ rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0);
677
756
  rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0);
678
757
  rb_define_method(cNokogiriXmlReader, "lang", lang, 0);
679
758
  rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
@@ -30,12 +30,12 @@ start_document(void *ctx)
30
30
  version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
31
31
 
32
32
  switch (ctxt->standalone) {
33
- case 0:
34
- standalone = NOKOGIRI_STR_NEW2("no");
35
- break;
36
- case 1:
37
- standalone = NOKOGIRI_STR_NEW2("yes");
38
- break;
33
+ case 0:
34
+ standalone = NOKOGIRI_STR_NEW2("no");
35
+ break;
36
+ case 1:
37
+ standalone = NOKOGIRI_STR_NEW2("yes");
38
+ break;
39
39
  }
40
40
 
41
41
  rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
@@ -2,6 +2,8 @@
2
2
 
3
3
  VALUE cNokogiriXmlSaxParserContext ;
4
4
 
5
+ static ID id_read;
6
+
5
7
  static void
6
8
  deallocate(xmlParserCtxtPtr ctxt)
7
9
  {
@@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
26
28
  xmlParserCtxtPtr ctxt;
27
29
  xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
28
30
 
31
+ if (!rb_respond_to(io, id_read)) {
32
+ rb_raise(rb_eTypeError, "argument expected to respond to :read");
33
+ }
34
+
29
35
  ctxt = xmlCreateIOParserCtxt(NULL, NULL,
30
36
  (xmlInputReadCallback)noko_io_read,
31
37
  (xmlInputCloseCallback)noko_io_close,
@@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
62
68
  {
63
69
  xmlParserCtxtPtr ctxt;
64
70
 
65
- if (NIL_P(data)) {
66
- rb_raise(rb_eArgError, "data cannot be nil");
67
- }
71
+ Check_Type(data, T_STRING);
72
+
68
73
  if (!(int)RSTRING_LEN(data)) {
69
74
  rb_raise(rb_eRuntimeError, "data cannot be empty");
70
75
  }
@@ -265,6 +270,8 @@ noko_init_xml_sax_parser_context()
265
270
  {
266
271
  cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
267
272
 
273
+ rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
274
+
268
275
  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
269
276
  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
270
277
  rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
@@ -276,4 +283,6 @@ noko_init_xml_sax_parser_context()
276
283
  rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
277
284
  rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
278
285
  rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
286
+
287
+ id_read = rb_intern("read");
279
288
  }
@@ -25,7 +25,7 @@ validate_document(VALUE self, VALUE document)
25
25
  VALUE errors;
26
26
 
27
27
  Data_Get_Struct(self, xmlSchema, schema);
28
- Data_Get_Struct(document, xmlDoc, doc);
28
+ Noko_Node_Get_Struct(document, xmlDoc, doc);
29
29
 
30
30
  errors = rb_ary_new();
31
31
 
@@ -179,7 +179,7 @@ has_blank_nodes_p(VALUE cache)
179
179
  for (i = 0; i < RARRAY_LEN(cache); i++) {
180
180
  xmlNodePtr node;
181
181
  VALUE element = rb_ary_entry(cache, i);
182
- Data_Get_Struct(element, xmlNode, node);
182
+ Noko_Node_Get_Struct(element, xmlNode, node);
183
183
  if (xmlIsBlankNode(node)) {
184
184
  return 1;
185
185
  }
@@ -210,7 +210,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
210
210
 
211
211
  scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
212
212
 
213
- Data_Get_Struct(document, xmlDoc, doc);
213
+ Noko_Node_Get_Struct(document, xmlDoc, doc);
214
214
  doc = doc->doc; /* In case someone passes us a node. ugh. */
215
215
 
216
216
  if (scanned_args == 1) {
@@ -274,6 +274,8 @@ noko_init_xml_schema()
274
274
  {
275
275
  cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
276
276
 
277
+ rb_undef_alloc_func(cNokogiriXmlSchema);
278
+
277
279
  rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1);
278
280
  rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1);
279
281
 
@@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass)
20
20
 
21
21
  rb_scan_args(argc, argv, "2*", &string, &document, &rest);
22
22
 
23
- Data_Get_Struct(document, xmlDoc, doc);
23
+ Noko_Node_Get_Struct(document, xmlDoc, doc);
24
24
 
25
25
  node = xmlNewText((xmlChar *)StringValueCStr(string));
26
26
  node->doc = doc->doc;
@@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
86
86
  xmlXPathFreeObject(needle);
87
87
  }
88
88
 
89
+
90
+ /* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */
91
+ static void
92
+ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
93
+ {
94
+ xmlXPathObjectPtr element_name;
95
+
96
+ assert(ctxt->context->node);
97
+
98
+ CHECK_ARITY(1);
99
+ CAST_TO_STRING;
100
+ CHECK_TYPE(XPATH_STRING);
101
+ element_name = valuePop(ctxt);
102
+
103
+ valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval)));
104
+
105
+ xmlXPathFreeObject(element_name);
106
+ }
107
+
108
+
89
109
  /*
90
110
  * call-seq:
91
111
  * register_ns(prefix, uri)
@@ -128,11 +148,44 @@ register_variable(VALUE self, VALUE name, VALUE value)
128
148
  return self;
129
149
  }
130
150
 
151
+
152
+ /*
153
+ * convert an XPath object into a Ruby object of the appropriate type.
154
+ * returns Qundef if no conversion was possible.
155
+ */
156
+ static VALUE
157
+ xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx)
158
+ {
159
+ VALUE retval;
160
+
161
+ assert(xctx->doc);
162
+ assert(DOC_RUBY_OBJECT_TEST(xctx->doc));
163
+
164
+ switch (xobj->type) {
165
+ case XPATH_STRING:
166
+ retval = NOKOGIRI_STR_NEW2(xobj->stringval);
167
+ xmlFree(xobj->stringval);
168
+ return retval;
169
+
170
+ case XPATH_NODESET:
171
+ return noko_xml_node_set_wrap(xobj->nodesetval,
172
+ DOC_RUBY_OBJECT(xctx->doc));
173
+
174
+ case XPATH_NUMBER:
175
+ return rb_float_new(xobj->floatval);
176
+
177
+ case XPATH_BOOLEAN:
178
+ return (xobj->boolval == 1) ? Qtrue : Qfalse;
179
+
180
+ default:
181
+ return Qundef;
182
+ }
183
+ }
184
+
131
185
  void
132
186
  Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
133
187
  const char *function_name)
134
188
  {
135
- int i;
136
189
  VALUE result, doc;
137
190
  VALUE *argv;
138
191
  VALUE node_set = Qnil;
@@ -143,81 +196,66 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i
143
196
  assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc));
144
197
 
145
198
  argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE));
146
- for (i = 0 ; i < nargs ; ++i) {
147
- rb_gc_register_address(&argv[i]);
199
+ for (int j = 0 ; j < nargs ; ++j) {
200
+ rb_gc_register_address(&argv[j]);
148
201
  }
149
202
 
150
203
  doc = DOC_RUBY_OBJECT(ctx->context->doc);
151
204
 
152
- if (nargs > 0) {
153
- i = nargs - 1;
154
- do {
155
- obj = valuePop(ctx);
156
- switch (obj->type) {
157
- case XPATH_STRING:
158
- argv[i] = NOKOGIRI_STR_NEW2(obj->stringval);
159
- break;
160
- case XPATH_BOOLEAN:
161
- argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
162
- break;
163
- case XPATH_NUMBER:
164
- argv[i] = rb_float_new(obj->floatval);
165
- break;
166
- case XPATH_NODESET:
167
- argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc);
168
- break;
169
- default:
170
- argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
171
- }
172
- xmlXPathFreeNodeSetList(obj);
173
- } while (i-- > 0);
205
+ for (int j = nargs - 1 ; j >= 0 ; --j) {
206
+ obj = valuePop(ctx);
207
+ argv[j] = xpath2ruby(obj, ctx->context);
208
+ if (argv[j] == Qundef) {
209
+ argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj));
210
+ }
211
+ xmlXPathFreeNodeSetList(obj);
174
212
  }
175
213
 
176
214
  result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv);
177
215
 
178
- for (i = 0 ; i < nargs ; ++i) {
179
- rb_gc_unregister_address(&argv[i]);
216
+ for (int j = 0 ; j < nargs ; ++j) {
217
+ rb_gc_unregister_address(&argv[j]);
180
218
  }
181
219
  free(argv);
182
220
 
183
221
  switch (TYPE(result)) {
184
- case T_FLOAT:
185
- case T_BIGNUM:
186
- case T_FIXNUM:
187
- xmlXPathReturnNumber(ctx, NUM2DBL(result));
188
- break;
189
- case T_STRING:
190
- xmlXPathReturnString(
191
- ctx,
192
- xmlCharStrdup(StringValueCStr(result))
193
- );
194
- break;
195
- case T_TRUE:
196
- xmlXPathReturnTrue(ctx);
197
- break;
198
- case T_FALSE:
199
- xmlXPathReturnFalse(ctx);
200
- break;
201
- case T_NIL:
202
- break;
203
- case T_ARRAY: {
204
- VALUE args[2];
205
- args[0] = doc;
206
- args[1] = result;
207
- node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
208
- Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
209
- xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
210
- }
211
- break;
212
- case T_DATA:
213
- if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
214
- Data_Get_Struct(result, xmlNodeSet, xml_node_set);
215
- /* Copy the node set, otherwise it will get GC'd. */
216
- xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
222
+ case T_FLOAT:
223
+ case T_BIGNUM:
224
+ case T_FIXNUM:
225
+ xmlXPathReturnNumber(ctx, NUM2DBL(result));
226
+ break;
227
+ case T_STRING:
228
+ xmlXPathReturnString(
229
+ ctx,
230
+ xmlCharStrdup(StringValueCStr(result))
231
+ );
217
232
  break;
233
+ case T_TRUE:
234
+ xmlXPathReturnTrue(ctx);
235
+ break;
236
+ case T_FALSE:
237
+ xmlXPathReturnFalse(ctx);
238
+ break;
239
+ case T_NIL:
240
+ break;
241
+ case T_ARRAY: {
242
+ VALUE args[2];
243
+ args[0] = doc;
244
+ args[1] = result;
245
+ node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet);
246
+ Data_Get_Struct(node_set, xmlNodeSet, xml_node_set);
247
+ xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
218
248
  }
219
- default:
220
- rb_raise(rb_eRuntimeError, "Invalid return type");
249
+ break;
250
+ case T_DATA:
251
+ if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) {
252
+ Data_Get_Struct(result, xmlNodeSet, xml_node_set);
253
+ /* Copy the node set, otherwise it will get GC'd. */
254
+ xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set));
255
+ break;
256
+ }
257
+ default:
258
+ rb_raise(rb_eRuntimeError, "Invalid return type");
221
259
  }
222
260
  }
223
261
 
@@ -275,7 +313,7 @@ static VALUE
275
313
  evaluate(int argc, VALUE *argv, VALUE self)
276
314
  {
277
315
  VALUE search_path, xpath_handler;
278
- VALUE thing = Qnil;
316
+ VALUE retval = Qnil;
279
317
  xmlXPathContextPtr ctx;
280
318
  xmlXPathObjectPtr xpath;
281
319
  xmlChar *query;
@@ -310,31 +348,14 @@ evaluate(int argc, VALUE *argv, VALUE self)
310
348
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
311
349
  }
312
350
 
313
- assert(ctx->doc);
314
- assert(DOC_RUBY_OBJECT_TEST(ctx->doc));
315
-
316
- switch (xpath->type) {
317
- case XPATH_STRING:
318
- thing = NOKOGIRI_STR_NEW2(xpath->stringval);
319
- xmlFree(xpath->stringval);
320
- break;
321
- case XPATH_NODESET:
322
- thing = noko_xml_node_set_wrap(xpath->nodesetval,
323
- DOC_RUBY_OBJECT(ctx->doc));
324
- break;
325
- case XPATH_NUMBER:
326
- thing = rb_float_new(xpath->floatval);
327
- break;
328
- case XPATH_BOOLEAN:
329
- thing = xpath->boolval == 1 ? Qtrue : Qfalse;
330
- break;
331
- default:
332
- thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
351
+ retval = xpath2ruby(xpath, ctx);
352
+ if (retval == Qundef) {
353
+ retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc));
333
354
  }
334
355
 
335
356
  xmlXPathFreeNodeSetList(xpath);
336
357
 
337
- return thing;
358
+ return retval;
338
359
  }
339
360
 
340
361
  /*
@@ -350,7 +371,7 @@ new (VALUE klass, VALUE nodeobj)
350
371
  xmlXPathContextPtr ctx;
351
372
  VALUE self;
352
373
 
353
- Data_Get_Struct(nodeobj, xmlNode, node);
374
+ Noko_Node_Get_Struct(nodeobj, xmlNode, node);
354
375
 
355
376
  xmlXPathInit();
356
377
 
@@ -360,6 +381,8 @@ new (VALUE klass, VALUE nodeobj)
360
381
  xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
361
382
  xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
362
383
  xpath_builtin_css_class);
384
+ xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
385
+ xpath_builtin_local_name_is);
363
386
 
364
387
  self = Data_Wrap_Struct(klass, 0, deallocate, ctx);
365
388
  return self;
@@ -373,6 +396,8 @@ noko_init_xml_xpath_context(void)
373
396
  */
374
397
  cNokogiriXmlXpathContext = rb_define_class_under(mNokogiriXml, "XPathContext", rb_cObject);
375
398
 
399
+ rb_undef_alloc_func(cNokogiriXmlXpathContext);
400
+
376
401
  rb_define_singleton_method(cNokogiriXmlXpathContext, "new", new, 1);
377
402
 
378
403
  rb_define_method(cNokogiriXmlXpathContext, "evaluate", evaluate, -1);