nokogiri 1.12.2 → 1.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/README.md +9 -7
  4. data/bin/nokogiri +63 -50
  5. data/dependencies.yml +5 -6
  6. data/ext/nokogiri/extconf.rb +51 -35
  7. data/ext/nokogiri/gumbo.c +11 -11
  8. data/ext/nokogiri/html4_element_description.c +1 -1
  9. data/ext/nokogiri/html4_sax_parser_context.c +2 -1
  10. data/ext/nokogiri/nokogiri.c +1 -1
  11. data/ext/nokogiri/nokogiri.h +3 -0
  12. data/ext/nokogiri/xml_document.c +36 -36
  13. data/ext/nokogiri/xml_document_fragment.c +0 -2
  14. data/ext/nokogiri/xml_dtd.c +2 -2
  15. data/ext/nokogiri/xml_encoding_handler.c +25 -11
  16. data/ext/nokogiri/xml_namespace.c +2 -2
  17. data/ext/nokogiri/xml_node.c +647 -335
  18. data/ext/nokogiri/xml_reader.c +37 -11
  19. data/ext/nokogiri/xml_xpath_context.c +72 -49
  20. data/gumbo-parser/src/parser.c +0 -11
  21. data/lib/nokogiri/class_resolver.rb +67 -0
  22. data/lib/nokogiri/css/node.rb +9 -8
  23. data/lib/nokogiri/css/parser.rb +11 -3
  24. data/lib/nokogiri/css/parser.y +10 -2
  25. data/lib/nokogiri/css/parser_extras.rb +20 -20
  26. data/lib/nokogiri/css/syntax_error.rb +1 -0
  27. data/lib/nokogiri/css/tokenizer.rb +2 -1
  28. data/lib/nokogiri/css/tokenizer.rex +2 -1
  29. data/lib/nokogiri/css/xpath_visitor.rb +174 -75
  30. data/lib/nokogiri/css.rb +38 -6
  31. data/lib/nokogiri/decorators/slop.rb +8 -7
  32. data/lib/nokogiri/extension.rb +1 -1
  33. data/lib/nokogiri/gumbo.rb +1 -0
  34. data/lib/nokogiri/html.rb +16 -10
  35. data/lib/nokogiri/html4/builder.rb +1 -0
  36. data/lib/nokogiri/html4/document.rb +84 -75
  37. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  38. data/lib/nokogiri/html4/element_description.rb +1 -0
  39. data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
  40. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  41. data/lib/nokogiri/html4/sax/parser.rb +2 -1
  42. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  43. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  44. data/lib/nokogiri/html4.rb +11 -5
  45. data/lib/nokogiri/html5/document.rb +24 -10
  46. data/lib/nokogiri/html5/document_fragment.rb +5 -2
  47. data/lib/nokogiri/html5/node.rb +6 -3
  48. data/lib/nokogiri/html5.rb +68 -64
  49. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  50. data/lib/nokogiri/syntax_error.rb +1 -0
  51. data/lib/nokogiri/version/constant.rb +2 -1
  52. data/lib/nokogiri/version/info.rb +19 -13
  53. data/lib/nokogiri/version.rb +1 -0
  54. data/lib/nokogiri/xml/attr.rb +5 -3
  55. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  56. data/lib/nokogiri/xml/builder.rb +69 -31
  57. data/lib/nokogiri/xml/cdata.rb +2 -1
  58. data/lib/nokogiri/xml/character_data.rb +1 -0
  59. data/lib/nokogiri/xml/document.rb +178 -96
  60. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  61. data/lib/nokogiri/xml/dtd.rb +3 -2
  62. data/lib/nokogiri/xml/element_content.rb +1 -0
  63. data/lib/nokogiri/xml/element_decl.rb +2 -1
  64. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  65. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  66. data/lib/nokogiri/xml/namespace.rb +2 -0
  67. data/lib/nokogiri/xml/node/save_options.rb +7 -4
  68. data/lib/nokogiri/xml/node.rb +512 -348
  69. data/lib/nokogiri/xml/node_set.rb +46 -54
  70. data/lib/nokogiri/xml/notation.rb +12 -0
  71. data/lib/nokogiri/xml/parse_options.rb +11 -7
  72. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  73. data/lib/nokogiri/xml/pp/node.rb +24 -26
  74. data/lib/nokogiri/xml/pp.rb +1 -0
  75. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  76. data/lib/nokogiri/xml/reader.rb +17 -19
  77. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  78. data/lib/nokogiri/xml/sax/document.rb +20 -19
  79. data/lib/nokogiri/xml/sax/parser.rb +36 -34
  80. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  81. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  82. data/lib/nokogiri/xml/sax.rb +1 -0
  83. data/lib/nokogiri/xml/schema.rb +7 -6
  84. data/lib/nokogiri/xml/searchable.rb +42 -22
  85. data/lib/nokogiri/xml/syntax_error.rb +4 -4
  86. data/lib/nokogiri/xml/text.rb +1 -0
  87. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  88. data/lib/nokogiri/xml/xpath.rb +12 -0
  89. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  90. data/lib/nokogiri/xml.rb +3 -3
  91. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  92. data/lib/nokogiri/xslt.rb +3 -2
  93. data/lib/nokogiri.rb +19 -16
  94. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  95. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  96. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  97. metadata +101 -27
@@ -213,7 +213,7 @@ set_encoding(VALUE self, VALUE encoding)
213
213
  Data_Get_Struct(self, xmlDoc, doc);
214
214
 
215
215
  if (doc->encoding) {
216
- free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
216
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
217
217
  }
218
218
 
219
219
  doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
@@ -533,59 +533,59 @@ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
533
533
  static VALUE
534
534
  rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
535
535
  {
536
- VALUE mode;
537
- VALUE incl_ns;
538
- VALUE with_comments;
539
- xmlChar **ns;
540
- long ns_len, i;
536
+ VALUE rb_mode;
537
+ VALUE rb_namespaces;
538
+ VALUE rb_comments_p;
539
+ xmlChar **c_namespaces;
541
540
 
542
- xmlDocPtr doc;
543
- xmlOutputBufferPtr buf;
544
- xmlC14NIsVisibleCallback cb = NULL;
545
- void *ctx = NULL;
541
+ xmlDocPtr c_doc;
542
+ xmlOutputBufferPtr c_obuf;
543
+ xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
544
+ void *rb_callback = NULL;
546
545
 
547
546
  VALUE rb_cStringIO;
548
- VALUE io;
547
+ VALUE rb_io;
549
548
 
550
- rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
549
+ rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
550
+ if (!NIL_P(rb_mode)) { Check_Type(rb_mode, T_FIXNUM); }
551
+ if (!NIL_P(rb_namespaces)) { Check_Type(rb_namespaces, T_ARRAY); }
551
552
 
552
- Data_Get_Struct(self, xmlDoc, doc);
553
+ Data_Get_Struct(self, xmlDoc, c_doc);
553
554
 
554
555
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
555
- io = rb_class_new_instance(0, 0, rb_cStringIO);
556
- buf = xmlAllocOutputBuffer(NULL);
556
+ rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
557
+ c_obuf = xmlAllocOutputBuffer(NULL);
557
558
 
558
- buf->writecallback = (xmlOutputWriteCallback)noko_io_write;
559
- buf->closecallback = (xmlOutputCloseCallback)noko_io_close;
560
- buf->context = (void *)io;
559
+ c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
560
+ c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
561
+ c_obuf->context = (void *)rb_io;
561
562
 
562
563
  if (rb_block_given_p()) {
563
- cb = block_caller;
564
- ctx = (void *)rb_block_proc();
564
+ c_callback_wrapper = block_caller;
565
+ rb_callback = (void *)rb_block_proc();
565
566
  }
566
567
 
567
- if (NIL_P(incl_ns)) {
568
- ns = NULL;
568
+ if (NIL_P(rb_namespaces)) {
569
+ c_namespaces = NULL;
569
570
  } else {
570
- Check_Type(incl_ns, T_ARRAY);
571
- ns_len = RARRAY_LEN(incl_ns);
572
- ns = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
573
- for (i = 0 ; i < ns_len ; i++) {
574
- VALUE entry = rb_ary_entry(incl_ns, i);
575
- ns[i] = (xmlChar *)StringValueCStr(entry);
571
+ long ns_len = RARRAY_LEN(rb_namespaces);
572
+ c_namespaces = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
573
+ for (int j = 0 ; j < ns_len ; j++) {
574
+ VALUE entry = rb_ary_entry(rb_namespaces, j);
575
+ c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
576
576
  }
577
577
  }
578
578
 
579
+ xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
580
+ (int)(NIL_P(rb_mode) ? 0 : NUM2INT(rb_mode)),
581
+ c_namespaces,
582
+ (int)RTEST(rb_comments_p),
583
+ c_obuf);
579
584
 
580
- xmlC14NExecute(doc, cb, ctx,
581
- (int)(NIL_P(mode) ? 0 : NUM2INT(mode)),
582
- ns,
583
- (int) RTEST(with_comments),
584
- buf);
585
-
586
- xmlOutputBufferClose(buf);
585
+ free(c_namespaces);
586
+ xmlOutputBufferClose(c_obuf);
587
587
 
588
- return rb_funcall(io, rb_intern("string"), 0);
588
+ return rb_funcall(rb_io, rb_intern("string"), 0);
589
589
  }
590
590
 
591
591
  VALUE
@@ -28,8 +28,6 @@ new (int argc, VALUE *argv, VALUE klass)
28
28
  rb_node = noko_xml_node_wrap(klass, node);
29
29
  rb_obj_call_init(rb_node, argc, argv);
30
30
 
31
- if (rb_block_given_p()) { rb_yield(rb_node); }
32
-
33
31
  return rb_node;
34
32
  }
35
33
 
@@ -57,9 +57,9 @@ entities(VALUE self)
57
57
 
58
58
  /*
59
59
  * call-seq:
60
- * notations
60
+ * notations() → Hash<name(String)⇒Notation>
61
61
  *
62
- * Get a hash of the notations for this DTD.
62
+ * [Returns] All the notations for this DTD in a Hash of Notation +name+ to Notation.
63
63
  */
64
64
  static VALUE
65
65
  notations(VALUE self)
@@ -3,70 +3,82 @@
3
3
  VALUE cNokogiriEncodingHandler;
4
4
 
5
5
 
6
+ static void
7
+ _xml_encoding_handler_dealloc(xmlCharEncodingHandlerPtr c_handler)
8
+ {
9
+ /* make sure iconv handlers are cleaned up and freed */
10
+ xmlCharEncCloseFunc(c_handler);
11
+ }
12
+
13
+
6
14
  /*
7
15
  * call-seq: Nokogiri::EncodingHandler.[](name)
8
16
  *
9
17
  * Get the encoding handler for +name+
10
18
  */
11
19
  static VALUE
12
- get(VALUE klass, VALUE key)
20
+ rb_xml_encoding_handler_s_get(VALUE klass, VALUE key)
13
21
  {
14
22
  xmlCharEncodingHandlerPtr handler;
15
23
 
16
24
  handler = xmlFindCharEncodingHandler(StringValueCStr(key));
17
25
  if (handler) {
18
- return Data_Wrap_Struct(klass, NULL, NULL, handler);
26
+ return Data_Wrap_Struct(klass, NULL, _xml_encoding_handler_dealloc, handler);
19
27
  }
20
28
 
21
29
  return Qnil;
22
30
  }
23
31
 
32
+
24
33
  /*
25
34
  * call-seq: Nokogiri::EncodingHandler.delete(name)
26
35
  *
27
36
  * Delete the encoding alias named +name+
28
37
  */
29
38
  static VALUE
30
- delete (VALUE klass, VALUE name)
39
+ rb_xml_encoding_handler_s_delete(VALUE klass, VALUE name)
31
40
  {
32
41
  if (xmlDelEncodingAlias(StringValueCStr(name))) { return Qnil; }
33
42
 
34
43
  return Qtrue;
35
44
  }
36
45
 
46
+
37
47
  /*
38
48
  * call-seq: Nokogiri::EncodingHandler.alias(from, to)
39
49
  *
40
50
  * Alias encoding handler with name +from+ to name +to+
41
51
  */
42
52
  static VALUE
43
- alias(VALUE klass, VALUE from, VALUE to)
53
+ rb_xml_encoding_handler_s_alias(VALUE klass, VALUE from, VALUE to)
44
54
  {
45
55
  xmlAddEncodingAlias(StringValueCStr(from), StringValueCStr(to));
46
56
 
47
57
  return to;
48
58
  }
49
59
 
60
+
50
61
  /*
51
62
  * call-seq: Nokogiri::EncodingHandler.clear_aliases!
52
63
  *
53
64
  * Remove all encoding aliases.
54
65
  */
55
66
  static VALUE
56
- clear_aliases(VALUE klass)
67
+ rb_xml_encoding_handler_s_clear_aliases(VALUE klass)
57
68
  {
58
69
  xmlCleanupEncodingAliases();
59
70
 
60
71
  return klass;
61
72
  }
62
73
 
74
+
63
75
  /*
64
76
  * call-seq: name
65
77
  *
66
78
  * Get the name of this EncodingHandler
67
79
  */
68
80
  static VALUE
69
- name(VALUE self)
81
+ rb_xml_encoding_handler_name(VALUE self)
70
82
  {
71
83
  xmlCharEncodingHandlerPtr handler;
72
84
 
@@ -75,6 +87,7 @@ name(VALUE self)
75
87
  return NOKOGIRI_STR_NEW2(handler->name);
76
88
  }
77
89
 
90
+
78
91
  void
79
92
  noko_init_xml_encoding_handler()
80
93
  {
@@ -82,9 +95,10 @@ noko_init_xml_encoding_handler()
82
95
 
83
96
  rb_undef_alloc_func(cNokogiriEncodingHandler);
84
97
 
85
- rb_define_singleton_method(cNokogiriEncodingHandler, "[]", get, 1);
86
- rb_define_singleton_method(cNokogiriEncodingHandler, "delete", delete, 1);
87
- rb_define_singleton_method(cNokogiriEncodingHandler, "alias", alias, 2);
88
- rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", clear_aliases, 0);
89
- rb_define_method(cNokogiriEncodingHandler, "name", name, 0);
98
+ rb_define_singleton_method(cNokogiriEncodingHandler, "[]", rb_xml_encoding_handler_s_get, 1);
99
+ rb_define_singleton_method(cNokogiriEncodingHandler, "delete", rb_xml_encoding_handler_s_delete, 1);
100
+ rb_define_singleton_method(cNokogiriEncodingHandler, "alias", rb_xml_encoding_handler_s_alias, 2);
101
+ rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", rb_xml_encoding_handler_s_clear_aliases, 0);
102
+
103
+ rb_define_method(cNokogiriEncodingHandler, "name", rb_xml_encoding_handler_name, 0);
90
104
  }
@@ -33,10 +33,10 @@ dealloc_namespace(xmlNsPtr ns)
33
33
  */
34
34
  NOKOGIRI_DEBUG_START(ns) ;
35
35
  if (ns->href) {
36
- xmlFree((xmlChar *)(uintptr_t)ns->href);
36
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
37
37
  }
38
38
  if (ns->prefix) {
39
- xmlFree((xmlChar *)(uintptr_t)ns->prefix);
39
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix));
40
40
  }
41
41
  xmlFree(ns);
42
42
  NOKOGIRI_DEBUG_END(ns) ;