nokogiri 1.13.6 → 1.16.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125)
  1. checksums.yaml +4 -4
  2. data/Gemfile +43 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +21 -11
  6. data/dependencies.yml +34 -15
  7. data/ext/nokogiri/extconf.rb +167 -48
  8. data/ext/nokogiri/gumbo.c +21 -11
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +4 -4
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +46 -16
  16. data/ext/nokogiri/test_global_handlers.c +2 -2
  17. data/ext/nokogiri/xml_attr.c +3 -3
  18. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  19. data/ext/nokogiri/xml_cdata.c +31 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +135 -38
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +9 -9
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +10 -10
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +6 -6
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +75 -14
  30. data/ext/nokogiri/xml_node.c +365 -87
  31. data/ext/nokogiri/xml_node_set.c +129 -111
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +126 -64
  34. data/ext/nokogiri/xml_relax_ng.c +67 -82
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +50 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +31 -12
  38. data/ext/nokogiri/xml_schema.c +95 -118
  39. data/ext/nokogiri/xml_syntax_error.c +4 -4
  40. data/ext/nokogiri/xml_text.c +27 -14
  41. data/ext/nokogiri/xml_xpath_context.c +213 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +126 -67
  43. data/gumbo-parser/Makefile +28 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +29 -10
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +2 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/parser_extras.rb +1 -1
  70. data/lib/nokogiri/css/xpath_visitor.rb +8 -26
  71. data/lib/nokogiri/css.rb +6 -0
  72. data/lib/nokogiri/decorators/slop.rb +1 -1
  73. data/lib/nokogiri/encoding_handler.rb +57 -0
  74. data/lib/nokogiri/extension.rb +4 -3
  75. data/lib/nokogiri/html4/document.rb +3 -122
  76. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  77. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  78. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  79. data/lib/nokogiri/html4.rb +1 -0
  80. data/lib/nokogiri/html5/document.rb +113 -36
  81. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  82. data/lib/nokogiri/html5/node.rb +8 -5
  83. data/lib/nokogiri/html5.rb +74 -226
  84. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  85. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  86. data/lib/nokogiri/version/constant.rb +1 -1
  87. data/lib/nokogiri/version/info.rb +16 -14
  88. data/lib/nokogiri/xml/attr.rb +49 -0
  89. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  90. data/lib/nokogiri/xml/builder.rb +1 -1
  91. data/lib/nokogiri/xml/document.rb +103 -56
  92. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  93. data/lib/nokogiri/xml/element_content.rb +10 -2
  94. data/lib/nokogiri/xml/element_decl.rb +4 -2
  95. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  96. data/lib/nokogiri/xml/namespace.rb +41 -0
  97. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  98. data/lib/nokogiri/xml/node.rb +241 -70
  99. data/lib/nokogiri/xml/node_set.rb +90 -11
  100. data/lib/nokogiri/xml/parse_options.rb +129 -50
  101. data/lib/nokogiri/xml/pp/node.rb +28 -15
  102. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  103. data/lib/nokogiri/xml/reader.rb +16 -17
  104. data/lib/nokogiri/xml/sax/document.rb +1 -1
  105. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  106. data/lib/nokogiri/xml/searchable.rb +21 -13
  107. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  108. data/lib/nokogiri/xml.rb +1 -1
  109. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  110. data/lib/nokogiri/xslt.rb +75 -5
  111. data/lib/nokogiri.rb +15 -15
  112. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  113. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  114. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  115. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  116. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  117. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  118. metadata +21 -248
  119. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  120. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  121. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  122. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  123. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  124. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  125. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -1,5 +1,7 @@
1
1
  #include <nokogiri.h>
2
2
 
3
+ #include <stdbool.h>
4
+
3
5
  // :stopdoc:
4
6
 
5
7
  VALUE cNokogiriXmlNode ;
@@ -7,22 +9,15 @@ static ID id_decorate, id_decorate_bang;
7
9
 
8
10
  typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
9
11
 
10
-
11
- #ifdef DEBUG
12
12
  static void
13
- _xml_node_dealloc(xmlNodePtr x)
13
+ _xml_node_mark(void *ptr)
14
14
  {
15
- NOKOGIRI_DEBUG_START(x)
16
- NOKOGIRI_DEBUG_END(x)
17
- }
18
- #else
19
- # define _xml_node_dealloc 0
20
- #endif
15
+ xmlNodePtr node = ptr;
21
16
 
17
+ if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
18
+ return;
19
+ }
22
20
 
23
- static void
24
- _xml_node_mark(xmlNodePtr node)
25
- {
26
21
  xmlDocPtr doc = node->doc;
27
22
  if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
28
23
  if (DOC_RUBY_OBJECT_TEST(doc)) {
@@ -33,6 +28,24 @@ _xml_node_mark(xmlNodePtr node)
33
28
  }
34
29
  }
35
30
 
31
+ static void
32
+ _xml_node_update_references(void *ptr)
33
+ {
34
+ xmlNodePtr node = ptr;
35
+
36
+ if (node->_private) {
37
+ node->_private = (void *)rb_gc_location((VALUE)node->_private);
38
+ }
39
+ }
40
+
41
+ static const rb_data_type_t nokogiri_node_type = {
42
+ .wrap_struct_name = "Nokogiri::XML::Node",
43
+ .function = {
44
+ .dmark = _xml_node_mark,
45
+ .dcompact = _xml_node_update_references,
46
+ },
47
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
48
+ };
36
49
 
37
50
  static void
38
51
  relink_namespace(xmlNodePtr reparented)
@@ -198,8 +211,8 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func
198
211
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
199
212
  }
200
213
 
201
- Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
202
- Data_Get_Struct(pivot_obj, xmlNode, pivot);
214
+ Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee);
215
+ Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot);
203
216
 
204
217
  /*
205
218
  * Check if nodes given are appropriate to have a parent-child
@@ -333,7 +346,7 @@ ok:
333
346
 
334
347
  xmlUnlinkNode(original_reparentee);
335
348
 
336
- if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
349
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling && prf != xmlAddChild
337
350
  && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
338
351
  /*
339
352
  * libxml merges text nodes in a right-to-left fashion, meaning that if
@@ -439,7 +452,7 @@ rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_hr
439
452
  xmlNsPtr c_namespace;
440
453
  const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix));
441
454
 
442
- Data_Get_Struct(rb_node, xmlNode, c_node);
455
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
443
456
  element = c_node ;
444
457
 
445
458
  c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix);
@@ -506,7 +519,7 @@ rb_xml_node_attribute(VALUE self, VALUE name)
506
519
  {
507
520
  xmlNodePtr node;
508
521
  xmlAttrPtr prop;
509
- Data_Get_Struct(self, xmlNode, node);
522
+ Noko_Node_Get_Struct(self, xmlNode, node);
510
523
  prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name));
511
524
 
512
525
  if (! prop) { return Qnil; }
@@ -557,7 +570,7 @@ rb_xml_node_attribute_nodes(VALUE rb_node)
557
570
  {
558
571
  xmlNodePtr c_node;
559
572
 
560
- Data_Get_Struct(rb_node, xmlNode, c_node);
573
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
561
574
 
562
575
  return noko_xml_node_attrs(c_node);
563
576
  }
@@ -609,7 +622,7 @@ rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
609
622
  {
610
623
  xmlNodePtr node;
611
624
  xmlAttrPtr prop;
612
- Data_Get_Struct(self, xmlNode, node);
625
+ Noko_Node_Get_Struct(self, xmlNode, node);
613
626
  prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name),
614
627
  NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace));
615
628
 
@@ -636,7 +649,7 @@ static VALUE
636
649
  rb_xml_node_blank_eh(VALUE self)
637
650
  {
638
651
  xmlNodePtr node;
639
- Data_Get_Struct(self, xmlNode, node);
652
+ Noko_Node_Get_Struct(self, xmlNode, node);
640
653
  return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
641
654
  }
642
655
 
@@ -658,7 +671,7 @@ static VALUE
658
671
  rb_xml_node_child(VALUE self)
659
672
  {
660
673
  xmlNodePtr node, child;
661
- Data_Get_Struct(self, xmlNode, node);
674
+ Noko_Node_Get_Struct(self, xmlNode, node);
662
675
 
663
676
  child = node->children;
664
677
  if (!child) { return Qnil; }
@@ -683,7 +696,7 @@ rb_xml_node_children(VALUE self)
683
696
  VALUE document;
684
697
  VALUE node_set;
685
698
 
686
- Data_Get_Struct(self, xmlNode, node);
699
+ Noko_Node_Get_Struct(self, xmlNode, node);
687
700
 
688
701
  child = node->children;
689
702
  set = xmlXPathNodeSetCreate(child);
@@ -742,7 +755,7 @@ rb_xml_node_content(VALUE self)
742
755
  xmlNodePtr node;
743
756
  xmlChar *content;
744
757
 
745
- Data_Get_Struct(self, xmlNode, node);
758
+ Noko_Node_Get_Struct(self, xmlNode, node);
746
759
 
747
760
  content = xmlNodeGetContent(node);
748
761
  if (content) {
@@ -765,7 +778,7 @@ static VALUE
765
778
  rb_xml_node_document(VALUE self)
766
779
  {
767
780
  xmlNodePtr node;
768
- Data_Get_Struct(self, xmlNode, node);
781
+ Noko_Node_Get_Struct(self, xmlNode, node);
769
782
  return DOC_RUBY_OBJECT(node->doc);
770
783
  }
771
784
 
@@ -780,9 +793,9 @@ static VALUE
780
793
  rb_xml_node_pointer_id(VALUE self)
781
794
  {
782
795
  xmlNodePtr node;
783
- Data_Get_Struct(self, xmlNode, node);
796
+ Noko_Node_Get_Struct(self, xmlNode, node);
784
797
 
785
- return INT2NUM((long)(node));
798
+ return rb_uint2inum((uintptr_t)(node));
786
799
  }
787
800
 
788
801
  /*
@@ -797,7 +810,7 @@ encode_special_chars(VALUE self, VALUE string)
797
810
  xmlChar *encoded;
798
811
  VALUE encoded_str;
799
812
 
800
- Data_Get_Struct(self, xmlNode, node);
813
+ Noko_Node_Get_Struct(self, xmlNode, node);
801
814
  encoded = xmlEncodeSpecialChars(
802
815
  node->doc,
803
816
  (const xmlChar *)StringValueCStr(string)
@@ -828,7 +841,7 @@ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_i
828
841
  xmlDocPtr doc;
829
842
  xmlDtdPtr dtd;
830
843
 
831
- Data_Get_Struct(self, xmlNode, node);
844
+ Noko_Node_Get_Struct(self, xmlNode, node);
832
845
 
833
846
  doc = node->doc;
834
847
 
@@ -861,7 +874,7 @@ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_i
861
874
  xmlDocPtr doc;
862
875
  xmlDtdPtr dtd;
863
876
 
864
- Data_Get_Struct(self, xmlNode, node);
877
+ Noko_Node_Get_Struct(self, xmlNode, node);
865
878
 
866
879
  doc = node->doc;
867
880
 
@@ -894,7 +907,7 @@ external_subset(VALUE self)
894
907
  xmlDocPtr doc;
895
908
  xmlDtdPtr dtd;
896
909
 
897
- Data_Get_Struct(self, xmlNode, node);
910
+ Noko_Node_Get_Struct(self, xmlNode, node);
898
911
 
899
912
  if (!node->doc) { return Qnil; }
900
913
 
@@ -919,7 +932,7 @@ internal_subset(VALUE self)
919
932
  xmlDocPtr doc;
920
933
  xmlDtdPtr dtd;
921
934
 
922
- Data_Get_Struct(self, xmlNode, node);
935
+ Noko_Node_Get_Struct(self, xmlNode, node);
923
936
 
924
937
  if (!node->doc) { return Qnil; }
925
938
 
@@ -944,7 +957,7 @@ internal_subset(VALUE self)
944
957
  * - +new_parent_doc+
945
958
  * The new node's parent Document. Defaults to the this node's document.
946
959
  *
947
- * [Returns] The new Nokgiri::XML::Node
960
+ * [Returns] The new Nokogiri::XML::Node
948
961
  */
949
962
  static VALUE
950
963
  duplicate_node(int argc, VALUE *argv, VALUE self)
@@ -955,7 +968,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self)
955
968
  xmlDocPtr new_parent_doc;
956
969
  xmlNodePtr node, dup;
957
970
 
958
- Data_Get_Struct(self, xmlNode, node);
971
+ Noko_Node_Get_Struct(self, xmlNode, node);
959
972
 
960
973
  n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
961
974
 
@@ -967,7 +980,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self)
967
980
  if (n_args < 2) {
968
981
  new_parent_doc = node->doc;
969
982
  } else {
970
- Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
983
+ new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
971
984
  }
972
985
 
973
986
  dup = xmlDocCopyNode(node, new_parent_doc, level);
@@ -988,7 +1001,7 @@ static VALUE
988
1001
  unlink_node(VALUE self)
989
1002
  {
990
1003
  xmlNodePtr node;
991
- Data_Get_Struct(self, xmlNode, node);
1004
+ Noko_Node_Get_Struct(self, xmlNode, node);
992
1005
  xmlUnlinkNode(node);
993
1006
  noko_xml_document_pin_node(node);
994
1007
  return self;
@@ -1005,7 +1018,7 @@ static VALUE
1005
1018
  next_sibling(VALUE self)
1006
1019
  {
1007
1020
  xmlNodePtr node, sibling;
1008
- Data_Get_Struct(self, xmlNode, node);
1021
+ Noko_Node_Get_Struct(self, xmlNode, node);
1009
1022
 
1010
1023
  sibling = node->next;
1011
1024
  if (!sibling) { return Qnil; }
@@ -1023,7 +1036,7 @@ static VALUE
1023
1036
  previous_sibling(VALUE self)
1024
1037
  {
1025
1038
  xmlNodePtr node, sibling;
1026
- Data_Get_Struct(self, xmlNode, node);
1039
+ Noko_Node_Get_Struct(self, xmlNode, node);
1027
1040
 
1028
1041
  sibling = node->prev;
1029
1042
  if (!sibling) { return Qnil; }
@@ -1041,7 +1054,7 @@ static VALUE
1041
1054
  next_element(VALUE self)
1042
1055
  {
1043
1056
  xmlNodePtr node, sibling;
1044
- Data_Get_Struct(self, xmlNode, node);
1057
+ Noko_Node_Get_Struct(self, xmlNode, node);
1045
1058
 
1046
1059
  sibling = xmlNextElementSibling(node);
1047
1060
  if (!sibling) { return Qnil; }
@@ -1059,7 +1072,7 @@ static VALUE
1059
1072
  previous_element(VALUE self)
1060
1073
  {
1061
1074
  xmlNodePtr node, sibling;
1062
- Data_Get_Struct(self, xmlNode, node);
1075
+ Noko_Node_Get_Struct(self, xmlNode, node);
1063
1076
 
1064
1077
  /*
1065
1078
  * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
@@ -1081,7 +1094,7 @@ replace(VALUE self, VALUE new_node)
1081
1094
  VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
1082
1095
 
1083
1096
  xmlNodePtr pivot;
1084
- Data_Get_Struct(self, xmlNode, pivot);
1097
+ Noko_Node_Get_Struct(self, xmlNode, pivot);
1085
1098
  noko_xml_document_pin_node(pivot);
1086
1099
 
1087
1100
  return reparent;
@@ -1116,7 +1129,7 @@ rb_xml_node_element_children(VALUE self)
1116
1129
  VALUE document;
1117
1130
  VALUE node_set;
1118
1131
 
1119
- Data_Get_Struct(self, xmlNode, node);
1132
+ Noko_Node_Get_Struct(self, xmlNode, node);
1120
1133
 
1121
1134
  child = xmlFirstElementChild(node);
1122
1135
  set = xmlXPathNodeSetCreate(child);
@@ -1155,7 +1168,7 @@ static VALUE
1155
1168
  rb_xml_node_first_element_child(VALUE self)
1156
1169
  {
1157
1170
  xmlNodePtr node, child;
1158
- Data_Get_Struct(self, xmlNode, node);
1171
+ Noko_Node_Get_Struct(self, xmlNode, node);
1159
1172
 
1160
1173
  child = xmlFirstElementChild(node);
1161
1174
  if (!child) { return Qnil; }
@@ -1182,7 +1195,7 @@ static VALUE
1182
1195
  rb_xml_node_last_element_child(VALUE self)
1183
1196
  {
1184
1197
  xmlNodePtr node, child;
1185
- Data_Get_Struct(self, xmlNode, node);
1198
+ Noko_Node_Get_Struct(self, xmlNode, node);
1186
1199
 
1187
1200
  child = xmlLastElementChild(node);
1188
1201
  if (!child) { return Qnil; }
@@ -1200,7 +1213,7 @@ static VALUE
1200
1213
  key_eh(VALUE self, VALUE attribute)
1201
1214
  {
1202
1215
  xmlNodePtr node;
1203
- Data_Get_Struct(self, xmlNode, node);
1216
+ Noko_Node_Get_Struct(self, xmlNode, node);
1204
1217
  if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) {
1205
1218
  return Qtrue;
1206
1219
  }
@@ -1217,7 +1230,7 @@ static VALUE
1217
1230
  namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
1218
1231
  {
1219
1232
  xmlNodePtr node;
1220
- Data_Get_Struct(self, xmlNode, node);
1233
+ Noko_Node_Get_Struct(self, xmlNode, node);
1221
1234
  if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute),
1222
1235
  NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) {
1223
1236
  return Qtrue;
@@ -1236,7 +1249,7 @@ set(VALUE self, VALUE property, VALUE value)
1236
1249
  {
1237
1250
  xmlNodePtr node, cur;
1238
1251
  xmlAttrPtr prop;
1239
- Data_Get_Struct(self, xmlNode, node);
1252
+ Noko_Node_Get_Struct(self, xmlNode, node);
1240
1253
 
1241
1254
  /* If a matching attribute node already exists, then xmlSetProp will destroy
1242
1255
  * the existing node's children. However, if Nokogiri has a node object
@@ -1281,7 +1294,7 @@ get(VALUE self, VALUE rattribute)
1281
1294
 
1282
1295
  if (NIL_P(rattribute)) { return Qnil; }
1283
1296
 
1284
- Data_Get_Struct(self, xmlNode, node);
1297
+ Noko_Node_Get_Struct(self, xmlNode, node);
1285
1298
  attribute = xmlCharStrdup(StringValueCStr(rattribute));
1286
1299
 
1287
1300
  colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':'));
@@ -1323,10 +1336,10 @@ set_namespace(VALUE self, VALUE namespace)
1323
1336
  xmlNodePtr node;
1324
1337
  xmlNsPtr ns = NULL;
1325
1338
 
1326
- Data_Get_Struct(self, xmlNode, node);
1339
+ Noko_Node_Get_Struct(self, xmlNode, node);
1327
1340
 
1328
1341
  if (!NIL_P(namespace)) {
1329
- Data_Get_Struct(namespace, xmlNs, ns);
1342
+ Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
1330
1343
  }
1331
1344
 
1332
1345
  xmlSetNs(node, ns);
@@ -1360,7 +1373,7 @@ static VALUE
1360
1373
  rb_xml_node_namespace(VALUE rb_node)
1361
1374
  {
1362
1375
  xmlNodePtr c_node ;
1363
- Data_Get_Struct(rb_node, xmlNode, c_node);
1376
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1364
1377
 
1365
1378
  if (c_node->ns) {
1366
1379
  return noko_xml_namespace_wrap(c_node->ns, c_node->doc);
@@ -1405,7 +1418,7 @@ namespace_definitions(VALUE rb_node)
1405
1418
  xmlNsPtr c_namespace;
1406
1419
  VALUE definitions = rb_ary_new();
1407
1420
 
1408
- Data_Get_Struct(rb_node, xmlNode, c_node);
1421
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1409
1422
 
1410
1423
  c_namespace = c_node->nsDef;
1411
1424
  if (!c_namespace) {
@@ -1456,7 +1469,7 @@ rb_xml_node_namespace_scopes(VALUE rb_node)
1456
1469
  VALUE scopes = rb_ary_new();
1457
1470
  int j;
1458
1471
 
1459
- Data_Get_Struct(rb_node, xmlNode, c_node);
1472
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1460
1473
 
1461
1474
  namespaces = xmlGetNsList(c_node->doc, c_node);
1462
1475
  if (!namespaces) {
@@ -1481,8 +1494,8 @@ static VALUE
1481
1494
  node_type(VALUE self)
1482
1495
  {
1483
1496
  xmlNodePtr node;
1484
- Data_Get_Struct(self, xmlNode, node);
1485
- return INT2NUM((long)node->type);
1497
+ Noko_Node_Get_Struct(self, xmlNode, node);
1498
+ return INT2NUM(node->type);
1486
1499
  }
1487
1500
 
1488
1501
  /*
@@ -1495,7 +1508,7 @@ static VALUE
1495
1508
  set_native_content(VALUE self, VALUE content)
1496
1509
  {
1497
1510
  xmlNodePtr node, child, next ;
1498
- Data_Get_Struct(self, xmlNode, node);
1511
+ Noko_Node_Get_Struct(self, xmlNode, node);
1499
1512
 
1500
1513
  child = node->children;
1501
1514
  while (NULL != child) {
@@ -1521,7 +1534,7 @@ set_lang(VALUE self_rb, VALUE lang_rb)
1521
1534
  xmlNodePtr self ;
1522
1535
  xmlChar *lang ;
1523
1536
 
1524
- Data_Get_Struct(self_rb, xmlNode, self);
1537
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1525
1538
  lang = (xmlChar *)StringValueCStr(lang_rb);
1526
1539
 
1527
1540
  xmlNodeSetLang(self, lang);
@@ -1543,7 +1556,7 @@ get_lang(VALUE self_rb)
1543
1556
  xmlChar *lang ;
1544
1557
  VALUE lang_rb ;
1545
1558
 
1546
- Data_Get_Struct(self_rb, xmlNode, self);
1559
+ Noko_Node_Get_Struct(self_rb, xmlNode, self);
1547
1560
 
1548
1561
  lang = xmlNodeGetLang(self);
1549
1562
  if (lang) {
@@ -1572,7 +1585,7 @@ static VALUE
1572
1585
  get_parent(VALUE self)
1573
1586
  {
1574
1587
  xmlNodePtr node, parent;
1575
- Data_Get_Struct(self, xmlNode, node);
1588
+ Noko_Node_Get_Struct(self, xmlNode, node);
1576
1589
 
1577
1590
  parent = node->parent;
1578
1591
  if (!parent) { return Qnil; }
@@ -1590,7 +1603,7 @@ static VALUE
1590
1603
  set_name(VALUE self, VALUE new_name)
1591
1604
  {
1592
1605
  xmlNodePtr node;
1593
- Data_Get_Struct(self, xmlNode, node);
1606
+ Noko_Node_Get_Struct(self, xmlNode, node);
1594
1607
  xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name));
1595
1608
  return new_name;
1596
1609
  }
@@ -1605,7 +1618,7 @@ static VALUE
1605
1618
  get_name(VALUE self)
1606
1619
  {
1607
1620
  xmlNodePtr node;
1608
- Data_Get_Struct(self, xmlNode, node);
1621
+ Noko_Node_Get_Struct(self, xmlNode, node);
1609
1622
  if (node->name) {
1610
1623
  return NOKOGIRI_STR_NEW2(node->name);
1611
1624
  }
@@ -1625,7 +1638,7 @@ rb_xml_node_path(VALUE rb_node)
1625
1638
  xmlChar *c_path ;
1626
1639
  VALUE rval;
1627
1640
 
1628
- Data_Get_Struct(rb_node, xmlNode, c_node);
1641
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1629
1642
 
1630
1643
  c_path = xmlGetNodePath(c_node);
1631
1644
  if (c_path == NULL) {
@@ -1674,7 +1687,7 @@ native_write_to(
1674
1687
  const char *before_indent;
1675
1688
  xmlSaveCtxtPtr savectx;
1676
1689
 
1677
- Data_Get_Struct(self, xmlNode, node);
1690
+ Noko_Node_Get_Struct(self, xmlNode, node);
1678
1691
 
1679
1692
  xmlIndentTreeOutput = 1;
1680
1693
 
@@ -1697,6 +1710,269 @@ native_write_to(
1697
1710
  return io;
1698
1711
  }
1699
1712
 
1713
+
1714
+ static inline void
1715
+ output_partial_string(VALUE out, char const *str, size_t length)
1716
+ {
1717
+ if (length) {
1718
+ rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
1719
+ }
1720
+ }
1721
+
1722
+ static inline void
1723
+ output_char(VALUE out, char ch)
1724
+ {
1725
+ output_partial_string(out, &ch, 1);
1726
+ }
1727
+
1728
+ static inline void
1729
+ output_string(VALUE out, char const *str)
1730
+ {
1731
+ output_partial_string(out, str, strlen(str));
1732
+ }
1733
+
1734
+ static inline void
1735
+ output_tagname(VALUE out, xmlNodePtr elem)
1736
+ {
1737
+ // Elements in the HTML, MathML, and SVG namespaces do not use a namespace
1738
+ // prefix in the HTML syntax.
1739
+ char const *name = (char const *)elem->name;
1740
+ xmlNsPtr ns = elem->ns;
1741
+ if (ns && ns->href && ns->prefix
1742
+ && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
1743
+ && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
1744
+ && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
1745
+ output_string(out, (char const *)elem->ns->prefix);
1746
+ output_char(out, ':');
1747
+ char const *colon = strchr(name, ':');
1748
+ if (colon) {
1749
+ name = colon + 1;
1750
+ }
1751
+ }
1752
+ output_string(out, name);
1753
+ }
1754
+
1755
+ static inline void
1756
+ output_attr_name(VALUE out, xmlAttrPtr attr)
1757
+ {
1758
+ xmlNsPtr ns = attr->ns;
1759
+ char const *name = (char const *)attr->name;
1760
+ if (ns && ns->href) {
1761
+ char const *uri = (char const *)ns->href;
1762
+ char const *localname = strchr(name, ':');
1763
+ if (localname) {
1764
+ ++localname;
1765
+ } else {
1766
+ localname = name;
1767
+ }
1768
+
1769
+ if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
1770
+ output_string(out, "xml:");
1771
+ name = localname;
1772
+ } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
1773
+ // xmlns:xmlns -> xmlns
1774
+ // xmlns:foo -> xmlns:foo
1775
+ if (strcmp(localname, "xmlns")) {
1776
+ output_string(out, "xmlns:");
1777
+ }
1778
+ name = localname;
1779
+ } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
1780
+ output_string(out, "xlink:");
1781
+ name = localname;
1782
+ } else if (ns->prefix) {
1783
+ output_string(out, (char const *)ns->prefix);
1784
+ output_char(out, ':');
1785
+ name = localname;
1786
+ }
1787
+ }
1788
+ output_string(out, name);
1789
+ }
1790
+
1791
+ static void
1792
+ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1793
+ {
1794
+ xmlChar const *next = start;
1795
+ int ch;
1796
+
1797
+ while ((ch = *next) != 0) {
1798
+ char const *replacement = NULL;
1799
+ size_t replaced_bytes = 1;
1800
+ if (ch == '&') {
1801
+ replacement = "&amp;";
1802
+ } else if (ch == 0xC2 && next[1] == 0xA0) {
1803
+ // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
1804
+ replacement = "&nbsp;";
1805
+ replaced_bytes = 2;
1806
+ } else if (attr && ch == '"') {
1807
+ replacement = "&quot;";
1808
+ } else if (!attr && ch == '<') {
1809
+ replacement = "&lt;";
1810
+ } else if (!attr && ch == '>') {
1811
+ replacement = "&gt;";
1812
+ } else {
1813
+ ++next;
1814
+ continue;
1815
+ }
1816
+ output_partial_string(out, (char const *)start, next - start);
1817
+ output_string(out, replacement);
1818
+ next += replaced_bytes;
1819
+ start = next;
1820
+ }
1821
+ output_partial_string(out, (char const *)start, next - start);
1822
+ }
1823
+
1824
+ static bool
1825
+ should_prepend_newline(xmlNodePtr node)
1826
+ {
1827
+ char const *name = (char const *)node->name;
1828
+ xmlNodePtr child = node->children;
1829
+
1830
+ if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
1831
+ return false;
1832
+ }
1833
+
1834
+ return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
1835
+ }
1836
+
1837
+ static VALUE
1838
+ rb_prepend_newline(VALUE self)
1839
+ {
1840
+ xmlNodePtr node;
1841
+ Noko_Node_Get_Struct(self, xmlNode, node);
1842
+ return should_prepend_newline(node) ? Qtrue : Qfalse;
1843
+ }
1844
+
1845
+ static bool
1846
+ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1847
+ {
1848
+ char const *name = (char const *)node->name;
1849
+ if (name == NULL) { // fragments don't have a name
1850
+ return false;
1851
+ }
1852
+ for (size_t idx = 0; idx < num_tagnames; ++idx) {
1853
+ if (!strcmp(name, tagnames[idx])) {
1854
+ return true;
1855
+ }
1856
+ }
1857
+ return false;
1858
+
1859
+ }
1860
+
1861
+ static void
1862
+ output_node(
1863
+ VALUE out,
1864
+ xmlNodePtr node,
1865
+ bool preserve_newline
1866
+ )
1867
+ {
1868
+ static char const *const VOID_ELEMENTS[] = {
1869
+ "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
1870
+ "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
1871
+ };
1872
+
1873
+ static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
1874
+ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
1875
+ };
1876
+
1877
+ switch (node->type) {
1878
+ case XML_ELEMENT_NODE:
1879
+ // Serialize the start tag.
1880
+ output_char(out, '<');
1881
+ output_tagname(out, node);
1882
+
1883
+ // Add attributes.
1884
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1885
+ output_char(out, ' ');
1886
+ output_attr_name(out, attr);
1887
+ if (attr->children) {
1888
+ output_string(out, "=\"");
1889
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1890
+ output_escaped_string(out, value, true);
1891
+ xmlFree(value);
1892
+ output_char(out, '"');
1893
+ } else {
1894
+ // Output name=""
1895
+ output_string(out, "=\"\"");
1896
+ }
1897
+ }
1898
+ output_char(out, '>');
1899
+
1900
+ // Add children and end tag if element is not void.
1901
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
1902
+ if (preserve_newline && should_prepend_newline(node)) {
1903
+ output_char(out, '\n');
1904
+ }
1905
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1906
+ output_node(out, child, preserve_newline);
1907
+ }
1908
+ output_string(out, "</");
1909
+ output_tagname(out, node);
1910
+ output_char(out, '>');
1911
+ }
1912
+ break;
1913
+
1914
+ case XML_TEXT_NODE:
1915
+ if (node->parent
1916
+ && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
1917
+ sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
1918
+ output_string(out, (char const *)node->content);
1919
+ } else {
1920
+ output_escaped_string(out, node->content, false);
1921
+ }
1922
+ break;
1923
+
1924
+ case XML_CDATA_SECTION_NODE:
1925
+ output_string(out, "<![CDATA[");
1926
+ output_string(out, (char const *)node->content);
1927
+ output_string(out, "]]>");
1928
+ break;
1929
+
1930
+ case XML_COMMENT_NODE:
1931
+ output_string(out, "<!--");
1932
+ output_string(out, (char const *)node->content);
1933
+ output_string(out, "-->");
1934
+ break;
1935
+
1936
+ case XML_PI_NODE:
1937
+ output_string(out, "<?");
1938
+ output_string(out, (char const *)node->content);
1939
+ output_char(out, '>');
1940
+ break;
1941
+
1942
+ case XML_DOCUMENT_TYPE_NODE:
1943
+ case XML_DTD_NODE:
1944
+ output_string(out, "<!DOCTYPE ");
1945
+ output_string(out, (char const *)node->name);
1946
+ output_string(out, ">");
1947
+ break;
1948
+
1949
+ case XML_DOCUMENT_NODE:
1950
+ case XML_DOCUMENT_FRAG_NODE:
1951
+ case XML_HTML_DOCUMENT_NODE:
1952
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1953
+ output_node(out, child, preserve_newline);
1954
+ }
1955
+ break;
1956
+
1957
+ default:
1958
+ rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
1959
+ break;
1960
+ }
1961
+ }
1962
+
1963
+ static VALUE
1964
+ html_standard_serialize(
1965
+ VALUE self,
1966
+ VALUE preserve_newline
1967
+ )
1968
+ {
1969
+ xmlNodePtr node;
1970
+ Noko_Node_Get_Struct(self, xmlNode, node);
1971
+ VALUE output = rb_str_buf_new(4096);
1972
+ output_node(output, node, RTEST(preserve_newline));
1973
+ return output;
1974
+ }
1975
+
1700
1976
  /*
1701
1977
  * :call-seq:
1702
1978
  * line() → Integer
@@ -1728,9 +2004,9 @@ static VALUE
1728
2004
  rb_xml_node_line(VALUE rb_node)
1729
2005
  {
1730
2006
  xmlNodePtr c_node;
1731
- Data_Get_Struct(rb_node, xmlNode, c_node);
2007
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1732
2008
 
1733
- return INT2NUM(xmlGetLineNo(c_node));
2009
+ return LONG2NUM(xmlGetLineNo(c_node));
1734
2010
  }
1735
2011
 
1736
2012
  /*
@@ -1745,7 +2021,7 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
1745
2021
  xmlNodePtr c_node;
1746
2022
  int line_number = NUM2INT(rb_line_number);
1747
2023
 
1748
- Data_Get_Struct(rb_node, xmlNode, c_node);
2024
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1749
2025
 
1750
2026
  // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
1751
2027
  // search for "psvi" in SAX2.c and tree.c to learn more.
@@ -1778,10 +2054,9 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
1778
2054
  rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
1779
2055
  }
1780
2056
  if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
1781
- // TODO: deprecate allowing Node
1782
- rb_warn("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
2057
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
1783
2058
  }
1784
- Data_Get_Struct(rb_document_node, xmlNode, c_document_node);
2059
+ Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
1785
2060
 
1786
2061
  c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name));
1787
2062
  c_node->doc = c_document_node->doc;
@@ -1811,7 +2086,7 @@ dump_html(VALUE self)
1811
2086
  xmlNodePtr node ;
1812
2087
  VALUE html;
1813
2088
 
1814
- Data_Get_Struct(self, xmlNode, node);
2089
+ Noko_Node_Get_Struct(self, xmlNode, node);
1815
2090
 
1816
2091
  buf = xmlBufferCreate() ;
1817
2092
  htmlNodeDump(buf, node->doc, node);
@@ -1830,10 +2105,10 @@ static VALUE
1830
2105
  compare(VALUE self, VALUE _other)
1831
2106
  {
1832
2107
  xmlNodePtr node, other;
1833
- Data_Get_Struct(self, xmlNode, node);
1834
- Data_Get_Struct(_other, xmlNode, other);
2108
+ Noko_Node_Get_Struct(self, xmlNode, node);
2109
+ Noko_Node_Get_Struct(_other, xmlNode, other);
1835
2110
 
1836
- return INT2NUM((long)xmlXPathCmpNodes(other, node));
2111
+ return INT2NUM(xmlXPathCmpNodes(other, node));
1837
2112
  }
1838
2113
 
1839
2114
 
@@ -1851,14 +2126,14 @@ process_xincludes(VALUE self, VALUE options)
1851
2126
  xmlNodePtr node;
1852
2127
  VALUE error_list = rb_ary_new();
1853
2128
 
1854
- Data_Get_Struct(self, xmlNode, node);
2129
+ Noko_Node_Get_Struct(self, xmlNode, node);
1855
2130
 
1856
2131
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
1857
2132
  rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
1858
2133
  xmlSetStructuredErrorFunc(NULL, NULL);
1859
2134
 
1860
2135
  if (rcode < 0) {
1861
- xmlErrorPtr error;
2136
+ xmlErrorConstPtr error;
1862
2137
 
1863
2138
  error = xmlGetLastError();
1864
2139
  if (error) {
@@ -1882,7 +2157,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
1882
2157
  VALUE doc, err;
1883
2158
  int doc_is_empty;
1884
2159
 
1885
- Data_Get_Struct(self, xmlNode, node);
2160
+ Noko_Node_Get_Struct(self, xmlNode, node);
1886
2161
 
1887
2162
  doc = DOC_RUBY_OBJECT(node->doc);
1888
2163
  err = rb_iv_get(doc, "@errors");
@@ -1894,6 +2169,8 @@ in_context(VALUE self, VALUE _str, VALUE _options)
1894
2169
 
1895
2170
  /* Twiddle global variable because of a bug in libxml2.
1896
2171
  * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2172
+ *
2173
+ * TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
1897
2174
  */
1898
2175
  #ifndef HTML_PARSE_NOIMPLIED
1899
2176
  htmlHandleOmittedElem(0);
@@ -1933,12 +2210,17 @@ in_context(VALUE self, VALUE _str, VALUE _options)
1933
2210
 
1934
2211
  xmlSetStructuredErrorFunc(NULL, NULL);
1935
2212
 
1936
- /* Workaround for a libxml2 bug where a parsing error may leave a broken
2213
+ /*
2214
+ * Workaround for a libxml2 bug where a parsing error may leave a broken
1937
2215
  * node reference in node->doc->children.
2216
+ *
2217
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2218
+ *
1938
2219
  * This workaround is limited to when a parse error occurs, the document
1939
2220
  * went from having no children to having children, and the context node is
1940
2221
  * part of a document fragment.
1941
- * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2222
+ *
2223
+ * TODO: This was fixed in libxml2 2.8.0 by 71a243d
1942
2224
  */
1943
2225
  if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
1944
2226
  child_iter = node;
@@ -1974,14 +2256,12 @@ in_context(VALUE self, VALUE _str, VALUE _options)
1974
2256
  return noko_xml_node_set_wrap(set, doc);
1975
2257
  }
1976
2258
 
1977
-
1978
2259
  VALUE
1979
2260
  noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
1980
2261
  {
1981
2262
  VALUE rb_document, rb_node_cache, rb_node;
1982
2263
  nokogiriTuplePtr node_has_a_document;
1983
2264
  xmlDocPtr c_doc;
1984
- void (*f_mark)(xmlNodePtr) = NULL ;
1985
2265
 
1986
2266
  assert(c_node);
1987
2267
 
@@ -1989,11 +2269,9 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
1989
2269
  return DOC_RUBY_OBJECT(c_node->doc);
1990
2270
  }
1991
2271
 
1992
- /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
1993
- /* see https://github.com/sparklemotion/nokogiri/issues/95 */
1994
- /* and https://github.com/sparklemotion/nokogiri/issues/439 */
1995
2272
  c_doc = c_node->doc;
1996
- if (c_doc->type == XML_DOCUMENT_FRAG_NODE) { c_doc = c_doc->doc; }
2273
+
2274
+ // Nodes yielded from XML::Reader don't have a fully-realized Document
1997
2275
  node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc);
1998
2276
 
1999
2277
  if (c_node->_private && node_has_a_document) {
@@ -2043,9 +2321,7 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2043
2321
  }
2044
2322
  }
2045
2323
 
2046
- f_mark = node_has_a_document ? _xml_node_mark : NULL ;
2047
-
2048
- rb_node = Data_Wrap_Struct(rb_class, f_mark, _xml_node_dealloc, c_node) ;
2324
+ rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ;
2049
2325
  c_node->_private = (void *)rb_node;
2050
2326
 
2051
2327
  if (node_has_a_document) {
@@ -2078,7 +2354,7 @@ noko_xml_node_attrs(xmlNodePtr c_node)
2078
2354
  }
2079
2355
 
2080
2356
  void
2081
- noko_init_xml_node()
2357
+ noko_init_xml_node(void)
2082
2358
  {
2083
2359
  cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2084
2360
 
@@ -2134,6 +2410,8 @@ noko_init_xml_node()
2134
2410
  rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
2135
2411
  rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
2136
2412
  rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2413
+ rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2414
+ rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2137
2415
  rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2138
2416
  rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2139
2417
  rb_define_private_method(cNokogiriXmlNode, "set", set, 2);