nokogiri 1.13.10 → 1.14.0.rc1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +33 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/README.md +18 -11
  5. data/dependencies.yml +25 -7
  6. data/ext/nokogiri/extconf.rb +79 -20
  7. data/ext/nokogiri/gumbo.c +19 -9
  8. data/ext/nokogiri/html4_document.c +1 -1
  9. data/ext/nokogiri/html4_entity_lookup.c +1 -1
  10. data/ext/nokogiri/html4_sax_parser_context.c +0 -5
  11. data/ext/nokogiri/nokogiri.c +32 -51
  12. data/ext/nokogiri/nokogiri.h +17 -14
  13. data/ext/nokogiri/xml_attribute_decl.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +1 -1
  15. data/ext/nokogiri/xml_document.c +16 -11
  16. data/ext/nokogiri/xml_element_content.c +2 -2
  17. data/ext/nokogiri/xml_element_decl.c +1 -1
  18. data/ext/nokogiri/xml_encoding_handler.c +2 -2
  19. data/ext/nokogiri/xml_namespace.c +38 -8
  20. data/ext/nokogiri/xml_node.c +286 -26
  21. data/ext/nokogiri/xml_node_set.c +0 -2
  22. data/ext/nokogiri/xml_reader.c +40 -20
  23. data/ext/nokogiri/xml_relax_ng.c +0 -2
  24. data/ext/nokogiri/xml_sax_parser.c +22 -16
  25. data/ext/nokogiri/xml_sax_parser_context.c +0 -5
  26. data/ext/nokogiri/xml_sax_push_parser.c +0 -2
  27. data/ext/nokogiri/xml_schema.c +0 -2
  28. data/ext/nokogiri/xml_xpath_context.c +87 -83
  29. data/ext/nokogiri/xslt_stylesheet.c +14 -13
  30. data/gumbo-parser/Makefile +10 -0
  31. data/gumbo-parser/src/attribute.h +1 -1
  32. data/gumbo-parser/src/error.c +1 -1
  33. data/gumbo-parser/src/error.h +1 -1
  34. data/gumbo-parser/src/foreign_attrs.c +2 -2
  35. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  36. data/gumbo-parser/src/parser.c +7 -4
  37. data/gumbo-parser/src/replacement.h +1 -1
  38. data/gumbo-parser/src/string_buffer.h +1 -1
  39. data/gumbo-parser/src/string_piece.c +1 -1
  40. data/gumbo-parser/src/svg_attrs.c +2 -2
  41. data/gumbo-parser/src/svg_tags.c +2 -2
  42. data/gumbo-parser/src/tag.c +2 -1
  43. data/gumbo-parser/src/tag_lookup.c +7 -7
  44. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  45. data/gumbo-parser/src/tag_lookup.h +1 -1
  46. data/gumbo-parser/src/token_buffer.h +1 -1
  47. data/gumbo-parser/src/tokenizer.c +1 -1
  48. data/gumbo-parser/src/tokenizer.h +1 -1
  49. data/gumbo-parser/src/utf8.c +1 -1
  50. data/gumbo-parser/src/utf8.h +1 -1
  51. data/gumbo-parser/src/util.c +1 -3
  52. data/gumbo-parser/src/util.h +4 -0
  53. data/gumbo-parser/src/vector.h +1 -1
  54. data/lib/nokogiri/css/node.rb +2 -2
  55. data/lib/nokogiri/css/xpath_visitor.rb +3 -1
  56. data/lib/nokogiri/css.rb +6 -0
  57. data/lib/nokogiri/encoding_handler.rb +57 -0
  58. data/lib/nokogiri/extension.rb +3 -2
  59. data/lib/nokogiri/html4/document.rb +2 -121
  60. data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
  61. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  62. data/lib/nokogiri/html4.rb +1 -0
  63. data/lib/nokogiri/html5/document.rb +113 -36
  64. data/lib/nokogiri/html5/document_fragment.rb +9 -2
  65. data/lib/nokogiri/html5/node.rb +3 -5
  66. data/lib/nokogiri/html5.rb +127 -216
  67. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  68. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  69. data/lib/nokogiri/version/constant.rb +1 -1
  70. data/lib/nokogiri/version/info.rb +11 -10
  71. data/lib/nokogiri/xml/attr.rb +49 -0
  72. data/lib/nokogiri/xml/builder.rb +1 -1
  73. data/lib/nokogiri/xml/document.rb +102 -54
  74. data/lib/nokogiri/xml/document_fragment.rb +49 -6
  75. data/lib/nokogiri/xml/namespace.rb +42 -0
  76. data/lib/nokogiri/xml/node/save_options.rb +4 -2
  77. data/lib/nokogiri/xml/node.rb +190 -35
  78. data/lib/nokogiri/xml/node_set.rb +87 -9
  79. data/lib/nokogiri/xml/parse_options.rb +127 -48
  80. data/lib/nokogiri/xml/pp/node.rb +6 -4
  81. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  82. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  83. data/lib/nokogiri/xslt.rb +1 -1
  84. data/lib/nokogiri.rb +3 -11
  85. metadata +11 -247
  86. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
@@ -1,6 +1,8 @@
1
1
  #ifndef NOKOGIRI_NATIVE
2
2
  #define NOKOGIRI_NATIVE
3
3
 
4
+ #include <ruby/defines.h> // https://github.com/sparklemotion/nokogiri/issues/2696
5
+
4
6
  #ifdef _MSC_VER
5
7
  # ifndef WIN32_LEAN_AND_MEAN
6
8
  # define WIN32_LEAN_AND_MEAN
@@ -23,7 +25,6 @@
23
25
  # define NOKOPUBVAR extern
24
26
  #endif
25
27
 
26
-
27
28
  #include <stdlib.h>
28
29
  #include <string.h>
29
30
  #include <assert.h>
@@ -75,22 +76,25 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
75
76
  #define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
76
77
  #define RBSTR_OR_QNIL(_str) (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
77
78
 
78
- #ifdef DEBUG
79
- # define NOKOGIRI_DEBUG_START(p) if (getenv("NOKOGIRI_NO_FREE")) return ; if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p start\n", __FILE__, __LINE__, p);
80
- # define NOKOGIRI_DEBUG_END(p) if (getenv("NOKOGIRI_DEBUG")) fprintf(stderr,"nokogiri: %s:%d %p end\n", __FILE__, __LINE__, p);
81
- #else
82
- # define NOKOGIRI_DEBUG_START(p)
83
- # define NOKOGIRI_DEBUG_END(p)
79
+ #ifndef NORETURN_DECL
80
+ # if defined(__GNUC__)
81
+ # define NORETURN_DECL __attribute__ ((noreturn))
82
+ # else
83
+ # define NORETURN_DECL
84
+ # endif
84
85
  #endif
85
86
 
86
- #ifndef NORETURN
87
+ #ifndef PRINTFLIKE_DECL
87
88
  # if defined(__GNUC__)
88
- # define NORETURN(name) __attribute__((noreturn)) name
89
+ # define PRINTFLIKE_DECL(stringidx, argidx) __attribute__ ((format(printf,stringidx,argidx)))
89
90
  # else
90
- # define NORETURN(name) name
91
+ # define PRINTFLIKE_DECL(stringidx, argidx)
91
92
  # endif
92
93
  #endif
93
94
 
95
+ #if defined(TRUFFLERUBY) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
96
+ # define TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
97
+ #endif
94
98
 
95
99
  NOKOPUBVAR VALUE mNokogiri ;
96
100
  NOKOPUBVAR VALUE mNokogiriGumbo ;
@@ -162,7 +166,6 @@ typedef struct _nokogiriXsltStylesheetTuple {
162
166
  VALUE func_instances;
163
167
  } nokogiriXsltStylesheetTuple;
164
168
 
165
- int vasprintf(char **strp, const char *fmt, va_list ap);
166
169
  void noko_xml_document_pin_node(xmlNodePtr);
167
170
  void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
168
171
 
@@ -198,7 +201,7 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
198
201
  #define NOKOGIRI_SAX_SELF(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->self
199
202
  #define NOKOGIRI_SAX_CTXT(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->ctxt
200
203
  #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
201
- #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) free(_tuple)
204
+ #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) ruby_xfree(_tuple)
202
205
 
203
206
  #define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
204
207
  #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
@@ -215,7 +218,7 @@ void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerSta
215
218
  void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
216
219
  VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
217
220
  void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
218
- NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error));
221
+ NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error);
219
222
  void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
220
223
  const char *function_name) ;
221
224
 
@@ -223,7 +226,7 @@ static inline
223
226
  nokogiriSAXTuplePtr
224
227
  nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
225
228
  {
226
- nokogiriSAXTuplePtr tuple = malloc(sizeof(nokogiriSAXTuple));
229
+ nokogiriSAXTuplePtr tuple = ruby_xmalloc(sizeof(nokogiriSAXTuple));
227
230
  tuple->self = self;
228
231
  tuple->ctxt = ctxt;
229
232
  return tuple;
@@ -13,7 +13,7 @@ attribute_type(VALUE self)
13
13
  {
14
14
  xmlAttributePtr node;
15
15
  Noko_Node_Get_Struct(self, xmlAttribute, node);
16
- return INT2NUM((long)node->atype);
16
+ return INT2NUM(node->atype);
17
17
  }
18
18
 
19
19
  /*
@@ -29,7 +29,7 @@ new (int argc, VALUE *argv, VALUE klass)
29
29
 
30
30
  if (!NIL_P(content)) {
31
31
  content_str = (xmlChar *)StringValuePtr(content);
32
- content_str_len = RSTRING_LEN(content);
32
+ content_str_len = RSTRING_LENINT(content);
33
33
  }
34
34
 
35
35
  node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
@@ -65,14 +65,12 @@ dealloc(xmlDocPtr doc)
65
65
  {
66
66
  st_table *node_hash;
67
67
 
68
- NOKOGIRI_DEBUG_START(doc);
69
-
70
68
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
71
69
 
72
70
  st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
73
71
  st_free_table(node_hash);
74
72
 
75
- free(doc->_private);
73
+ ruby_xfree(doc->_private);
76
74
 
77
75
  /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
78
76
  * have their _private pointers cleared. This is to avoid libxml-ruby's
@@ -84,8 +82,6 @@ dealloc(xmlDocPtr doc)
84
82
  }
85
83
 
86
84
  xmlFreeDoc(doc);
87
-
88
- NOKOGIRI_DEBUG_END(doc);
89
85
  }
90
86
 
91
87
  static void
@@ -540,6 +536,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
540
536
  VALUE rb_mode;
541
537
  VALUE rb_namespaces;
542
538
  VALUE rb_comments_p;
539
+ int c_mode = 0;
543
540
  xmlChar **c_namespaces;
544
541
 
545
542
  xmlDocPtr c_doc;
@@ -551,8 +548,16 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
551
548
  VALUE rb_io;
552
549
 
553
550
  rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
554
- if (!NIL_P(rb_mode)) { Check_Type(rb_mode, T_FIXNUM); }
555
- if (!NIL_P(rb_namespaces)) { Check_Type(rb_namespaces, T_ARRAY); }
551
+ if (!NIL_P(rb_mode)) {
552
+ Check_Type(rb_mode, T_FIXNUM);
553
+ c_mode = NUM2INT(rb_mode);
554
+ }
555
+ if (!NIL_P(rb_namespaces)) {
556
+ Check_Type(rb_namespaces, T_ARRAY);
557
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
558
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
559
+ }
560
+ }
556
561
 
557
562
  Data_Get_Struct(self, xmlDoc, c_doc);
558
563
 
@@ -573,7 +578,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
573
578
  c_namespaces = NULL;
574
579
  } else {
575
580
  long ns_len = RARRAY_LEN(rb_namespaces);
576
- c_namespaces = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
581
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
577
582
  for (int j = 0 ; j < ns_len ; j++) {
578
583
  VALUE entry = rb_ary_entry(rb_namespaces, j);
579
584
  c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
@@ -581,12 +586,12 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
581
586
  }
582
587
 
583
588
  xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
584
- (int)(NIL_P(rb_mode) ? 0 : NUM2INT(rb_mode)),
589
+ c_mode,
585
590
  c_namespaces,
586
591
  (int)RTEST(rb_comments_p),
587
592
  c_obuf);
588
593
 
589
- free(c_namespaces);
594
+ ruby_xfree(c_namespaces);
590
595
  xmlOutputBufferClose(c_obuf);
591
596
 
592
597
  return rb_funcall(rb_io, rb_intern("string"), 0);
@@ -604,7 +609,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
604
609
 
605
610
  rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
606
611
 
607
- tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
612
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
608
613
  tuple->doc = rb_document;
609
614
  tuple->unlinkedNodes = st_init_numtable_with_size(128);
610
615
  tuple->node_cache = rb_ary_new();
@@ -31,7 +31,7 @@ get_type(VALUE self)
31
31
  xmlElementContentPtr elem;
32
32
  Data_Get_Struct(self, xmlElementContent, elem);
33
33
 
34
- return INT2NUM((long)elem->type);
34
+ return INT2NUM(elem->type);
35
35
  }
36
36
 
37
37
  /*
@@ -79,7 +79,7 @@ get_occur(VALUE self)
79
79
  xmlElementContentPtr elem;
80
80
  Data_Get_Struct(self, xmlElementContent, elem);
81
81
 
82
- return INT2NUM((long)elem->ocur);
82
+ return INT2NUM(elem->ocur);
83
83
  }
84
84
 
85
85
  /*
@@ -15,7 +15,7 @@ element_type(VALUE self)
15
15
  {
16
16
  xmlElementPtr node;
17
17
  Noko_Node_Get_Struct(self, xmlElement, node);
18
- return INT2NUM((long)node->etype);
18
+ return INT2NUM(node->etype);
19
19
  }
20
20
 
21
21
  /*
@@ -45,9 +45,9 @@ rb_xml_encoding_handler_s_delete(VALUE klass, VALUE name)
45
45
 
46
46
 
47
47
  /*
48
- * call-seq: Nokogiri::EncodingHandler.alias(from, to)
48
+ * call-seq: Nokogiri::EncodingHandler.alias(real_name, alias_name)
49
49
  *
50
- * Alias encoding handler with name +from+ to name +to+
50
+ * Alias encoding handler with name +real_name+ to name +alias_name+
51
51
  */
52
52
  static VALUE
53
53
  rb_xml_encoding_handler_s_alias(VALUE klass, VALUE from, VALUE to)
@@ -32,7 +32,6 @@ _xml_namespace_dealloc(void *ptr)
32
32
  * node set. see noko_xml_namespace_wrap().
33
33
  */
34
34
  xmlNsPtr ns = ptr;
35
- NOKOGIRI_DEBUG_START(ns) ;
36
35
 
37
36
  if (ns->href) {
38
37
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
@@ -41,7 +40,6 @@ _xml_namespace_dealloc(void *ptr)
41
40
  xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix));
42
41
  }
43
42
  xmlFree(ns);
44
- NOKOGIRI_DEBUG_END(ns) ;
45
43
  }
46
44
 
47
45
  #ifdef HAVE_RB_GC_LOCATION
@@ -76,10 +74,26 @@ static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = {
76
74
  };
77
75
 
78
76
  /*
79
- * call-seq:
80
- * prefix
77
+ * :call-seq:
78
+ * prefix() → String or nil
81
79
  *
82
- * Get the prefix for this namespace. Returns +nil+ if there is no prefix.
80
+ * Return the prefix for this Namespace, or +nil+ if there is no prefix (e.g., default namespace).
81
+ *
82
+ * *Example*
83
+ *
84
+ * doc = Nokogiri::XML.parse(<<~XML)
85
+ * <?xml version="1.0"?>
86
+ * <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
87
+ * <child1 foo="abc" noko:bar="def"/>
88
+ * <noko:child2 foo="qwe" noko:bar="rty"/>
89
+ * </root>
90
+ * XML
91
+ *
92
+ * doc.root.elements.first.namespace.prefix
93
+ * # => nil
94
+ *
95
+ * doc.root.elements.last.namespace.prefix
96
+ * # => "noko"
83
97
  */
84
98
  static VALUE
85
99
  prefix(VALUE self)
@@ -93,10 +107,26 @@ prefix(VALUE self)
93
107
  }
94
108
 
95
109
  /*
96
- * call-seq:
97
- * href
110
+ * :call-seq:
111
+ * href() → String
112
+ *
113
+ * Returns the URI reference for this Namespace.
114
+ *
115
+ * *Example*
116
+ *
117
+ * doc = Nokogiri::XML.parse(<<~XML)
118
+ * <?xml version="1.0"?>
119
+ * <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
120
+ * <child1 foo="abc" noko:bar="def"/>
121
+ * <noko:child2 foo="qwe" noko:bar="rty"/>
122
+ * </root>
123
+ * XML
124
+ *
125
+ * doc.root.elements.first.namespace.href
126
+ * # => "http://nokogiri.org/ns/default"
98
127
  *
99
- * Get the href for this namespace
128
+ * doc.root.elements.last.namespace.href
129
+ * # => "http://nokogiri.org/ns/noko"
100
130
  */
101
131
  static VALUE
102
132
  href(VALUE self)
@@ -1,5 +1,7 @@
1
1
  #include <nokogiri.h>
2
2
 
3
+ #include <stdbool.h>
4
+
3
5
  // :stopdoc:
4
6
 
5
7
  VALUE cNokogiriXmlNode ;
@@ -7,20 +9,11 @@ static ID id_decorate, id_decorate_bang;
7
9
 
8
10
  typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
9
11
 
10
- #ifdef DEBUG
11
12
  static void
12
- _xml_node_dealloc(xmlNodePtr x)
13
+ _xml_node_mark(void *ptr)
13
14
  {
14
- NOKOGIRI_DEBUG_START(x)
15
- NOKOGIRI_DEBUG_END(x)
16
- }
17
- #else
18
- # define _xml_node_dealloc 0
19
- #endif
15
+ xmlNodePtr node = ptr;
20
16
 
21
- static void
22
- _xml_node_mark(xmlNodePtr node)
23
- {
24
17
  if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
25
18
  return;
26
19
  }
@@ -37,24 +30,21 @@ _xml_node_mark(xmlNodePtr node)
37
30
 
38
31
  #ifdef HAVE_RB_GC_LOCATION
39
32
  static void
40
- _xml_node_update_references(xmlNodePtr node)
33
+ _xml_node_update_references(void *ptr)
41
34
  {
35
+ xmlNodePtr node = ptr;
36
+
42
37
  if (node->_private) {
43
38
  node->_private = (void *)rb_gc_location((VALUE)node->_private);
44
39
  }
45
40
  }
41
+ #else
42
+ # define _xml_node_update_references 0
46
43
  #endif
47
44
 
48
- typedef void (*gc_callback_t)(void *);
49
-
50
45
  static const rb_data_type_t nokogiri_node_type = {
51
46
  "Nokogiri/XMLNode",
52
- {
53
- (gc_callback_t)_xml_node_mark, (gc_callback_t)_xml_node_dealloc, 0,
54
- #ifdef HAVE_RB_GC_LOCATION
55
- (gc_callback_t)_xml_node_update_references
56
- #endif
57
- },
47
+ {_xml_node_mark, 0, 0, _xml_node_update_references},
58
48
  0, 0,
59
49
  #ifdef RUBY_TYPED_FREE_IMMEDIATELY
60
50
  RUBY_TYPED_FREE_IMMEDIATELY,
@@ -809,7 +799,7 @@ rb_xml_node_pointer_id(VALUE self)
809
799
  xmlNodePtr node;
810
800
  Noko_Node_Get_Struct(self, xmlNode, node);
811
801
 
812
- return INT2NUM((long)(node));
802
+ return rb_uint2inum((uintptr_t)(node));
813
803
  }
814
804
 
815
805
  /*
@@ -1509,7 +1499,7 @@ node_type(VALUE self)
1509
1499
  {
1510
1500
  xmlNodePtr node;
1511
1501
  Noko_Node_Get_Struct(self, xmlNode, node);
1512
- return INT2NUM((long)node->type);
1502
+ return INT2NUM(node->type);
1513
1503
  }
1514
1504
 
1515
1505
  /*
@@ -1724,6 +1714,269 @@ native_write_to(
1724
1714
  return io;
1725
1715
  }
1726
1716
 
1717
+
1718
+ static inline void
1719
+ output_partial_string(VALUE out, char const *str, size_t length)
1720
+ {
1721
+ if (length) {
1722
+ rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
1723
+ }
1724
+ }
1725
+
1726
+ static inline void
1727
+ output_char(VALUE out, char ch)
1728
+ {
1729
+ output_partial_string(out, &ch, 1);
1730
+ }
1731
+
1732
+ static inline void
1733
+ output_string(VALUE out, char const *str)
1734
+ {
1735
+ output_partial_string(out, str, strlen(str));
1736
+ }
1737
+
1738
+ static inline void
1739
+ output_tagname(VALUE out, xmlNodePtr elem)
1740
+ {
1741
+ // Elements in the HTML, MathML, and SVG namespaces do not use a namespace
1742
+ // prefix in the HTML syntax.
1743
+ char const *name = (char const *)elem->name;
1744
+ xmlNsPtr ns = elem->ns;
1745
+ if (ns && ns->href && ns->prefix
1746
+ && strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
1747
+ && strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
1748
+ && strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
1749
+ output_string(out, (char const *)elem->ns->prefix);
1750
+ output_char(out, ':');
1751
+ char const *colon = strchr(name, ':');
1752
+ if (colon) {
1753
+ name = colon + 1;
1754
+ }
1755
+ }
1756
+ output_string(out, name);
1757
+ }
1758
+
1759
+ static inline void
1760
+ output_attr_name(VALUE out, xmlAttrPtr attr)
1761
+ {
1762
+ xmlNsPtr ns = attr->ns;
1763
+ char const *name = (char const *)attr->name;
1764
+ if (ns && ns->href) {
1765
+ char const *uri = (char const *)ns->href;
1766
+ char const *localname = strchr(name, ':');
1767
+ if (localname) {
1768
+ ++localname;
1769
+ } else {
1770
+ localname = name;
1771
+ }
1772
+
1773
+ if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
1774
+ output_string(out, "xml:");
1775
+ name = localname;
1776
+ } else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
1777
+ // xmlns:xmlns -> xmlns
1778
+ // xmlns:foo -> xmlns:foo
1779
+ if (strcmp(localname, "xmlns")) {
1780
+ output_string(out, "xmlns:");
1781
+ }
1782
+ name = localname;
1783
+ } else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
1784
+ output_string(out, "xlink:");
1785
+ name = localname;
1786
+ } else if (ns->prefix) {
1787
+ output_string(out, (char const *)ns->prefix);
1788
+ output_char(out, ':');
1789
+ name = localname;
1790
+ }
1791
+ }
1792
+ output_string(out, name);
1793
+ }
1794
+
1795
+ static void
1796
+ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1797
+ {
1798
+ xmlChar const *next = start;
1799
+ int ch;
1800
+
1801
+ while ((ch = *next) != 0) {
1802
+ char const *replacement = NULL;
1803
+ size_t replaced_bytes = 1;
1804
+ if (ch == '&') {
1805
+ replacement = "&amp;";
1806
+ } else if (ch == 0xC2 && next[1] == 0xA0) {
1807
+ // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
1808
+ replacement = "&nbsp;";
1809
+ replaced_bytes = 2;
1810
+ } else if (attr && ch == '"') {
1811
+ replacement = "&quot;";
1812
+ } else if (!attr && ch == '<') {
1813
+ replacement = "&lt;";
1814
+ } else if (!attr && ch == '>') {
1815
+ replacement = "&gt;";
1816
+ } else {
1817
+ ++next;
1818
+ continue;
1819
+ }
1820
+ output_partial_string(out, (char const *)start, next - start);
1821
+ output_string(out, replacement);
1822
+ next += replaced_bytes;
1823
+ start = next;
1824
+ }
1825
+ output_partial_string(out, (char const *)start, next - start);
1826
+ }
1827
+
1828
+ static bool
1829
+ should_prepend_newline(xmlNodePtr node)
1830
+ {
1831
+ char const *name = (char const *)node->name;
1832
+ xmlNodePtr child = node->children;
1833
+
1834
+ if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
1835
+ return false;
1836
+ }
1837
+
1838
+ return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
1839
+ }
1840
+
1841
+ static VALUE
1842
+ rb_prepend_newline(VALUE self)
1843
+ {
1844
+ xmlNodePtr node;
1845
+ Noko_Node_Get_Struct(self, xmlNode, node);
1846
+ return should_prepend_newline(node) ? Qtrue : Qfalse;
1847
+ }
1848
+
1849
+ static bool
1850
+ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1851
+ {
1852
+ char const *name = (char const *)node->name;
1853
+ if (name == NULL) { // fragments don't have a name
1854
+ return false;
1855
+ }
1856
+ for (size_t idx = 0; idx < num_tagnames; ++idx) {
1857
+ if (!strcmp(name, tagnames[idx])) {
1858
+ return true;
1859
+ }
1860
+ }
1861
+ return false;
1862
+
1863
+ }
1864
+
1865
+ static void
1866
+ output_node(
1867
+ VALUE out,
1868
+ xmlNodePtr node,
1869
+ bool preserve_newline
1870
+ )
1871
+ {
1872
+ static char const *const VOID_ELEMENTS[] = {
1873
+ "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
1874
+ "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
1875
+ };
1876
+
1877
+ static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
1878
+ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
1879
+ };
1880
+
1881
+ switch (node->type) {
1882
+ case XML_ELEMENT_NODE:
1883
+ // Serialize the start tag.
1884
+ output_char(out, '<');
1885
+ output_tagname(out, node);
1886
+
1887
+ // Add attributes.
1888
+ for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1889
+ output_char(out, ' ');
1890
+ output_attr_name(out, attr);
1891
+ if (attr->children) {
1892
+ output_string(out, "=\"");
1893
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1894
+ output_escaped_string(out, value, true);
1895
+ xmlFree(value);
1896
+ output_char(out, '"');
1897
+ } else {
1898
+ // Output name=""
1899
+ output_string(out, "=\"\"");
1900
+ }
1901
+ }
1902
+ output_char(out, '>');
1903
+
1904
+ // Add children and end tag if element is not void.
1905
+ if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
1906
+ if (preserve_newline && should_prepend_newline(node)) {
1907
+ output_char(out, '\n');
1908
+ }
1909
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1910
+ output_node(out, child, preserve_newline);
1911
+ }
1912
+ output_string(out, "</");
1913
+ output_tagname(out, node);
1914
+ output_char(out, '>');
1915
+ }
1916
+ break;
1917
+
1918
+ case XML_TEXT_NODE:
1919
+ if (node->parent
1920
+ && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
1921
+ sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
1922
+ output_string(out, (char const *)node->content);
1923
+ } else {
1924
+ output_escaped_string(out, node->content, false);
1925
+ }
1926
+ break;
1927
+
1928
+ case XML_CDATA_SECTION_NODE:
1929
+ output_string(out, "<![CDATA[");
1930
+ output_string(out, (char const *)node->content);
1931
+ output_string(out, "]]>");
1932
+ break;
1933
+
1934
+ case XML_COMMENT_NODE:
1935
+ output_string(out, "<!--");
1936
+ output_string(out, (char const *)node->content);
1937
+ output_string(out, "-->");
1938
+ break;
1939
+
1940
+ case XML_PI_NODE:
1941
+ output_string(out, "<?");
1942
+ output_string(out, (char const *)node->content);
1943
+ output_char(out, '>');
1944
+ break;
1945
+
1946
+ case XML_DOCUMENT_TYPE_NODE:
1947
+ case XML_DTD_NODE:
1948
+ output_string(out, "<!DOCTYPE ");
1949
+ output_string(out, (char const *)node->name);
1950
+ output_string(out, ">");
1951
+ break;
1952
+
1953
+ case XML_DOCUMENT_NODE:
1954
+ case XML_DOCUMENT_FRAG_NODE:
1955
+ case XML_HTML_DOCUMENT_NODE:
1956
+ for (xmlNodePtr child = node->children; child; child = child->next) {
1957
+ output_node(out, child, preserve_newline);
1958
+ }
1959
+ break;
1960
+
1961
+ default:
1962
+ rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
1963
+ break;
1964
+ }
1965
+ }
1966
+
1967
+ static VALUE
1968
+ html_standard_serialize(
1969
+ VALUE self,
1970
+ VALUE preserve_newline
1971
+ )
1972
+ {
1973
+ xmlNodePtr node;
1974
+ Noko_Node_Get_Struct(self, xmlNode, node);
1975
+ VALUE output = rb_str_buf_new(4096);
1976
+ output_node(output, node, RTEST(preserve_newline));
1977
+ return output;
1978
+ }
1979
+
1727
1980
  /*
1728
1981
  * :call-seq:
1729
1982
  * line() → Integer
@@ -1757,7 +2010,7 @@ rb_xml_node_line(VALUE rb_node)
1757
2010
  xmlNodePtr c_node;
1758
2011
  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
1759
2012
 
1760
- return INT2NUM(xmlGetLineNo(c_node));
2013
+ return LONG2NUM(xmlGetLineNo(c_node));
1761
2014
  }
1762
2015
 
1763
2016
  /*
@@ -1860,7 +2113,7 @@ compare(VALUE self, VALUE _other)
1860
2113
  Noko_Node_Get_Struct(self, xmlNode, node);
1861
2114
  Noko_Node_Get_Struct(_other, xmlNode, other);
1862
2115
 
1863
- return INT2NUM((long)xmlXPathCmpNodes(other, node));
2116
+ return INT2NUM(xmlXPathCmpNodes(other, node));
1864
2117
  }
1865
2118
 
1866
2119
 
@@ -1960,12 +2213,17 @@ in_context(VALUE self, VALUE _str, VALUE _options)
1960
2213
 
1961
2214
  xmlSetStructuredErrorFunc(NULL, NULL);
1962
2215
 
1963
- /* Workaround for a libxml2 bug where a parsing error may leave a broken
2216
+ /*
2217
+ * Workaround for a libxml2 bug where a parsing error may leave a broken
1964
2218
  * node reference in node->doc->children.
2219
+ *
2220
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2221
+ *
1965
2222
  * This workaround is limited to when a parse error occurs, the document
1966
2223
  * went from having no children to having children, and the context node is
1967
2224
  * part of a document fragment.
1968
- * https://bugzilla.gnome.org/show_bug.cgi?id=668155
2225
+ *
2226
+ * TODO: This was fixed in libxml 2.8.0 by 71a243d
1969
2227
  */
1970
2228
  if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
1971
2229
  child_iter = node;
@@ -2155,6 +2413,8 @@ noko_init_xml_node()
2155
2413
  rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
2156
2414
  rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
2157
2415
  rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2416
+ rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2417
+ rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2158
2418
  rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2159
2419
  rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2160
2420
  rb_define_private_method(cNokogiriXmlNode, "set", set, 2);