nokogiri 1.13.10 → 1.14.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +33 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/README.md +18 -11
- data/dependencies.yml +25 -7
- data/ext/nokogiri/extconf.rb +79 -20
- data/ext/nokogiri/gumbo.c +19 -9
- data/ext/nokogiri/html4_document.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +1 -1
- data/ext/nokogiri/html4_sax_parser_context.c +0 -5
- data/ext/nokogiri/nokogiri.c +32 -51
- data/ext/nokogiri/nokogiri.h +17 -14
- data/ext/nokogiri/xml_attribute_decl.c +1 -1
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +16 -11
- data/ext/nokogiri/xml_element_content.c +2 -2
- data/ext/nokogiri/xml_element_decl.c +1 -1
- data/ext/nokogiri/xml_encoding_handler.c +2 -2
- data/ext/nokogiri/xml_namespace.c +38 -8
- data/ext/nokogiri/xml_node.c +286 -26
- data/ext/nokogiri/xml_node_set.c +0 -2
- data/ext/nokogiri/xml_reader.c +40 -20
- data/ext/nokogiri/xml_relax_ng.c +0 -2
- data/ext/nokogiri/xml_sax_parser.c +22 -16
- data/ext/nokogiri/xml_sax_parser_context.c +0 -5
- data/ext/nokogiri/xml_sax_push_parser.c +0 -2
- data/ext/nokogiri/xml_schema.c +0 -2
- data/ext/nokogiri/xml_xpath_context.c +87 -83
- data/ext/nokogiri/xslt_stylesheet.c +14 -13
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +1 -1
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +2 -2
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +7 -4
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +3 -1
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +3 -2
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +9 -2
- data/lib/nokogiri/html5/node.rb +3 -5
- data/lib/nokogiri/html5.rb +127 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -54
- data/lib/nokogiri/xml/document_fragment.rb +49 -6
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +4 -2
- data/lib/nokogiri/xml/node.rb +190 -35
- data/lib/nokogiri/xml/node_set.rb +87 -9
- data/lib/nokogiri/xml/parse_options.rb +127 -48
- data/lib/nokogiri/xml/pp/node.rb +6 -4
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xslt.rb +1 -1
- data/lib/nokogiri.rb +3 -11
- metadata +11 -247
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#ifndef NOKOGIRI_NATIVE
|
2
2
|
#define NOKOGIRI_NATIVE
|
3
3
|
|
4
|
+
#include <ruby/defines.h> // https://github.com/sparklemotion/nokogiri/issues/2696
|
5
|
+
|
4
6
|
#ifdef _MSC_VER
|
5
7
|
# ifndef WIN32_LEAN_AND_MEAN
|
6
8
|
# define WIN32_LEAN_AND_MEAN
|
@@ -23,7 +25,6 @@
|
|
23
25
|
# define NOKOPUBVAR extern
|
24
26
|
#endif
|
25
27
|
|
26
|
-
|
27
28
|
#include <stdlib.h>
|
28
29
|
#include <string.h>
|
29
30
|
#include <assert.h>
|
@@ -75,22 +76,25 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
75
76
|
#define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
|
76
77
|
#define RBSTR_OR_QNIL(_str) (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
|
77
78
|
|
78
|
-
#
|
79
|
-
#
|
80
|
-
#
|
81
|
-
#else
|
82
|
-
#
|
83
|
-
#
|
79
|
+
#ifndef NORETURN_DECL
|
80
|
+
# if defined(__GNUC__)
|
81
|
+
# define NORETURN_DECL __attribute__ ((noreturn))
|
82
|
+
# else
|
83
|
+
# define NORETURN_DECL
|
84
|
+
# endif
|
84
85
|
#endif
|
85
86
|
|
86
|
-
#ifndef
|
87
|
+
#ifndef PRINTFLIKE_DECL
|
87
88
|
# if defined(__GNUC__)
|
88
|
-
# define
|
89
|
+
# define PRINTFLIKE_DECL(stringidx, argidx) __attribute__ ((format(printf,stringidx,argidx)))
|
89
90
|
# else
|
90
|
-
# define
|
91
|
+
# define PRINTFLIKE_DECL(stringidx, argidx)
|
91
92
|
# endif
|
92
93
|
#endif
|
93
94
|
|
95
|
+
#if defined(TRUFFLERUBY) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
96
|
+
# define TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
97
|
+
#endif
|
94
98
|
|
95
99
|
NOKOPUBVAR VALUE mNokogiri ;
|
96
100
|
NOKOPUBVAR VALUE mNokogiriGumbo ;
|
@@ -162,7 +166,6 @@ typedef struct _nokogiriXsltStylesheetTuple {
|
|
162
166
|
VALUE func_instances;
|
163
167
|
} nokogiriXsltStylesheetTuple;
|
164
168
|
|
165
|
-
int vasprintf(char **strp, const char *fmt, va_list ap);
|
166
169
|
void noko_xml_document_pin_node(xmlNodePtr);
|
167
170
|
void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
|
168
171
|
|
@@ -198,7 +201,7 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
|
198
201
|
#define NOKOGIRI_SAX_SELF(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->self
|
199
202
|
#define NOKOGIRI_SAX_CTXT(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->ctxt
|
200
203
|
#define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
|
201
|
-
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple)
|
204
|
+
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) ruby_xfree(_tuple)
|
202
205
|
|
203
206
|
#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
|
204
207
|
#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
|
@@ -215,7 +218,7 @@ void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerSta
|
|
215
218
|
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
|
216
219
|
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
|
217
220
|
void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
|
218
|
-
|
221
|
+
NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error);
|
219
222
|
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
220
223
|
const char *function_name) ;
|
221
224
|
|
@@ -223,7 +226,7 @@ static inline
|
|
223
226
|
nokogiriSAXTuplePtr
|
224
227
|
nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
|
225
228
|
{
|
226
|
-
nokogiriSAXTuplePtr tuple =
|
229
|
+
nokogiriSAXTuplePtr tuple = ruby_xmalloc(sizeof(nokogiriSAXTuple));
|
227
230
|
tuple->self = self;
|
228
231
|
tuple->ctxt = ctxt;
|
229
232
|
return tuple;
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -29,7 +29,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
29
29
|
|
30
30
|
if (!NIL_P(content)) {
|
31
31
|
content_str = (xmlChar *)StringValuePtr(content);
|
32
|
-
content_str_len =
|
32
|
+
content_str_len = RSTRING_LENINT(content);
|
33
33
|
}
|
34
34
|
|
35
35
|
node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -65,14 +65,12 @@ dealloc(xmlDocPtr doc)
|
|
65
65
|
{
|
66
66
|
st_table *node_hash;
|
67
67
|
|
68
|
-
NOKOGIRI_DEBUG_START(doc);
|
69
|
-
|
70
68
|
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
71
69
|
|
72
70
|
st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
|
73
71
|
st_free_table(node_hash);
|
74
72
|
|
75
|
-
|
73
|
+
ruby_xfree(doc->_private);
|
76
74
|
|
77
75
|
/* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
|
78
76
|
* have their _private pointers cleared. This is to avoid libxml-ruby's
|
@@ -84,8 +82,6 @@ dealloc(xmlDocPtr doc)
|
|
84
82
|
}
|
85
83
|
|
86
84
|
xmlFreeDoc(doc);
|
87
|
-
|
88
|
-
NOKOGIRI_DEBUG_END(doc);
|
89
85
|
}
|
90
86
|
|
91
87
|
static void
|
@@ -540,6 +536,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
540
536
|
VALUE rb_mode;
|
541
537
|
VALUE rb_namespaces;
|
542
538
|
VALUE rb_comments_p;
|
539
|
+
int c_mode = 0;
|
543
540
|
xmlChar **c_namespaces;
|
544
541
|
|
545
542
|
xmlDocPtr c_doc;
|
@@ -551,8 +548,16 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
551
548
|
VALUE rb_io;
|
552
549
|
|
553
550
|
rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
|
554
|
-
if (!NIL_P(rb_mode)) {
|
555
|
-
|
551
|
+
if (!NIL_P(rb_mode)) {
|
552
|
+
Check_Type(rb_mode, T_FIXNUM);
|
553
|
+
c_mode = NUM2INT(rb_mode);
|
554
|
+
}
|
555
|
+
if (!NIL_P(rb_namespaces)) {
|
556
|
+
Check_Type(rb_namespaces, T_ARRAY);
|
557
|
+
if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
|
558
|
+
rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
|
559
|
+
}
|
560
|
+
}
|
556
561
|
|
557
562
|
Data_Get_Struct(self, xmlDoc, c_doc);
|
558
563
|
|
@@ -573,7 +578,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
573
578
|
c_namespaces = NULL;
|
574
579
|
} else {
|
575
580
|
long ns_len = RARRAY_LEN(rb_namespaces);
|
576
|
-
c_namespaces =
|
581
|
+
c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
|
577
582
|
for (int j = 0 ; j < ns_len ; j++) {
|
578
583
|
VALUE entry = rb_ary_entry(rb_namespaces, j);
|
579
584
|
c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
|
@@ -581,12 +586,12 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
581
586
|
}
|
582
587
|
|
583
588
|
xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
|
584
|
-
|
589
|
+
c_mode,
|
585
590
|
c_namespaces,
|
586
591
|
(int)RTEST(rb_comments_p),
|
587
592
|
c_obuf);
|
588
593
|
|
589
|
-
|
594
|
+
ruby_xfree(c_namespaces);
|
590
595
|
xmlOutputBufferClose(c_obuf);
|
591
596
|
|
592
597
|
return rb_funcall(rb_io, rb_intern("string"), 0);
|
@@ -604,7 +609,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
|
|
604
609
|
|
605
610
|
rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
|
606
611
|
|
607
|
-
tuple = (nokogiriTuplePtr)
|
612
|
+
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
608
613
|
tuple->doc = rb_document;
|
609
614
|
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
610
615
|
tuple->node_cache = rb_ary_new();
|
@@ -31,7 +31,7 @@ get_type(VALUE self)
|
|
31
31
|
xmlElementContentPtr elem;
|
32
32
|
Data_Get_Struct(self, xmlElementContent, elem);
|
33
33
|
|
34
|
-
return INT2NUM(
|
34
|
+
return INT2NUM(elem->type);
|
35
35
|
}
|
36
36
|
|
37
37
|
/*
|
@@ -79,7 +79,7 @@ get_occur(VALUE self)
|
|
79
79
|
xmlElementContentPtr elem;
|
80
80
|
Data_Get_Struct(self, xmlElementContent, elem);
|
81
81
|
|
82
|
-
return INT2NUM(
|
82
|
+
return INT2NUM(elem->ocur);
|
83
83
|
}
|
84
84
|
|
85
85
|
/*
|
@@ -45,9 +45,9 @@ rb_xml_encoding_handler_s_delete(VALUE klass, VALUE name)
|
|
45
45
|
|
46
46
|
|
47
47
|
/*
|
48
|
-
* call-seq: Nokogiri::EncodingHandler.alias(
|
48
|
+
* call-seq: Nokogiri::EncodingHandler.alias(real_name, alias_name)
|
49
49
|
*
|
50
|
-
* Alias encoding handler with name +
|
50
|
+
* Alias encoding handler with name +real_name+ to name +alias_name+
|
51
51
|
*/
|
52
52
|
static VALUE
|
53
53
|
rb_xml_encoding_handler_s_alias(VALUE klass, VALUE from, VALUE to)
|
@@ -32,7 +32,6 @@ _xml_namespace_dealloc(void *ptr)
|
|
32
32
|
* node set. see noko_xml_namespace_wrap().
|
33
33
|
*/
|
34
34
|
xmlNsPtr ns = ptr;
|
35
|
-
NOKOGIRI_DEBUG_START(ns) ;
|
36
35
|
|
37
36
|
if (ns->href) {
|
38
37
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
|
@@ -41,7 +40,6 @@ _xml_namespace_dealloc(void *ptr)
|
|
41
40
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix));
|
42
41
|
}
|
43
42
|
xmlFree(ns);
|
44
|
-
NOKOGIRI_DEBUG_END(ns) ;
|
45
43
|
}
|
46
44
|
|
47
45
|
#ifdef HAVE_RB_GC_LOCATION
|
@@ -76,10 +74,26 @@ static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = {
|
|
76
74
|
};
|
77
75
|
|
78
76
|
/*
|
79
|
-
*
|
80
|
-
*
|
77
|
+
* :call-seq:
|
78
|
+
* prefix() → String or nil
|
81
79
|
*
|
82
|
-
*
|
80
|
+
* Return the prefix for this Namespace, or +nil+ if there is no prefix (e.g., default namespace).
|
81
|
+
*
|
82
|
+
* *Example*
|
83
|
+
*
|
84
|
+
* doc = Nokogiri::XML.parse(<<~XML)
|
85
|
+
* <?xml version="1.0"?>
|
86
|
+
* <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
|
87
|
+
* <child1 foo="abc" noko:bar="def"/>
|
88
|
+
* <noko:child2 foo="qwe" noko:bar="rty"/>
|
89
|
+
* </root>
|
90
|
+
* XML
|
91
|
+
*
|
92
|
+
* doc.root.elements.first.namespace.prefix
|
93
|
+
* # => nil
|
94
|
+
*
|
95
|
+
* doc.root.elements.last.namespace.prefix
|
96
|
+
* # => "noko"
|
83
97
|
*/
|
84
98
|
static VALUE
|
85
99
|
prefix(VALUE self)
|
@@ -93,10 +107,26 @@ prefix(VALUE self)
|
|
93
107
|
}
|
94
108
|
|
95
109
|
/*
|
96
|
-
*
|
97
|
-
*
|
110
|
+
* :call-seq:
|
111
|
+
* href() → String
|
112
|
+
*
|
113
|
+
* Returns the URI reference for this Namespace.
|
114
|
+
*
|
115
|
+
* *Example*
|
116
|
+
*
|
117
|
+
* doc = Nokogiri::XML.parse(<<~XML)
|
118
|
+
* <?xml version="1.0"?>
|
119
|
+
* <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
|
120
|
+
* <child1 foo="abc" noko:bar="def"/>
|
121
|
+
* <noko:child2 foo="qwe" noko:bar="rty"/>
|
122
|
+
* </root>
|
123
|
+
* XML
|
124
|
+
*
|
125
|
+
* doc.root.elements.first.namespace.href
|
126
|
+
* # => "http://nokogiri.org/ns/default"
|
98
127
|
*
|
99
|
-
*
|
128
|
+
* doc.root.elements.last.namespace.href
|
129
|
+
* # => "http://nokogiri.org/ns/noko"
|
100
130
|
*/
|
101
131
|
static VALUE
|
102
132
|
href(VALUE self)
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
+
#include <stdbool.h>
|
4
|
+
|
3
5
|
// :stopdoc:
|
4
6
|
|
5
7
|
VALUE cNokogiriXmlNode ;
|
@@ -7,20 +9,11 @@ static ID id_decorate, id_decorate_bang;
|
|
7
9
|
|
8
10
|
typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
|
9
11
|
|
10
|
-
#ifdef DEBUG
|
11
12
|
static void
|
12
|
-
|
13
|
+
_xml_node_mark(void *ptr)
|
13
14
|
{
|
14
|
-
|
15
|
-
NOKOGIRI_DEBUG_END(x)
|
16
|
-
}
|
17
|
-
#else
|
18
|
-
# define _xml_node_dealloc 0
|
19
|
-
#endif
|
15
|
+
xmlNodePtr node = ptr;
|
20
16
|
|
21
|
-
static void
|
22
|
-
_xml_node_mark(xmlNodePtr node)
|
23
|
-
{
|
24
17
|
if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
|
25
18
|
return;
|
26
19
|
}
|
@@ -37,24 +30,21 @@ _xml_node_mark(xmlNodePtr node)
|
|
37
30
|
|
38
31
|
#ifdef HAVE_RB_GC_LOCATION
|
39
32
|
static void
|
40
|
-
_xml_node_update_references(
|
33
|
+
_xml_node_update_references(void *ptr)
|
41
34
|
{
|
35
|
+
xmlNodePtr node = ptr;
|
36
|
+
|
42
37
|
if (node->_private) {
|
43
38
|
node->_private = (void *)rb_gc_location((VALUE)node->_private);
|
44
39
|
}
|
45
40
|
}
|
41
|
+
#else
|
42
|
+
# define _xml_node_update_references 0
|
46
43
|
#endif
|
47
44
|
|
48
|
-
typedef void (*gc_callback_t)(void *);
|
49
|
-
|
50
45
|
static const rb_data_type_t nokogiri_node_type = {
|
51
46
|
"Nokogiri/XMLNode",
|
52
|
-
{
|
53
|
-
(gc_callback_t)_xml_node_mark, (gc_callback_t)_xml_node_dealloc, 0,
|
54
|
-
#ifdef HAVE_RB_GC_LOCATION
|
55
|
-
(gc_callback_t)_xml_node_update_references
|
56
|
-
#endif
|
57
|
-
},
|
47
|
+
{_xml_node_mark, 0, 0, _xml_node_update_references},
|
58
48
|
0, 0,
|
59
49
|
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
|
60
50
|
RUBY_TYPED_FREE_IMMEDIATELY,
|
@@ -809,7 +799,7 @@ rb_xml_node_pointer_id(VALUE self)
|
|
809
799
|
xmlNodePtr node;
|
810
800
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
811
801
|
|
812
|
-
return
|
802
|
+
return rb_uint2inum((uintptr_t)(node));
|
813
803
|
}
|
814
804
|
|
815
805
|
/*
|
@@ -1509,7 +1499,7 @@ node_type(VALUE self)
|
|
1509
1499
|
{
|
1510
1500
|
xmlNodePtr node;
|
1511
1501
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1512
|
-
return INT2NUM(
|
1502
|
+
return INT2NUM(node->type);
|
1513
1503
|
}
|
1514
1504
|
|
1515
1505
|
/*
|
@@ -1724,6 +1714,269 @@ native_write_to(
|
|
1724
1714
|
return io;
|
1725
1715
|
}
|
1726
1716
|
|
1717
|
+
|
1718
|
+
static inline void
|
1719
|
+
output_partial_string(VALUE out, char const *str, size_t length)
|
1720
|
+
{
|
1721
|
+
if (length) {
|
1722
|
+
rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
|
1723
|
+
}
|
1724
|
+
}
|
1725
|
+
|
1726
|
+
static inline void
|
1727
|
+
output_char(VALUE out, char ch)
|
1728
|
+
{
|
1729
|
+
output_partial_string(out, &ch, 1);
|
1730
|
+
}
|
1731
|
+
|
1732
|
+
static inline void
|
1733
|
+
output_string(VALUE out, char const *str)
|
1734
|
+
{
|
1735
|
+
output_partial_string(out, str, strlen(str));
|
1736
|
+
}
|
1737
|
+
|
1738
|
+
static inline void
|
1739
|
+
output_tagname(VALUE out, xmlNodePtr elem)
|
1740
|
+
{
|
1741
|
+
// Elements in the HTML, MathML, and SVG namespaces do not use a namespace
|
1742
|
+
// prefix in the HTML syntax.
|
1743
|
+
char const *name = (char const *)elem->name;
|
1744
|
+
xmlNsPtr ns = elem->ns;
|
1745
|
+
if (ns && ns->href && ns->prefix
|
1746
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
|
1747
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
|
1748
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
|
1749
|
+
output_string(out, (char const *)elem->ns->prefix);
|
1750
|
+
output_char(out, ':');
|
1751
|
+
char const *colon = strchr(name, ':');
|
1752
|
+
if (colon) {
|
1753
|
+
name = colon + 1;
|
1754
|
+
}
|
1755
|
+
}
|
1756
|
+
output_string(out, name);
|
1757
|
+
}
|
1758
|
+
|
1759
|
+
static inline void
|
1760
|
+
output_attr_name(VALUE out, xmlAttrPtr attr)
|
1761
|
+
{
|
1762
|
+
xmlNsPtr ns = attr->ns;
|
1763
|
+
char const *name = (char const *)attr->name;
|
1764
|
+
if (ns && ns->href) {
|
1765
|
+
char const *uri = (char const *)ns->href;
|
1766
|
+
char const *localname = strchr(name, ':');
|
1767
|
+
if (localname) {
|
1768
|
+
++localname;
|
1769
|
+
} else {
|
1770
|
+
localname = name;
|
1771
|
+
}
|
1772
|
+
|
1773
|
+
if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
|
1774
|
+
output_string(out, "xml:");
|
1775
|
+
name = localname;
|
1776
|
+
} else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
|
1777
|
+
// xmlns:xmlns -> xmlns
|
1778
|
+
// xmlns:foo -> xmlns:foo
|
1779
|
+
if (strcmp(localname, "xmlns")) {
|
1780
|
+
output_string(out, "xmlns:");
|
1781
|
+
}
|
1782
|
+
name = localname;
|
1783
|
+
} else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
|
1784
|
+
output_string(out, "xlink:");
|
1785
|
+
name = localname;
|
1786
|
+
} else if (ns->prefix) {
|
1787
|
+
output_string(out, (char const *)ns->prefix);
|
1788
|
+
output_char(out, ':');
|
1789
|
+
name = localname;
|
1790
|
+
}
|
1791
|
+
}
|
1792
|
+
output_string(out, name);
|
1793
|
+
}
|
1794
|
+
|
1795
|
+
static void
|
1796
|
+
output_escaped_string(VALUE out, xmlChar const *start, bool attr)
|
1797
|
+
{
|
1798
|
+
xmlChar const *next = start;
|
1799
|
+
int ch;
|
1800
|
+
|
1801
|
+
while ((ch = *next) != 0) {
|
1802
|
+
char const *replacement = NULL;
|
1803
|
+
size_t replaced_bytes = 1;
|
1804
|
+
if (ch == '&') {
|
1805
|
+
replacement = "&amp;";
|
1806
|
+
} else if (ch == 0xC2 && next[1] == 0xA0) {
|
1807
|
+
// U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
|
1808
|
+
replacement = "&nbsp;";
|
1809
|
+
replaced_bytes = 2;
|
1810
|
+
} else if (attr && ch == '"') {
|
1811
|
+
replacement = "&quot;";
|
1812
|
+
} else if (!attr && ch == '<') {
|
1813
|
+
replacement = "&lt;";
|
1814
|
+
} else if (!attr && ch == '>') {
|
1815
|
+
replacement = "&gt;";
|
1816
|
+
} else {
|
1817
|
+
++next;
|
1818
|
+
continue;
|
1819
|
+
}
|
1820
|
+
output_partial_string(out, (char const *)start, next - start);
|
1821
|
+
output_string(out, replacement);
|
1822
|
+
next += replaced_bytes;
|
1823
|
+
start = next;
|
1824
|
+
}
|
1825
|
+
output_partial_string(out, (char const *)start, next - start);
|
1826
|
+
}
|
1827
|
+
|
1828
|
+
static bool
|
1829
|
+
should_prepend_newline(xmlNodePtr node)
|
1830
|
+
{
|
1831
|
+
char const *name = (char const *)node->name;
|
1832
|
+
xmlNodePtr child = node->children;
|
1833
|
+
|
1834
|
+
if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
|
1835
|
+
return false;
|
1836
|
+
}
|
1837
|
+
|
1838
|
+
return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
|
1839
|
+
}
|
1840
|
+
|
1841
|
+
static VALUE
|
1842
|
+
rb_prepend_newline(VALUE self)
|
1843
|
+
{
|
1844
|
+
xmlNodePtr node;
|
1845
|
+
Noko_Node_Get_Struct(self, xmlNode, node);
|
1846
|
+
return should_prepend_newline(node) ? Qtrue : Qfalse;
|
1847
|
+
}
|
1848
|
+
|
1849
|
+
static bool
|
1850
|
+
is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
|
1851
|
+
{
|
1852
|
+
char const *name = (char const *)node->name;
|
1853
|
+
if (name == NULL) { // fragments don't have a name
|
1854
|
+
return false;
|
1855
|
+
}
|
1856
|
+
for (size_t idx = 0; idx < num_tagnames; ++idx) {
|
1857
|
+
if (!strcmp(name, tagnames[idx])) {
|
1858
|
+
return true;
|
1859
|
+
}
|
1860
|
+
}
|
1861
|
+
return false;
|
1862
|
+
|
1863
|
+
}
|
1864
|
+
|
1865
|
+
static void
|
1866
|
+
output_node(
|
1867
|
+
VALUE out,
|
1868
|
+
xmlNodePtr node,
|
1869
|
+
bool preserve_newline
|
1870
|
+
)
|
1871
|
+
{
|
1872
|
+
static char const *const VOID_ELEMENTS[] = {
|
1873
|
+
"area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
|
1874
|
+
"img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
|
1875
|
+
};
|
1876
|
+
|
1877
|
+
static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
|
1878
|
+
"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
|
1879
|
+
};
|
1880
|
+
|
1881
|
+
switch (node->type) {
|
1882
|
+
case XML_ELEMENT_NODE:
|
1883
|
+
// Serialize the start tag.
|
1884
|
+
output_char(out, '<');
|
1885
|
+
output_tagname(out, node);
|
1886
|
+
|
1887
|
+
// Add attributes.
|
1888
|
+
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
|
1889
|
+
output_char(out, ' ');
|
1890
|
+
output_attr_name(out, attr);
|
1891
|
+
if (attr->children) {
|
1892
|
+
output_string(out, "=\"");
|
1893
|
+
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
|
1894
|
+
output_escaped_string(out, value, true);
|
1895
|
+
xmlFree(value);
|
1896
|
+
output_char(out, '"');
|
1897
|
+
} else {
|
1898
|
+
// Output name=""
|
1899
|
+
output_string(out, "=\"\"");
|
1900
|
+
}
|
1901
|
+
}
|
1902
|
+
output_char(out, '>');
|
1903
|
+
|
1904
|
+
// Add children and end tag if element is not void.
|
1905
|
+
if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
|
1906
|
+
if (preserve_newline && should_prepend_newline(node)) {
|
1907
|
+
output_char(out, '\n');
|
1908
|
+
}
|
1909
|
+
for (xmlNodePtr child = node->children; child; child = child->next) {
|
1910
|
+
output_node(out, child, preserve_newline);
|
1911
|
+
}
|
1912
|
+
output_string(out, "</");
|
1913
|
+
output_tagname(out, node);
|
1914
|
+
output_char(out, '>');
|
1915
|
+
}
|
1916
|
+
break;
|
1917
|
+
|
1918
|
+
case XML_TEXT_NODE:
|
1919
|
+
if (node->parent
|
1920
|
+
&& is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
|
1921
|
+
sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
|
1922
|
+
output_string(out, (char const *)node->content);
|
1923
|
+
} else {
|
1924
|
+
output_escaped_string(out, node->content, false);
|
1925
|
+
}
|
1926
|
+
break;
|
1927
|
+
|
1928
|
+
case XML_CDATA_SECTION_NODE:
|
1929
|
+
output_string(out, "<![CDATA[");
|
1930
|
+
output_string(out, (char const *)node->content);
|
1931
|
+
output_string(out, "]]>");
|
1932
|
+
break;
|
1933
|
+
|
1934
|
+
case XML_COMMENT_NODE:
|
1935
|
+
output_string(out, "<!--");
|
1936
|
+
output_string(out, (char const *)node->content);
|
1937
|
+
output_string(out, "-->");
|
1938
|
+
break;
|
1939
|
+
|
1940
|
+
case XML_PI_NODE:
|
1941
|
+
output_string(out, "<?");
|
1942
|
+
output_string(out, (char const *)node->content);
|
1943
|
+
output_char(out, '>');
|
1944
|
+
break;
|
1945
|
+
|
1946
|
+
case XML_DOCUMENT_TYPE_NODE:
|
1947
|
+
case XML_DTD_NODE:
|
1948
|
+
output_string(out, "<!DOCTYPE ");
|
1949
|
+
output_string(out, (char const *)node->name);
|
1950
|
+
output_string(out, ">");
|
1951
|
+
break;
|
1952
|
+
|
1953
|
+
case XML_DOCUMENT_NODE:
|
1954
|
+
case XML_DOCUMENT_FRAG_NODE:
|
1955
|
+
case XML_HTML_DOCUMENT_NODE:
|
1956
|
+
for (xmlNodePtr child = node->children; child; child = child->next) {
|
1957
|
+
output_node(out, child, preserve_newline);
|
1958
|
+
}
|
1959
|
+
break;
|
1960
|
+
|
1961
|
+
default:
|
1962
|
+
rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
|
1963
|
+
break;
|
1964
|
+
}
|
1965
|
+
}
|
1966
|
+
|
1967
|
+
static VALUE
|
1968
|
+
html_standard_serialize(
|
1969
|
+
VALUE self,
|
1970
|
+
VALUE preserve_newline
|
1971
|
+
)
|
1972
|
+
{
|
1973
|
+
xmlNodePtr node;
|
1974
|
+
Noko_Node_Get_Struct(self, xmlNode, node);
|
1975
|
+
VALUE output = rb_str_buf_new(4096);
|
1976
|
+
output_node(output, node, RTEST(preserve_newline));
|
1977
|
+
return output;
|
1978
|
+
}
|
1979
|
+
|
1727
1980
|
/*
|
1728
1981
|
* :call-seq:
|
1729
1982
|
* line() → Integer
|
@@ -1757,7 +2010,7 @@ rb_xml_node_line(VALUE rb_node)
|
|
1757
2010
|
xmlNodePtr c_node;
|
1758
2011
|
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
1759
2012
|
|
1760
|
-
return
|
2013
|
+
return LONG2NUM(xmlGetLineNo(c_node));
|
1761
2014
|
}
|
1762
2015
|
|
1763
2016
|
/*
|
@@ -1860,7 +2113,7 @@ compare(VALUE self, VALUE _other)
|
|
1860
2113
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1861
2114
|
Noko_Node_Get_Struct(_other, xmlNode, other);
|
1862
2115
|
|
1863
|
-
return INT2NUM(
|
2116
|
+
return INT2NUM(xmlXPathCmpNodes(other, node));
|
1864
2117
|
}
|
1865
2118
|
|
1866
2119
|
|
@@ -1960,12 +2213,17 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
1960
2213
|
|
1961
2214
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
1962
2215
|
|
1963
|
-
/*
|
2216
|
+
/*
|
2217
|
+
* Workaround for a libxml2 bug where a parsing error may leave a broken
|
1964
2218
|
* node reference in node->doc->children.
|
2219
|
+
*
|
2220
|
+
* https://bugzilla.gnome.org/show_bug.cgi?id=668155
|
2221
|
+
*
|
1965
2222
|
* This workaround is limited to when a parse error occurs, the document
|
1966
2223
|
* went from having no children to having children, and the context node is
|
1967
2224
|
* part of a document fragment.
|
1968
|
-
*
|
2225
|
+
*
|
2226
|
+
* TODO: This was fixed in libxml 2.8.0 by 71a243d
|
1969
2227
|
*/
|
1970
2228
|
if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
|
1971
2229
|
child_iter = node;
|
@@ -2155,6 +2413,8 @@ noko_init_xml_node()
|
|
2155
2413
|
rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
|
2156
2414
|
rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
|
2157
2415
|
rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
|
2416
|
+
rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
|
2417
|
+
rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
|
2158
2418
|
rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
|
2159
2419
|
rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
|
2160
2420
|
rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
|