nokogiri 1.14.5-java → 1.15.0-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +9 -8
- data/dependencies.yml +6 -6
- data/ext/java/nokogiri/NokogiriService.java +0 -1
- data/ext/java/nokogiri/XmlCdata.java +6 -0
- data/ext/java/nokogiri/XmlNode.java +14 -5
- data/ext/java/nokogiri/XmlSchema.java +5 -0
- data/ext/java/nokogiri/XmlText.java +5 -0
- data/ext/java/nokogiri/XsltStylesheet.java +5 -20
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +0 -25
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +1 -0
- data/ext/nokogiri/extconf.rb +66 -22
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +10 -2
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +102 -22
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -12
- data/ext/nokogiri/xml_node.c +7 -7
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +37 -28
- data/ext/nokogiri/xml_relax_ng.c +65 -78
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +46 -25
- data/ext/nokogiri/xml_sax_push_parser.c +29 -8
- data/ext/nokogiri/xml_schema.c +90 -116
- data/ext/nokogiri/xml_text.c +10 -2
- data/ext/nokogiri/xml_xpath_context.c +156 -83
- data/ext/nokogiri/xslt_stylesheet.c +103 -50
- data/lib/nokogiri/css/xpath_visitor.rb +2 -2
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +5 -2
- data/lib/nokogiri/jruby/nokogiri_jars.rb +3 -3
- data/lib/nokogiri/jruby/org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar +0 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document_fragment.rb +1 -1
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +22 -13
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +18 -10
- data/lib/nokogiri/xslt.rb +73 -3
- data/lib/nokogiri.rb +12 -4
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- metadata +4 -4
- data/lib/nokogiri/jruby/org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar +0 -0
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -137,6 +137,48 @@ noko_io_close(void *io)
|
|
137
137
|
}
|
138
138
|
|
139
139
|
|
140
|
+
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
141
|
+
# define NOKOGIRI_WINDOWS_DLLS 1
|
142
|
+
#else
|
143
|
+
# define NOKOGIRI_WINDOWS_DLLS 0
|
144
|
+
#endif
|
145
|
+
|
146
|
+
//
|
147
|
+
// | dlls || true | false |
|
148
|
+
// | nlmm || | |
|
149
|
+
// |-----------++---------+---------|
|
150
|
+
// | NULL || default | ruby |
|
151
|
+
// | "random" || default | ruby |
|
152
|
+
// | "ruby" || ruby | ruby |
|
153
|
+
// | "default" || default | default |
|
154
|
+
//
|
155
|
+
// We choose *not* to use Ruby's memory management functions with windows DLLs because of this
|
156
|
+
// issue: https://github.com/sparklemotion/nokogiri/issues/2241
|
157
|
+
//
|
158
|
+
static void
|
159
|
+
set_libxml_memory_management(void)
|
160
|
+
{
|
161
|
+
const char *nlmm = getenv("NOKOGIRI_LIBXML_MEMORY_MANAGEMENT");
|
162
|
+
if (nlmm) {
|
163
|
+
if (strcmp(nlmm, "default") == 0) {
|
164
|
+
goto libxml_uses_default_memory_management;
|
165
|
+
} else if (strcmp(nlmm, "ruby") == 0) {
|
166
|
+
goto libxml_uses_ruby_memory_management;
|
167
|
+
}
|
168
|
+
}
|
169
|
+
if (NOKOGIRI_WINDOWS_DLLS) {
|
170
|
+
libxml_uses_default_memory_management:
|
171
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
172
|
+
return;
|
173
|
+
} else {
|
174
|
+
libxml_uses_ruby_memory_management:
|
175
|
+
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
176
|
+
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
177
|
+
return;
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
181
|
+
|
140
182
|
void
|
141
183
|
Init_nokogiri(void)
|
142
184
|
{
|
@@ -150,6 +192,10 @@ Init_nokogiri(void)
|
|
150
192
|
mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
|
151
193
|
mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
|
152
194
|
|
195
|
+
set_libxml_memory_management(); /* must be before any function calls that might invoke xmlInitParser() */
|
196
|
+
xmlInitParser();
|
197
|
+
exsltRegisterAll();
|
198
|
+
|
153
199
|
rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
|
154
200
|
rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
|
155
201
|
|
@@ -182,30 +228,6 @@ Init_nokogiri(void)
|
|
182
228
|
rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS));
|
183
229
|
#endif
|
184
230
|
|
185
|
-
#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
186
|
-
/*
|
187
|
-
* We choose *not* to do use Ruby's memory management functions with windows DLLs because of this
|
188
|
-
* issue in libxml 2.9.12:
|
189
|
-
*
|
190
|
-
* https://github.com/sparklemotion/nokogiri/issues/2241
|
191
|
-
*
|
192
|
-
* If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip
|
193
|
-
* this config only for the specific libxml2 versions 2.9.12.
|
194
|
-
*
|
195
|
-
* Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its
|
196
|
-
* default memory management functions (recall that this config was introduced to reduce memory
|
197
|
-
* bloat and allow Ruby to GC more often); but we should *really* test with production workloads
|
198
|
-
* before making that kind of a potentially-invasive change.
|
199
|
-
*/
|
200
|
-
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default"));
|
201
|
-
#else
|
202
|
-
rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby"));
|
203
|
-
xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup);
|
204
|
-
#endif
|
205
|
-
|
206
|
-
xmlInitParser();
|
207
|
-
exsltRegisterAll();
|
208
|
-
|
209
231
|
if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) {
|
210
232
|
rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue);
|
211
233
|
} else {
|
data/ext/nokogiri/xml_attr.c
CHANGED
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
68
68
|
rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
|
69
69
|
}
|
70
70
|
|
71
|
-
|
71
|
+
xml_doc = noko_xml_document_unwrap(document);
|
72
72
|
|
73
73
|
node = xmlNewDocProp(
|
74
74
|
xml_doc,
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -25,14 +25,22 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
25
25
|
|
26
26
|
rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
|
27
27
|
|
28
|
-
|
28
|
+
if (rb_obj_is_kind_of(doc, cNokogiriXmlDocument)) {
|
29
|
+
xml_doc = noko_xml_document_unwrap(doc);
|
30
|
+
} else {
|
31
|
+
xmlNodePtr deprecated_node_type_arg;
|
32
|
+
// TODO: deprecate allowing Node
|
33
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
34
|
+
Noko_Node_Get_Struct(doc, xmlNode, deprecated_node_type_arg);
|
35
|
+
xml_doc = deprecated_node_type_arg->doc;
|
36
|
+
}
|
29
37
|
|
30
38
|
if (!NIL_P(content)) {
|
31
39
|
content_str = (xmlChar *)StringValuePtr(content);
|
32
40
|
content_str_len = RSTRING_LENINT(content);
|
33
41
|
}
|
34
42
|
|
35
|
-
node = xmlNewCDataBlock(xml_doc
|
43
|
+
node = xmlNewCDataBlock(xml_doc, content_str, content_str_len);
|
36
44
|
|
37
45
|
noko_xml_document_pin_node(node);
|
38
46
|
|
data/ext/nokogiri/xml_comment.c
CHANGED
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
30
30
|
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
|
31
31
|
}
|
32
32
|
|
33
|
-
|
33
|
+
xml_doc = noko_xml_document_unwrap(document);
|
34
34
|
|
35
35
|
node = xmlNewDocComment(
|
36
36
|
xml_doc,
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
|
|
51
51
|
}
|
52
52
|
|
53
53
|
static void
|
54
|
-
mark(xmlDocPtr doc)
|
54
|
+
mark(void *data)
|
55
55
|
{
|
56
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
56
57
|
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
57
58
|
if (tuple) {
|
58
59
|
rb_gc_mark(tuple->doc);
|
@@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
|
|
61
62
|
}
|
62
63
|
|
63
64
|
static void
|
64
|
-
dealloc(xmlDocPtr doc)
|
65
|
+
dealloc(void *data)
|
65
66
|
{
|
67
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
66
68
|
st_table *node_hash;
|
67
69
|
|
68
70
|
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
@@ -72,18 +74,67 @@ dealloc(xmlDocPtr doc)
|
|
72
74
|
|
73
75
|
ruby_xfree(doc->_private);
|
74
76
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
*
|
77
|
+
#pragma GCC diagnostic push
|
78
|
+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
79
|
+
/*
|
80
|
+
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
81
|
+
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
82
|
+
* nokogiri, which will result in segfaults.
|
83
|
+
*
|
84
|
+
* To avoid this, we need to clear the _private pointers from all nodes in this document tree
|
85
|
+
* before that callback gets invoked.
|
86
|
+
*
|
87
|
+
* libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
|
88
|
+
* safeguard (though probably pairing with a runtime check on the libxml-ruby version).
|
79
89
|
*/
|
80
90
|
if (xmlDeregisterNodeDefaultValue) {
|
81
91
|
remove_private((xmlNodePtr)doc);
|
82
92
|
}
|
93
|
+
#pragma GCC diagnostic pop
|
83
94
|
|
84
95
|
xmlFreeDoc(doc);
|
85
96
|
}
|
86
97
|
|
98
|
+
static size_t
|
99
|
+
memsize_node(const xmlNodePtr node)
|
100
|
+
{
|
101
|
+
/* note we don't count namespace definitions, just going for a good-enough number here */
|
102
|
+
xmlNodePtr child;
|
103
|
+
size_t memsize = 0;
|
104
|
+
|
105
|
+
memsize += xmlStrlen(node->name);
|
106
|
+
for (child = (xmlNodePtr)node->properties; child; child = child->next) {
|
107
|
+
memsize += sizeof(xmlAttr) + memsize_node(child);
|
108
|
+
}
|
109
|
+
if (node->type == XML_TEXT_NODE) {
|
110
|
+
memsize += xmlStrlen(node->content);
|
111
|
+
}
|
112
|
+
for (child = node->children; child; child = child->next) {
|
113
|
+
memsize += sizeof(xmlNode) + memsize_node(child);
|
114
|
+
}
|
115
|
+
return memsize;
|
116
|
+
}
|
117
|
+
|
118
|
+
static size_t
|
119
|
+
memsize(const void *data)
|
120
|
+
{
|
121
|
+
xmlDocPtr doc = (const xmlDocPtr)data;
|
122
|
+
size_t memsize = sizeof(xmlDoc);
|
123
|
+
/* This may not account for all memory use */
|
124
|
+
memsize += memsize_node((xmlNodePtr)doc);
|
125
|
+
return memsize;
|
126
|
+
}
|
127
|
+
|
128
|
+
static const rb_data_type_t noko_xml_document_data_type = {
|
129
|
+
.wrap_struct_name = "Nokogiri::XML::Document",
|
130
|
+
.function = {
|
131
|
+
.dmark = mark,
|
132
|
+
.dfree = dealloc,
|
133
|
+
.dsize = memsize,
|
134
|
+
},
|
135
|
+
// .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
|
136
|
+
};
|
137
|
+
|
87
138
|
static void
|
88
139
|
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
89
140
|
{
|
@@ -126,8 +177,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
126
177
|
static VALUE
|
127
178
|
url(VALUE self)
|
128
179
|
{
|
129
|
-
xmlDocPtr doc;
|
130
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
180
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
131
181
|
|
132
182
|
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
133
183
|
|
@@ -146,7 +196,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
|
146
196
|
xmlDocPtr c_document;
|
147
197
|
xmlNodePtr c_new_root = NULL, c_current_root;
|
148
198
|
|
149
|
-
|
199
|
+
c_document = noko_xml_document_unwrap(self);
|
150
200
|
|
151
201
|
c_current_root = xmlDocGetRootElement(c_document);
|
152
202
|
if (c_current_root) {
|
@@ -190,7 +240,7 @@ rb_xml_document_root(VALUE self)
|
|
190
240
|
xmlDocPtr c_document;
|
191
241
|
xmlNodePtr c_root;
|
192
242
|
|
193
|
-
|
243
|
+
c_document = noko_xml_document_unwrap(self);
|
194
244
|
|
195
245
|
c_root = xmlDocGetRootElement(c_document);
|
196
246
|
if (!c_root) {
|
@@ -209,8 +259,7 @@ rb_xml_document_root(VALUE self)
|
|
209
259
|
static VALUE
|
210
260
|
set_encoding(VALUE self, VALUE encoding)
|
211
261
|
{
|
212
|
-
xmlDocPtr doc;
|
213
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
262
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
214
263
|
|
215
264
|
if (doc->encoding) {
|
216
265
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
@@ -230,8 +279,7 @@ set_encoding(VALUE self, VALUE encoding)
|
|
230
279
|
static VALUE
|
231
280
|
encoding(VALUE self)
|
232
281
|
{
|
233
|
-
xmlDocPtr doc;
|
234
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
282
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
235
283
|
|
236
284
|
if (!doc->encoding) { return Qnil; }
|
237
285
|
return NOKOGIRI_STR_NEW2(doc->encoding);
|
@@ -246,8 +294,7 @@ encoding(VALUE self)
|
|
246
294
|
static VALUE
|
247
295
|
version(VALUE self)
|
248
296
|
{
|
249
|
-
xmlDocPtr doc;
|
250
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
297
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
251
298
|
|
252
299
|
if (!doc->version) { return Qnil; }
|
253
300
|
return NOKOGIRI_STR_NEW2(doc->version);
|
@@ -369,7 +416,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
|
|
369
416
|
level = INT2NUM((long)1);
|
370
417
|
}
|
371
418
|
|
372
|
-
|
419
|
+
doc = noko_xml_document_unwrap(self);
|
373
420
|
|
374
421
|
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
375
422
|
|
@@ -442,8 +489,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
442
489
|
static VALUE
|
443
490
|
remove_namespaces_bang(VALUE self)
|
444
491
|
{
|
445
|
-
xmlDocPtr doc ;
|
446
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
492
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
447
493
|
|
448
494
|
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
|
449
495
|
return self;
|
@@ -471,7 +517,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
471
517
|
xmlEntityPtr ptr;
|
472
518
|
xmlDocPtr doc ;
|
473
519
|
|
474
|
-
|
520
|
+
doc = noko_xml_document_unwrap(self);
|
475
521
|
|
476
522
|
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
|
477
523
|
&content);
|
@@ -559,7 +605,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
559
605
|
}
|
560
606
|
}
|
561
607
|
|
562
|
-
|
608
|
+
c_doc = noko_xml_document_unwrap(self);
|
563
609
|
|
564
610
|
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
565
611
|
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
|
@@ -607,7 +653,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
|
|
607
653
|
klass = cNokogiriXmlDocument;
|
608
654
|
}
|
609
655
|
|
610
|
-
rb_document =
|
656
|
+
rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
|
611
657
|
|
612
658
|
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
613
659
|
tuple->doc = rb_document;
|
@@ -640,6 +686,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
|
640
686
|
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
641
687
|
}
|
642
688
|
|
689
|
+
xmlDocPtr
|
690
|
+
noko_xml_document_unwrap(VALUE rb_document)
|
691
|
+
{
|
692
|
+
xmlDocPtr c_document;
|
693
|
+
TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
|
694
|
+
return c_document;
|
695
|
+
}
|
696
|
+
|
697
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
698
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
699
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
700
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
701
|
+
*/
|
702
|
+
int
|
703
|
+
noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
|
704
|
+
{
|
705
|
+
VALUE cache = DOC_NODE_CACHE(c_document);
|
706
|
+
|
707
|
+
if (NIL_P(cache)) {
|
708
|
+
return 0;
|
709
|
+
}
|
710
|
+
|
711
|
+
for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
|
712
|
+
xmlNodePtr node;
|
713
|
+
VALUE element = rb_ary_entry(cache, jnode);
|
714
|
+
|
715
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
716
|
+
if (xmlIsBlankNode(node)) {
|
717
|
+
return 1;
|
718
|
+
}
|
719
|
+
}
|
720
|
+
|
721
|
+
return 0;
|
722
|
+
}
|
643
723
|
|
644
724
|
void
|
645
725
|
noko_xml_document_pin_node(xmlNodePtr node)
|
data/ext/nokogiri/xml_dtd.c
CHANGED
@@ -139,7 +139,7 @@ validate(VALUE self, VALUE document)
|
|
139
139
|
VALUE error_list;
|
140
140
|
|
141
141
|
Noko_Node_Get_Struct(self, xmlDtd, dtd);
|
142
|
-
|
142
|
+
doc = noko_xml_document_unwrap(document);
|
143
143
|
error_list = rb_ary_new();
|
144
144
|
|
145
145
|
ctxt = xmlNewValidCtxt();
|
data/ext/nokogiri/xml_element_content.c
CHANGED
@@ -2,17 +2,22 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlElementContent;
|
4
4
|
|
5
|
+
static const rb_data_type_t element_content_data_type = {
|
6
|
+
.wrap_struct_name = "Nokogiri::XML::ElementContent",
|
7
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
8
|
+
};
|
9
|
+
|
5
10
|
/*
|
6
11
|
* call-seq:
|
7
|
-
*
|
12
|
+
* name → String
|
8
13
|
*
|
9
|
-
*
|
14
|
+
* [Returns] The content element's +name+
|
10
15
|
*/
|
11
16
|
static VALUE
|
12
17
|
get_name(VALUE self)
|
13
18
|
{
|
14
19
|
xmlElementContentPtr elem;
|
15
|
-
|
20
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
16
21
|
|
17
22
|
if (!elem->name) { return Qnil; }
|
18
23
|
return NOKOGIRI_STR_NEW2(elem->name);
|
@@ -20,47 +25,40 @@ get_name(VALUE self)
|
|
20
25
|
|
21
26
|
/*
|
22
27
|
* call-seq:
|
23
|
-
*
|
28
|
+
* type → Integer
|
24
29
|
*
|
25
|
-
*
|
26
|
-
* or OR.
|
30
|
+
* [Returns] The content element's +type+. Possible values are +PCDATA+, +ELEMENT+, +SEQ+, or +OR+.
|
27
31
|
*/
|
28
32
|
static VALUE
|
29
33
|
get_type(VALUE self)
|
30
34
|
{
|
31
35
|
xmlElementContentPtr elem;
|
32
|
-
|
36
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
33
37
|
|
34
38
|
return INT2NUM(elem->type);
|
35
39
|
}
|
36
40
|
|
37
41
|
/*
|
38
|
-
* call-seq:
|
39
|
-
* c1
|
40
|
-
*
|
41
42
|
* Get the first child.
|
42
43
|
*/
|
43
44
|
static VALUE
|
44
45
|
get_c1(VALUE self)
|
45
46
|
{
|
46
47
|
xmlElementContentPtr elem;
|
47
|
-
|
48
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
48
49
|
|
49
50
|
if (!elem->c1) { return Qnil; }
|
50
51
|
return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c1);
|
51
52
|
}
|
52
53
|
|
53
54
|
/*
|
54
|
-
*
|
55
|
-
* c2
|
56
|
-
*
|
57
|
-
* Get the first child.
|
55
|
+
* Get the second child.
|
58
56
|
*/
|
59
57
|
static VALUE
|
60
58
|
get_c2(VALUE self)
|
61
59
|
{
|
62
60
|
xmlElementContentPtr elem;
|
63
|
-
|
61
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
64
62
|
|
65
63
|
if (!elem->c2) { return Qnil; }
|
66
64
|
return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c2);
|
@@ -68,45 +66,50 @@ get_c2(VALUE self)
|
|
68
66
|
|
69
67
|
/*
|
70
68
|
* call-seq:
|
71
|
-
*
|
69
|
+
* occur → Integer
|
72
70
|
*
|
73
|
-
*
|
74
|
-
* or PLUS.
|
71
|
+
* [Returns] The content element's +occur+ flag. Possible values are +ONCE+, +OPT+, +MULT+ or +PLUS+.
|
75
72
|
*/
|
76
73
|
static VALUE
|
77
74
|
get_occur(VALUE self)
|
78
75
|
{
|
79
76
|
xmlElementContentPtr elem;
|
80
|
-
|
77
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
81
78
|
|
82
79
|
return INT2NUM(elem->ocur);
|
83
80
|
}
|
84
81
|
|
85
82
|
/*
|
86
83
|
* call-seq:
|
87
|
-
*
|
84
|
+
* prefix → String
|
88
85
|
*
|
89
|
-
*
|
86
|
+
* [Returns] The content element's namespace +prefix+.
|
90
87
|
*/
|
91
88
|
static VALUE
|
92
89
|
get_prefix(VALUE self)
|
93
90
|
{
|
94
91
|
xmlElementContentPtr elem;
|
95
|
-
|
92
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
96
93
|
|
97
94
|
if (!elem->prefix) { return Qnil; }
|
98
95
|
|
99
96
|
return NOKOGIRI_STR_NEW2(elem->prefix);
|
100
97
|
}
|
101
98
|
|
99
|
+
/*
|
100
|
+
* create a Nokogiri::XML::ElementContent object around an +element+.
|
101
|
+
*/
|
102
102
|
VALUE
|
103
|
-
noko_xml_element_content_wrap(VALUE
|
103
|
+
noko_xml_element_content_wrap(VALUE rb_document, xmlElementContentPtr c_element_content)
|
104
104
|
{
|
105
|
-
VALUE elem =
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
105
|
+
VALUE elem = TypedData_Wrap_Struct(
|
106
|
+
cNokogiriXmlElementContent,
|
107
|
+
&element_content_data_type,
|
108
|
+
c_element_content
|
109
|
+
);
|
110
|
+
|
111
|
+
/* keep a handle on the document for GC marking */
|
112
|
+
rb_iv_set(elem, "@document", rb_document);
|
110
113
|
|
111
114
|
return elem;
|
112
115
|
}
|
data/ext/nokogiri/xml_element_decl.c
CHANGED
@@ -6,7 +6,7 @@ static ID id_document;
|
|
6
6
|
|
7
7
|
/*
|
8
8
|
* call-seq:
|
9
|
-
*
|
9
|
+
* element_type → Integer
|
10
10
|
*
|
11
11
|
* The element_type
|
12
12
|
*/
|
@@ -20,9 +20,9 @@ element_type(VALUE self)
|
|
20
20
|
|
21
21
|
/*
|
22
22
|
* call-seq:
|
23
|
-
*
|
23
|
+
* content → Nokogiri::XML::ElementContent
|
24
24
|
*
|
25
|
-
* The
|
25
|
+
* [Returns] The root of this element declaration's content tree.
|
26
26
|
*/
|
27
27
|
static VALUE
|
28
28
|
content(VALUE self)
|
@@ -40,9 +40,9 @@ content(VALUE self)
|
|
40
40
|
|
41
41
|
/*
|
42
42
|
* call-seq:
|
43
|
-
*
|
43
|
+
* prefix → String
|
44
44
|
*
|
45
|
-
* The namespace prefix for this
|
45
|
+
* [Returns] The namespace +prefix+ for this element declaration.
|
46
46
|
*/
|
47
47
|
static VALUE
|
48
48
|
prefix(VALUE self)
|
data/ext/nokogiri/xml_encoding_handler.c
CHANGED
@@ -2,14 +2,22 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriEncodingHandler;
|
4
4
|
|
5
|
-
|
6
5
|
static void
|
7
|
-
|
6
|
+
xml_encoding_handler_dealloc(void *data)
|
8
7
|
{
|
9
8
|
/* make sure iconv handlers are cleaned up and freed */
|
9
|
+
xmlCharEncodingHandlerPtr c_handler = data;
|
10
10
|
xmlCharEncCloseFunc(c_handler);
|
11
11
|
}
|
12
12
|
|
13
|
+
static const rb_data_type_t xml_encoding_handler_type = {
|
14
|
+
.wrap_struct_name = "Nokogiri::EncodingHandler",
|
15
|
+
.function = {
|
16
|
+
.dfree = xml_encoding_handler_dealloc,
|
17
|
+
},
|
18
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
19
|
+
};
|
20
|
+
|
13
21
|
|
14
22
|
/*
|
15
23
|
* call-seq: Nokogiri::EncodingHandler.[](name)
|
@@ -23,7 +31,7 @@ rb_xml_encoding_handler_s_get(VALUE klass, VALUE key)
|
|
23
31
|
|
24
32
|
handler = xmlFindCharEncodingHandler(StringValueCStr(key));
|
25
33
|
if (handler) {
|
26
|
-
return
|
34
|
+
return TypedData_Wrap_Struct(klass, &xml_encoding_handler_type, handler);
|
27
35
|
}
|
28
36
|
|
29
37
|
return Qnil;
|
@@ -82,7 +90,7 @@ rb_xml_encoding_handler_name(VALUE self)
|
|
82
90
|
{
|
83
91
|
xmlCharEncodingHandlerPtr handler;
|
84
92
|
|
85
|
-
|
93
|
+
TypedData_Get_Struct(self, xmlCharEncodingHandler, &xml_encoding_handler_type, handler);
|
86
94
|
|
87
95
|
return NOKOGIRI_STR_NEW2(handler->name);
|
88
96
|
}
|
data/ext/nokogiri/xml_namespace.c
CHANGED
@@ -56,21 +56,20 @@ _xml_namespace_update_references(void *ptr)
|
|
56
56
|
#endif
|
57
57
|
|
58
58
|
static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = {
|
59
|
-
"Nokogiri
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
59
|
+
.wrap_struct_name = "Nokogiri::XML::Namespace#with_dealloc",
|
60
|
+
.function = {
|
61
|
+
.dfree = _xml_namespace_dealloc,
|
62
|
+
.dcompact = _xml_namespace_update_references,
|
63
|
+
},
|
64
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
65
65
|
};
|
66
66
|
|
67
67
|
static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = {
|
68
|
-
"Nokogiri
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
RUBY_TYPED_FREE_IMMEDIATELY,
|
73
|
-
#endif
|
68
|
+
.wrap_struct_name = "Nokogiri::XML::Namespace#without_dealloc",
|
69
|
+
.function = {
|
70
|
+
.dcompact = _xml_namespace_update_references,
|
71
|
+
},
|
72
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
74
73
|
};
|
75
74
|
|
76
75
|
/*
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -43,12 +43,12 @@ _xml_node_update_references(void *ptr)
|
|
43
43
|
#endif
|
44
44
|
|
45
45
|
static const rb_data_type_t nokogiri_node_type = {
|
46
|
-
"Nokogiri
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
46
|
+
.wrap_struct_name = "Nokogiri::XML::Node",
|
47
|
+
.function = {
|
48
|
+
.dmark = _xml_node_mark,
|
49
|
+
.dcompact = _xml_node_update_references,
|
50
|
+
},
|
51
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
52
52
|
};
|
53
53
|
|
54
54
|
static void
|
@@ -984,7 +984,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self)
|
|
984
984
|
if (n_args < 2) {
|
985
985
|
new_parent_doc = node->doc;
|
986
986
|
} else {
|
987
|
-
|
987
|
+
new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
|
988
988
|
}
|
989
989
|
|
990
990
|
dup = xmlDocCopyNode(node, new_parent_doc, level);
|