nokogiri 1.5.10 → 1.10.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/LICENSE-DEPENDENCIES.md +1614 -0
- data/LICENSE.md +9 -0
- data/README.md +198 -0
- data/bin/nokogiri +50 -10
- data/dependencies.yml +72 -0
- data/ext/nokogiri/extconf.rb +634 -92
- data/ext/nokogiri/html_document.c +8 -8
- data/ext/nokogiri/html_element_description.c +15 -15
- data/ext/nokogiri/html_entity_lookup.c +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +4 -4
- data/ext/nokogiri/html_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +20 -12
- data/ext/nokogiri/nokogiri.h +1 -44
- data/ext/nokogiri/xml_attr.c +34 -25
- data/ext/nokogiri/xml_cdata.c +12 -6
- data/ext/nokogiri/xml_comment.c +18 -3
- data/ext/nokogiri/xml_document.c +64 -32
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +3 -3
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_io.c +11 -6
- data/ext/nokogiri/xml_namespace.c +50 -17
- data/ext/nokogiri/xml_namespace.h +3 -2
- data/ext/nokogiri/xml_node.c +459 -240
- data/ext/nokogiri/xml_node_set.c +166 -147
- data/ext/nokogiri/xml_node_set.h +2 -4
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +6 -19
- data/ext/nokogiri/xml_sax_parser.c +11 -13
- data/ext/nokogiri/xml_sax_parser_context.c +41 -1
- data/ext/nokogiri/xml_sax_push_parser.c +56 -12
- data/ext/nokogiri/xml_schema.c +1 -1
- data/ext/nokogiri/xml_syntax_error.c +11 -5
- data/ext/nokogiri/xml_syntax_error.h +1 -1
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +17 -38
- data/ext/nokogiri/xslt_stylesheet.c +10 -10
- data/lib/nokogiri/css/node.rb +0 -50
- data/lib/nokogiri/css/parser.rb +263 -233
- data/lib/nokogiri/css/parser.y +54 -40
- data/lib/nokogiri/css/tokenizer.rb +104 -103
- data/lib/nokogiri/css/tokenizer.rex +5 -5
- data/lib/nokogiri/css/xpath_visitor.rb +78 -19
- data/lib/nokogiri/decorators/slop.rb +12 -5
- data/lib/nokogiri/html/document.rb +102 -21
- data/lib/nokogiri/html/document_fragment.rb +11 -3
- data/lib/nokogiri/html/sax/parser.rb +12 -2
- data/lib/nokogiri/html/sax/push_parser.rb +22 -2
- data/lib/nokogiri/version.rb +40 -22
- data/lib/nokogiri/xml/builder.rb +34 -31
- data/lib/nokogiri/xml/document.rb +20 -14
- data/lib/nokogiri/xml/document_fragment.rb +50 -2
- data/lib/nokogiri/xml/dtd.rb +14 -4
- data/lib/nokogiri/xml/entity_reference.rb +18 -0
- data/lib/nokogiri/xml/node.rb +148 -203
- data/lib/nokogiri/xml/node_set.rb +139 -123
- data/lib/nokogiri/xml/parse_options.rb +22 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +7 -8
- data/lib/nokogiri/xml/searchable.rb +230 -0
- data/lib/nokogiri/xml/syntax_error.rb +24 -1
- data/lib/nokogiri/xml.rb +3 -1
- data/lib/nokogiri.rb +40 -24
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
- data/patches/libxml2/0002-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +120 -0
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
- metadata +252 -388
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
@@ -18,8 +18,8 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
18
18
|
external_id = rb_ary_entry(rest, (long)1);
|
19
19
|
|
20
20
|
doc = htmlNewDoc(
|
21
|
-
RTEST(uri) ? (const xmlChar *)StringValuePtr(uri) : NULL,
|
22
|
-
RTEST(external_id) ? (const xmlChar *)StringValuePtr(external_id) : NULL
|
21
|
+
RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
|
22
|
+
RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
|
23
23
|
);
|
24
24
|
rb_doc = Nokogiri_wrap_xml_document(klass, doc);
|
25
25
|
rb_obj_call_init(rb_doc, argc, argv);
|
@@ -39,8 +39,8 @@ static VALUE read_io( VALUE klass,
|
|
39
39
|
VALUE encoding,
|
40
40
|
VALUE options )
|
41
41
|
{
|
42
|
-
const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
|
43
|
-
const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
|
42
|
+
const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
|
43
|
+
const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
|
44
44
|
VALUE error_list = rb_ary_new();
|
45
45
|
VALUE document;
|
46
46
|
htmlDocPtr doc;
|
@@ -77,7 +77,7 @@ static VALUE read_io( VALUE klass,
|
|
77
77
|
|
78
78
|
error = xmlGetLastError();
|
79
79
|
if(error)
|
80
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
80
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
81
81
|
else
|
82
82
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
83
83
|
|
@@ -103,8 +103,8 @@ static VALUE read_memory( VALUE klass,
|
|
103
103
|
VALUE options )
|
104
104
|
{
|
105
105
|
const char * c_buffer = StringValuePtr(string);
|
106
|
-
const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
|
107
|
-
const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
|
106
|
+
const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
|
107
|
+
const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
|
108
108
|
int len = (int)RSTRING_LEN(string);
|
109
109
|
VALUE error_list = rb_ary_new();
|
110
110
|
VALUE document;
|
@@ -123,7 +123,7 @@ static VALUE read_memory( VALUE klass,
|
|
123
123
|
|
124
124
|
error = xmlGetLastError();
|
125
125
|
if(error)
|
126
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
126
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
127
127
|
else
|
128
128
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
129
129
|
|
@@ -8,7 +8,7 @@
|
|
8
8
|
*/
|
9
9
|
static VALUE required_attributes(VALUE self)
|
10
10
|
{
|
11
|
-
htmlElemDesc * description;
|
11
|
+
const htmlElemDesc * description;
|
12
12
|
VALUE list;
|
13
13
|
int i;
|
14
14
|
|
@@ -33,7 +33,7 @@ static VALUE required_attributes(VALUE self)
|
|
33
33
|
*/
|
34
34
|
static VALUE deprecated_attributes(VALUE self)
|
35
35
|
{
|
36
|
-
htmlElemDesc * description;
|
36
|
+
const htmlElemDesc * description;
|
37
37
|
VALUE list;
|
38
38
|
int i;
|
39
39
|
|
@@ -58,7 +58,7 @@ static VALUE deprecated_attributes(VALUE self)
|
|
58
58
|
*/
|
59
59
|
static VALUE optional_attributes(VALUE self)
|
60
60
|
{
|
61
|
-
htmlElemDesc * description;
|
61
|
+
const htmlElemDesc * description;
|
62
62
|
VALUE list;
|
63
63
|
int i;
|
64
64
|
|
@@ -83,7 +83,7 @@ static VALUE optional_attributes(VALUE self)
|
|
83
83
|
*/
|
84
84
|
static VALUE default_sub_element(VALUE self)
|
85
85
|
{
|
86
|
-
htmlElemDesc * description;
|
86
|
+
const htmlElemDesc * description;
|
87
87
|
Data_Get_Struct(self, htmlElemDesc, description);
|
88
88
|
|
89
89
|
if (description->defaultsubelt)
|
@@ -100,7 +100,7 @@ static VALUE default_sub_element(VALUE self)
|
|
100
100
|
*/
|
101
101
|
static VALUE sub_elements(VALUE self)
|
102
102
|
{
|
103
|
-
htmlElemDesc * description;
|
103
|
+
const htmlElemDesc * description;
|
104
104
|
VALUE list;
|
105
105
|
int i;
|
106
106
|
|
@@ -125,7 +125,7 @@ static VALUE sub_elements(VALUE self)
|
|
125
125
|
*/
|
126
126
|
static VALUE description(VALUE self)
|
127
127
|
{
|
128
|
-
htmlElemDesc * description;
|
128
|
+
const htmlElemDesc * description;
|
129
129
|
Data_Get_Struct(self, htmlElemDesc, description);
|
130
130
|
|
131
131
|
return NOKOGIRI_STR_NEW2(description->desc);
|
@@ -139,7 +139,7 @@ static VALUE description(VALUE self)
|
|
139
139
|
*/
|
140
140
|
static VALUE inline_eh(VALUE self)
|
141
141
|
{
|
142
|
-
htmlElemDesc * description;
|
142
|
+
const htmlElemDesc * description;
|
143
143
|
Data_Get_Struct(self, htmlElemDesc, description);
|
144
144
|
|
145
145
|
if(description->isinline) return Qtrue;
|
@@ -154,7 +154,7 @@ static VALUE inline_eh(VALUE self)
|
|
154
154
|
*/
|
155
155
|
static VALUE deprecated_eh(VALUE self)
|
156
156
|
{
|
157
|
-
htmlElemDesc * description;
|
157
|
+
const htmlElemDesc * description;
|
158
158
|
Data_Get_Struct(self, htmlElemDesc, description);
|
159
159
|
|
160
160
|
if(description->depr) return Qtrue;
|
@@ -169,7 +169,7 @@ static VALUE deprecated_eh(VALUE self)
|
|
169
169
|
*/
|
170
170
|
static VALUE empty_eh(VALUE self)
|
171
171
|
{
|
172
|
-
htmlElemDesc * description;
|
172
|
+
const htmlElemDesc * description;
|
173
173
|
Data_Get_Struct(self, htmlElemDesc, description);
|
174
174
|
|
175
175
|
if(description->empty) return Qtrue;
|
@@ -184,7 +184,7 @@ static VALUE empty_eh(VALUE self)
|
|
184
184
|
*/
|
185
185
|
static VALUE save_end_tag_eh(VALUE self)
|
186
186
|
{
|
187
|
-
htmlElemDesc * description;
|
187
|
+
const htmlElemDesc * description;
|
188
188
|
Data_Get_Struct(self, htmlElemDesc, description);
|
189
189
|
|
190
190
|
if(description->saveEndTag) return Qtrue;
|
@@ -199,7 +199,7 @@ static VALUE save_end_tag_eh(VALUE self)
|
|
199
199
|
*/
|
200
200
|
static VALUE implied_end_tag_eh(VALUE self)
|
201
201
|
{
|
202
|
-
htmlElemDesc * description;
|
202
|
+
const htmlElemDesc * description;
|
203
203
|
Data_Get_Struct(self, htmlElemDesc, description);
|
204
204
|
|
205
205
|
if(description->endTag) return Qtrue;
|
@@ -214,7 +214,7 @@ static VALUE implied_end_tag_eh(VALUE self)
|
|
214
214
|
*/
|
215
215
|
static VALUE implied_start_tag_eh(VALUE self)
|
216
216
|
{
|
217
|
-
htmlElemDesc * description;
|
217
|
+
const htmlElemDesc * description;
|
218
218
|
Data_Get_Struct(self, htmlElemDesc, description);
|
219
219
|
|
220
220
|
if(description->startTag) return Qtrue;
|
@@ -229,7 +229,7 @@ static VALUE implied_start_tag_eh(VALUE self)
|
|
229
229
|
*/
|
230
230
|
static VALUE name(VALUE self)
|
231
231
|
{
|
232
|
-
htmlElemDesc * description;
|
232
|
+
const htmlElemDesc * description;
|
233
233
|
Data_Get_Struct(self, htmlElemDesc, description);
|
234
234
|
|
235
235
|
if(NULL == description->name) return Qnil;
|
@@ -245,11 +245,11 @@ static VALUE name(VALUE self)
|
|
245
245
|
static VALUE get_description(VALUE klass, VALUE tag_name)
|
246
246
|
{
|
247
247
|
const htmlElemDesc * description = htmlTagLookup(
|
248
|
-
(const xmlChar *)StringValuePtr(tag_name)
|
248
|
+
(const xmlChar *)StringValueCStr(tag_name)
|
249
249
|
);
|
250
250
|
|
251
251
|
if(NULL == description) return Qnil;
|
252
|
-
return Data_Wrap_Struct(klass, 0, 0, (void *)description);
|
252
|
+
return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description);
|
253
253
|
}
|
254
254
|
|
255
255
|
VALUE cNokogiriHtmlElementDescription ;
|
@@ -31,12 +31,12 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
|
31
31
|
}
|
32
32
|
|
33
33
|
if (RTEST(encoding)) {
|
34
|
-
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(
|
34
|
+
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
|
35
35
|
if (enc != NULL) {
|
36
36
|
xmlSwitchToEncoding(ctxt, enc);
|
37
37
|
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
38
38
|
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
39
|
-
|
39
|
+
StringValueCStr(encoding));
|
40
40
|
}
|
41
41
|
}
|
42
42
|
}
|
@@ -47,8 +47,8 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
|
47
47
|
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
48
48
|
{
|
49
49
|
htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
|
50
|
-
|
51
|
-
|
50
|
+
StringValueCStr(filename),
|
51
|
+
StringValueCStr(encoding)
|
52
52
|
);
|
53
53
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
54
54
|
}
|
@@ -46,10 +46,10 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
|
46
46
|
|
47
47
|
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
48
48
|
|
49
|
-
if(_filename != Qnil) filename =
|
49
|
+
if(_filename != Qnil) filename = StringValueCStr(_filename);
|
50
50
|
|
51
51
|
if (!NIL_P(encoding)) {
|
52
|
-
enc = xmlParseCharEncoding(
|
52
|
+
enc = xmlParseCharEncoding(StringValueCStr(encoding));
|
53
53
|
if (enc == XML_CHAR_ENCODING_ERROR)
|
54
54
|
rb_raise(rb_eArgError, "Unsupported Encoding");
|
55
55
|
}
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -14,7 +14,12 @@ VALUE mNokogiriHtmlSax ;
|
|
14
14
|
*/
|
15
15
|
int vasprintf (char **strp, const char *fmt, va_list ap)
|
16
16
|
{
|
17
|
-
|
17
|
+
/* Mingw32/64 have a broken vsnprintf implementation that fails when
|
18
|
+
* using a zero-byte limit in order to retrieve the required size for malloc.
|
19
|
+
* So we use a one byte buffer instead.
|
20
|
+
*/
|
21
|
+
char tmp[1];
|
22
|
+
int len = vsnprintf (tmp, 1, fmt, ap) + 1;
|
18
23
|
char *res = (char *)malloc((unsigned int)len);
|
19
24
|
if (res == NULL)
|
20
25
|
return -1;
|
@@ -23,25 +28,16 @@ int vasprintf (char **strp, const char *fmt, va_list ap)
|
|
23
28
|
}
|
24
29
|
#endif
|
25
30
|
|
26
|
-
#ifdef USING_SYSTEM_ALLOCATOR_LIBRARY /* Ruby Enterprise Edition with tcmalloc */
|
27
|
-
void vasprintf_free (void *p)
|
28
|
-
{
|
29
|
-
system_free(p);
|
30
|
-
}
|
31
|
-
#else
|
32
31
|
void vasprintf_free (void *p)
|
33
32
|
{
|
34
33
|
free(p);
|
35
34
|
}
|
36
|
-
#endif
|
37
35
|
|
38
36
|
#ifdef HAVE_RUBY_UTIL_H
|
39
37
|
#include "ruby/util.h"
|
40
38
|
#else
|
41
|
-
#ifndef __MACRUBY__
|
42
39
|
#include "util.h"
|
43
40
|
#endif
|
44
|
-
#endif
|
45
41
|
|
46
42
|
void nokogiri_root_node(xmlNodePtr node)
|
47
43
|
{
|
@@ -65,14 +61,12 @@ void nokogiri_root_nsdef(xmlNsPtr ns, xmlDocPtr doc)
|
|
65
61
|
|
66
62
|
void Init_nokogiri()
|
67
63
|
{
|
68
|
-
#ifndef __MACRUBY__
|
69
64
|
xmlMemSetup(
|
70
65
|
(xmlFreeFunc)ruby_xfree,
|
71
66
|
(xmlMallocFunc)ruby_xmalloc,
|
72
67
|
(xmlReallocFunc)ruby_xrealloc,
|
73
68
|
ruby_strdup
|
74
69
|
);
|
75
|
-
#endif
|
76
70
|
|
77
71
|
mNokogiri = rb_define_module("Nokogiri");
|
78
72
|
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
|
@@ -90,6 +84,20 @@ void Init_nokogiri()
|
|
90
84
|
NOKOGIRI_STR_NEW2(xmlParserVersion)
|
91
85
|
);
|
92
86
|
|
87
|
+
#ifdef NOKOGIRI_USE_PACKAGED_LIBRARIES
|
88
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qtrue);
|
89
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATH));
|
90
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATH));
|
91
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXML2_PATCHES), " "));
|
92
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), rb_str_split(NOKOGIRI_STR_NEW2(NOKOGIRI_LIBXSLT_PATCHES), " "));
|
93
|
+
#else
|
94
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_USE_PACKAGED_LIBRARIES"), Qfalse);
|
95
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATH"), Qnil);
|
96
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATH"), Qnil);
|
97
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXML2_PATCHES"), Qnil);
|
98
|
+
rb_const_set(mNokogiri, rb_intern("NOKOGIRI_LIBXSLT_PATCHES"), Qnil);
|
99
|
+
#endif
|
100
|
+
|
93
101
|
#ifdef LIBXML_ICONV_ENABLED
|
94
102
|
rb_const_set(mNokogiri, rb_intern("LIBXML_ICONV_ENABLED"), Qtrue);
|
95
103
|
#else
|
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -31,22 +31,8 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
|
|
31
31
|
#include <libxslt/extensions.h>
|
32
32
|
#include <libxml/c14n.h>
|
33
33
|
#include <ruby.h>
|
34
|
-
|
35
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
36
34
|
#include <ruby/st.h>
|
37
|
-
#else
|
38
|
-
#include <st.h>
|
39
|
-
#endif
|
40
|
-
|
41
|
-
#ifndef UNUSED
|
42
|
-
# if defined(__GNUC__)
|
43
|
-
# define MAYBE_UNUSED(name) name __attribute__((unused))
|
44
|
-
# define UNUSED(name) MAYBE_UNUSED(UNUSED_ ## name)
|
45
|
-
# else
|
46
|
-
# define MAYBE_UNUSED(name) name
|
47
|
-
# define UNUSED(name) name
|
48
|
-
# endif
|
49
|
-
#endif
|
35
|
+
#include <ruby/encoding.h>
|
50
36
|
|
51
37
|
#ifndef NORETURN
|
52
38
|
# if defined(__GNUC__)
|
@@ -56,25 +42,12 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
|
|
56
42
|
# endif
|
57
43
|
#endif
|
58
44
|
|
59
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
60
|
-
|
61
|
-
#include <ruby/encoding.h>
|
62
|
-
|
63
45
|
#define NOKOGIRI_STR_NEW2(str) \
|
64
46
|
NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
|
65
47
|
|
66
48
|
#define NOKOGIRI_STR_NEW(str, len) \
|
67
49
|
rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
|
68
50
|
|
69
|
-
#else
|
70
|
-
|
71
|
-
#define NOKOGIRI_STR_NEW2(str) \
|
72
|
-
rb_str_new2((const char *)(str))
|
73
|
-
|
74
|
-
#define NOKOGIRI_STR_NEW(str, len) \
|
75
|
-
rb_str_new((const char *)(str), (long)(len))
|
76
|
-
#endif
|
77
|
-
|
78
51
|
#define RBSTR_OR_QNIL(_str) \
|
79
52
|
(_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
|
80
53
|
|
@@ -135,22 +108,6 @@ void nokogiri_root_nsdef(xmlNsPtr, xmlDocPtr);
|
|
135
108
|
|
136
109
|
#endif
|
137
110
|
|
138
|
-
#ifndef RSTRING_PTR
|
139
|
-
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
140
|
-
#endif
|
141
|
-
|
142
|
-
#ifndef RSTRING_LEN
|
143
|
-
#define RSTRING_LEN(s) (RSTRING(s)->len)
|
144
|
-
#endif
|
145
|
-
|
146
|
-
#ifndef RARRAY_PTR
|
147
|
-
#define RARRAY_PTR(a) RARRAY(a)->ptr
|
148
|
-
#endif
|
149
|
-
|
150
|
-
#ifndef RARRAY_LEN
|
151
|
-
#define RARRAY_LEN(a) RARRAY(a)->len
|
152
|
-
#endif
|
153
|
-
|
154
111
|
#ifndef __builtin_expect
|
155
112
|
# if defined(__GNUC__)
|
156
113
|
# define __builtin_expect(expr, c) __builtin_expect((long)(expr), (long)(c))
|
data/ext/nokogiri/xml_attr.c
CHANGED
@@ -4,37 +4,40 @@
|
|
4
4
|
* call-seq:
|
5
5
|
* value=(content)
|
6
6
|
*
|
7
|
-
* Set the value for this Attr to +content+
|
7
|
+
* Set the value for this Attr to +content+. Use `nil` to remove the value
|
8
|
+
* (e.g., a HTML boolean attribute).
|
8
9
|
*/
|
9
10
|
static VALUE set_value(VALUE self, VALUE content)
|
10
11
|
{
|
11
12
|
xmlAttrPtr attr;
|
12
|
-
|
13
|
+
xmlChar *value;
|
14
|
+
xmlNode *cur;
|
13
15
|
|
14
|
-
|
16
|
+
Data_Get_Struct(self, xmlAttr, attr);
|
15
17
|
|
18
|
+
if (attr->children) {
|
19
|
+
xmlFreeNodeList(attr->children);
|
20
|
+
}
|
16
21
|
attr->children = attr->last = NULL;
|
17
22
|
|
18
|
-
if(content) {
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
/* Encode our content */
|
23
|
-
buffer = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValuePtr(content));
|
23
|
+
if (content == Qnil) {
|
24
|
+
return content;
|
25
|
+
}
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
-
|
27
|
+
value = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValueCStr(content));
|
28
|
+
if (xmlStrlen(value) == 0) {
|
29
|
+
attr->children = xmlNewDocText(attr->doc, value);
|
30
|
+
} else {
|
31
|
+
attr->children = xmlStringGetNodeList(attr->doc, value);
|
32
|
+
}
|
33
|
+
xmlFree(value);
|
28
34
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
35
|
+
for (cur = attr->children; cur; cur = cur->next) {
|
36
|
+
cur->parent = (xmlNode *)attr;
|
37
|
+
cur->doc = attr->doc;
|
38
|
+
if (cur->next == NULL) {
|
39
|
+
attr->last = cur;
|
34
40
|
}
|
35
|
-
|
36
|
-
/* Free up memory */
|
37
|
-
xmlFree(buffer);
|
38
41
|
}
|
39
42
|
|
40
43
|
return content;
|
@@ -57,20 +60,26 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
57
60
|
|
58
61
|
rb_scan_args(argc, argv, "2*", &document, &name, &rest);
|
59
62
|
|
63
|
+
if (! rb_obj_is_kind_of(document, cNokogiriXmlDocument)) {
|
64
|
+
rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
|
65
|
+
}
|
66
|
+
|
60
67
|
Data_Get_Struct(document, xmlDoc, xml_doc);
|
61
68
|
|
62
69
|
node = xmlNewDocProp(
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
70
|
+
xml_doc,
|
71
|
+
(const xmlChar *)StringValueCStr(name),
|
72
|
+
NULL
|
73
|
+
);
|
67
74
|
|
68
75
|
nokogiri_root_node((xmlNodePtr)node);
|
69
76
|
|
70
77
|
rb_node = Nokogiri_wrap_xml_node(klass, (xmlNodePtr)node);
|
71
78
|
rb_obj_call_init(rb_node, argc, argv);
|
72
79
|
|
73
|
-
if(rb_block_given_p()) rb_yield(rb_node);
|
80
|
+
if (rb_block_given_p()) {
|
81
|
+
rb_yield(rb_node);
|
82
|
+
}
|
74
83
|
|
75
84
|
return rb_node;
|
76
85
|
}
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -5,6 +5,9 @@
|
|
5
5
|
* new(document, content)
|
6
6
|
*
|
7
7
|
* Create a new CDATA element on the +document+ with +content+
|
8
|
+
*
|
9
|
+
* If +content+ cannot be implicitly converted to a string, this method will
|
10
|
+
* raise a TypeError exception.
|
8
11
|
*/
|
9
12
|
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
10
13
|
{
|
@@ -14,23 +17,26 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
14
17
|
VALUE content;
|
15
18
|
VALUE rest;
|
16
19
|
VALUE rb_node;
|
20
|
+
xmlChar *content_str = NULL;
|
21
|
+
int content_str_len = 0;
|
17
22
|
|
18
23
|
rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
|
19
24
|
|
20
25
|
Data_Get_Struct(doc, xmlDoc, xml_doc);
|
21
26
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
+
if (!NIL_P(content)) {
|
28
|
+
content_str = (xmlChar *)StringValuePtr(content);
|
29
|
+
content_str_len = RSTRING_LEN(content);
|
30
|
+
}
|
31
|
+
|
32
|
+
node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
|
27
33
|
|
28
34
|
nokogiri_root_node(node);
|
29
35
|
|
30
36
|
rb_node = Nokogiri_wrap_xml_node(klass, node);
|
31
37
|
rb_obj_call_init(rb_node, argc, argv);
|
32
38
|
|
33
|
-
if(rb_block_given_p()) rb_yield(rb_node);
|
39
|
+
if(rb_block_given_p()) { rb_yield(rb_node); }
|
34
40
|
|
35
41
|
return rb_node;
|
36
42
|
}
|
data/ext/nokogiri/xml_comment.c
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
#include <xml_comment.h>
|
2
2
|
|
3
|
+
static ID document_id ;
|
4
|
+
|
3
5
|
/*
|
4
6
|
* call-seq:
|
5
|
-
* new(document, content)
|
7
|
+
* new(document_or_node, content)
|
6
8
|
*
|
7
|
-
* Create a new Comment element on the +document+ with +content+
|
9
|
+
* Create a new Comment element on the +document+ with +content+.
|
10
|
+
* Alternatively, if a +node+ is passed, the +node+'s document is used.
|
8
11
|
*/
|
9
12
|
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
10
13
|
{
|
@@ -17,11 +20,21 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
17
20
|
|
18
21
|
rb_scan_args(argc, argv, "2*", &document, &content, &rest);
|
19
22
|
|
23
|
+
if (rb_obj_is_kind_of(document, cNokogiriXmlNode))
|
24
|
+
{
|
25
|
+
document = rb_funcall(document, document_id, 0);
|
26
|
+
}
|
27
|
+
else if ( !rb_obj_is_kind_of(document, cNokogiriXmlDocument)
|
28
|
+
&& !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment))
|
29
|
+
{
|
30
|
+
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
|
31
|
+
}
|
32
|
+
|
20
33
|
Data_Get_Struct(document, xmlDoc, xml_doc);
|
21
34
|
|
22
35
|
node = xmlNewDocComment(
|
23
36
|
xml_doc,
|
24
|
-
(const xmlChar *)StringValuePtr(content)
|
37
|
+
(const xmlChar *)StringValueCStr(content)
|
25
38
|
);
|
26
39
|
|
27
40
|
rb_node = Nokogiri_wrap_xml_node(klass, node);
|
@@ -51,4 +64,6 @@ void init_xml_comment()
|
|
51
64
|
cNokogiriXmlComment = klass;
|
52
65
|
|
53
66
|
rb_define_singleton_method(klass, "new", new, -1);
|
67
|
+
|
68
|
+
document_id = rb_intern("document");
|
54
69
|
}
|