nokogiri 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/History.ja.txt +7 -0
- data/History.txt +7 -0
- data/Manifest.txt +4 -0
- data/ext/nokogiri/native.c +1 -1
- data/ext/nokogiri/native.h +40 -0
- data/ext/nokogiri/xml_document.c +2 -2
- data/ext/nokogiri/xml_dtd.c +8 -7
- data/ext/nokogiri/xml_node.c +15 -12
- data/ext/nokogiri/xml_reader.c +36 -27
- data/ext/nokogiri/xml_sax_parser.c +24 -8
- data/ext/nokogiri/xml_syntax_error.c +5 -5
- data/ext/nokogiri/xml_xpath_context.c +4 -2
- data/ext/nokogiri/xslt_stylesheet.c +1 -1
- data/lib/nokogiri/css/generated_tokenizer.rb +2 -2
- data/lib/nokogiri/css/tokenizer.rex +2 -2
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +4 -0
- data/lib/nokogiri/xml/reader.rb +3 -1
- data/lib/nokogiri/xml/sax/parser.rb +30 -2
- data/lib/nokogiri/xml/sax/push_parser.rb +2 -1
- data/test/css/test_parser.rb +6 -0
- data/test/html/test_document.rb +14 -0
- data/test/test_reader.rb +1 -2
- data/test/xml/sax/test_parser.rb +43 -1
- data/test/xml/test_document_encoding.rb +25 -0
- data/test/xml/test_dtd_encoding.rb +30 -0
- data/test/xml/test_node_encoding.rb +76 -0
- data/test/xml/test_reader_encoding.rb +125 -0
- metadata +10 -2
data/History.ja.txt
CHANGED
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -151,12 +151,16 @@ test/xml/test_builder.rb
|
|
151
151
|
test/xml/test_cdata.rb
|
152
152
|
test/xml/test_comment.rb
|
153
153
|
test/xml/test_document.rb
|
154
|
+
test/xml/test_document_encoding.rb
|
154
155
|
test/xml/test_document_fragment.rb
|
155
156
|
test/xml/test_dtd.rb
|
157
|
+
test/xml/test_dtd_encoding.rb
|
156
158
|
test/xml/test_entity_reference.rb
|
157
159
|
test/xml/test_node.rb
|
160
|
+
test/xml/test_node_encoding.rb
|
158
161
|
test/xml/test_node_set.rb
|
159
162
|
test/xml/test_processing_instruction.rb
|
163
|
+
test/xml/test_reader_encoding.rb
|
160
164
|
test/xml/test_text.rb
|
161
165
|
test/xml/test_xpath.rb
|
162
166
|
vendor/hoe.rb
|
data/ext/nokogiri/native.c
CHANGED
data/ext/nokogiri/native.h
CHANGED
@@ -12,6 +12,46 @@
|
|
12
12
|
#include <libxml/HTMLparser.h>
|
13
13
|
#include <libxml/HTMLtree.h>
|
14
14
|
|
15
|
+
|
16
|
+
#ifndef UNUSED
|
17
|
+
# if defined(__GNUC__)
|
18
|
+
# define MAYBE_UNUSED(name) name __attribute__((unused))
|
19
|
+
# define UNUSED(name) MAYBE_UNUSED(UNUSED_ ## name)
|
20
|
+
# else
|
21
|
+
# define MAYBE_UNUSED(name) name
|
22
|
+
# define UNUSED(name) name
|
23
|
+
# endif
|
24
|
+
#endif
|
25
|
+
|
26
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
27
|
+
|
28
|
+
#include <ruby/encoding.h>
|
29
|
+
|
30
|
+
#define NOKOGIRI_STR_NEW2(str, encoding) \
|
31
|
+
({ \
|
32
|
+
VALUE _string = rb_str_new2((const char *)str); \
|
33
|
+
if(NULL != encoding) \
|
34
|
+
rb_enc_associate_index(_string, rb_enc_find_index(encoding)); \
|
35
|
+
_string; \
|
36
|
+
})
|
37
|
+
|
38
|
+
#define NOKOGIRI_STR_NEW(str, len, encoding) \
|
39
|
+
({ \
|
40
|
+
VALUE _string = rb_str_new((const char *)str, (long)len); \
|
41
|
+
if(NULL != encoding) \
|
42
|
+
rb_enc_associate_index(_string, rb_enc_find_index(encoding)); \
|
43
|
+
_string; \
|
44
|
+
})
|
45
|
+
|
46
|
+
#else
|
47
|
+
|
48
|
+
#define NOKOGIRI_STR_NEW2(str, doc) \
|
49
|
+
rb_str_new2((const char *)str)
|
50
|
+
|
51
|
+
#define NOKOGIRI_STR_NEW(str, len, doc) \
|
52
|
+
rb_str_new((const char *)str, (long)len)
|
53
|
+
#endif
|
54
|
+
|
15
55
|
#include <xml_io.h>
|
16
56
|
#include <xml_document.h>
|
17
57
|
#include <html_document.h>
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -20,7 +20,7 @@ static VALUE url(VALUE self)
|
|
20
20
|
Data_Get_Struct(self, xmlDoc, doc);
|
21
21
|
|
22
22
|
if(doc->URL)
|
23
|
-
return
|
23
|
+
return NOKOGIRI_STR_NEW2(doc->URL, doc->encoding);
|
24
24
|
|
25
25
|
return Qnil;
|
26
26
|
}
|
@@ -72,7 +72,7 @@ static VALUE encoding(VALUE self)
|
|
72
72
|
Data_Get_Struct(self, xmlDoc, doc);
|
73
73
|
|
74
74
|
if(!doc->encoding) return Qnil;
|
75
|
-
return
|
75
|
+
return NOKOGIRI_STR_NEW2(doc->encoding, doc->encoding);
|
76
76
|
}
|
77
77
|
|
78
78
|
/*
|
data/ext/nokogiri/xml_dtd.c
CHANGED
@@ -8,20 +8,21 @@ static void notation_copier(void *payload, void *data, xmlChar *name)
|
|
8
8
|
xmlNotationPtr c_notation = (xmlNotationPtr)payload;
|
9
9
|
|
10
10
|
VALUE notation = rb_funcall(klass, rb_intern("new"), 3,
|
11
|
-
c_notation->name ?
|
12
|
-
c_notation->PublicID ?
|
13
|
-
c_notation->SystemID ?
|
11
|
+
c_notation->name ? NOKOGIRI_STR_NEW2(c_notation->name, "UTF-8") : Qnil,
|
12
|
+
c_notation->PublicID ? NOKOGIRI_STR_NEW2(c_notation->PublicID, "UTF-8") : Qnil,
|
13
|
+
c_notation->SystemID ? NOKOGIRI_STR_NEW2(c_notation->SystemID, "UTF-8") : Qnil);
|
14
14
|
|
15
|
-
rb_hash_aset(hash,
|
15
|
+
rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name, "UTF-8"),notation);
|
16
16
|
}
|
17
17
|
|
18
|
-
static void element_copier(void *
|
18
|
+
static void element_copier(void *_payload, void *data, xmlChar *name)
|
19
19
|
{
|
20
20
|
VALUE hash = (VALUE)data;
|
21
|
+
xmlNodePtr payload = (xmlNodePtr)_payload;
|
21
22
|
|
22
|
-
VALUE element = Nokogiri_wrap_xml_node(
|
23
|
+
VALUE element = Nokogiri_wrap_xml_node(payload);
|
23
24
|
|
24
|
-
rb_hash_aset(hash,
|
25
|
+
rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name, payload->doc->encoding), element);
|
25
26
|
}
|
26
27
|
|
27
28
|
/*
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -39,7 +39,7 @@ static VALUE encode_special_chars(VALUE self, VALUE string)
|
|
39
39
|
(const xmlChar *)StringValuePtr(string)
|
40
40
|
);
|
41
41
|
|
42
|
-
VALUE encoded_str =
|
42
|
+
VALUE encoded_str = NOKOGIRI_STR_NEW2(encoded, node->doc->encoding);
|
43
43
|
xmlFree(encoded);
|
44
44
|
|
45
45
|
return encoded_str;
|
@@ -236,7 +236,8 @@ static VALUE get(VALUE self, VALUE attribute)
|
|
236
236
|
|
237
237
|
if(NULL == propstr) return Qnil;
|
238
238
|
|
239
|
-
rval =
|
239
|
+
rval = NOKOGIRI_STR_NEW2(propstr, node->doc->encoding);
|
240
|
+
|
240
241
|
xmlFree(propstr);
|
241
242
|
return rval ;
|
242
243
|
}
|
@@ -289,8 +290,9 @@ static VALUE namespace(VALUE self)
|
|
289
290
|
{
|
290
291
|
xmlNodePtr node ;
|
291
292
|
Data_Get_Struct(self, xmlNode, node);
|
292
|
-
if (node->ns && node->ns->prefix)
|
293
|
-
return
|
293
|
+
if (node->ns && node->ns->prefix) {
|
294
|
+
return NOKOGIRI_STR_NEW2(node->ns->prefix, node->doc->encoding);
|
295
|
+
}
|
294
296
|
return Qnil ;
|
295
297
|
}
|
296
298
|
|
@@ -354,7 +356,7 @@ static VALUE get_content(VALUE self)
|
|
354
356
|
|
355
357
|
xmlChar * content = xmlNodeGetContent(node);
|
356
358
|
if(content) {
|
357
|
-
VALUE rval =
|
359
|
+
VALUE rval = NOKOGIRI_STR_NEW2(content, node->doc->encoding);
|
358
360
|
xmlFree(content);
|
359
361
|
return rval;
|
360
362
|
}
|
@@ -427,7 +429,8 @@ static VALUE get_name(VALUE self)
|
|
427
429
|
{
|
428
430
|
xmlNodePtr node;
|
429
431
|
Data_Get_Struct(self, xmlNode, node);
|
430
|
-
if(node->name)
|
432
|
+
if(node->name)
|
433
|
+
return NOKOGIRI_STR_NEW2(node->name, node->doc->encoding);
|
431
434
|
return Qnil;
|
432
435
|
}
|
433
436
|
|
@@ -441,11 +444,10 @@ static VALUE path(VALUE self)
|
|
441
444
|
{
|
442
445
|
xmlNodePtr node;
|
443
446
|
xmlChar *path ;
|
444
|
-
VALUE rval ;
|
445
447
|
Data_Get_Struct(self, xmlNode, node);
|
446
448
|
|
447
449
|
path = xmlGetNodePath(node);
|
448
|
-
rval =
|
450
|
+
VALUE rval = NOKOGIRI_STR_NEW2(path, node->doc->encoding);
|
449
451
|
xmlFree(path);
|
450
452
|
return rval ;
|
451
453
|
}
|
@@ -602,14 +604,12 @@ static VALUE dump_html(VALUE self)
|
|
602
604
|
xmlNodePtr node ;
|
603
605
|
Data_Get_Struct(self, xmlNode, node);
|
604
606
|
|
605
|
-
VALUE html;
|
606
|
-
|
607
607
|
if(node->doc->type == XML_DOCUMENT_NODE)
|
608
608
|
return rb_funcall(self, rb_intern("to_xml"), 0);
|
609
609
|
|
610
610
|
buf = xmlBufferCreate() ;
|
611
611
|
htmlNodeDump(buf, node->doc, node);
|
612
|
-
html =
|
612
|
+
VALUE html = NOKOGIRI_STR_NEW2(buf->content, node->doc->encoding);
|
613
613
|
xmlBufferFree(buf);
|
614
614
|
return html ;
|
615
615
|
}
|
@@ -722,7 +722,10 @@ void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
|
722
722
|
sprintf(key, "%s", XMLNS_PREFIX);
|
723
723
|
}
|
724
724
|
|
725
|
-
rb_hash_aset(attr_hash,
|
725
|
+
rb_hash_aset(attr_hash,
|
726
|
+
NOKOGIRI_STR_NEW2(key, node->doc->encoding),
|
727
|
+
NOKOGIRI_STR_NEW2(ns->href, node->doc->encoding)
|
728
|
+
);
|
726
729
|
if (key != buffer) {
|
727
730
|
free(key);
|
728
731
|
}
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -131,6 +131,16 @@ static VALUE attribute_nodes(VALUE self)
|
|
131
131
|
rb_iv_set(rb_doc, "@decorators", Qnil);
|
132
132
|
ptr->doc->_private = (void *)rb_doc;
|
133
133
|
}
|
134
|
+
VALUE enc = rb_iv_get(self, "@encoding");
|
135
|
+
|
136
|
+
if(enc != Qnil && NULL == ptr->doc->encoding) {
|
137
|
+
ptr->doc->encoding = calloc((size_t)RSTRING_LEN(enc), sizeof(char));
|
138
|
+
strncpy(
|
139
|
+
(char *)ptr->doc->encoding,
|
140
|
+
StringValuePtr(enc),
|
141
|
+
(size_t)RSTRING_LEN(enc)
|
142
|
+
);
|
143
|
+
}
|
134
144
|
|
135
145
|
Nokogiri_xml_node_properties(ptr, attr);
|
136
146
|
|
@@ -157,7 +167,9 @@ static VALUE attribute_at(VALUE self, VALUE index)
|
|
157
167
|
);
|
158
168
|
if(value == NULL) return Qnil;
|
159
169
|
|
160
|
-
VALUE
|
170
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
171
|
+
VALUE rb_value = NOKOGIRI_STR_NEW2(value,
|
172
|
+
RTEST(enc) ? StringValuePtr(enc) : NULL);
|
161
173
|
xmlFree(value);
|
162
174
|
return rb_value;
|
163
175
|
}
|
@@ -193,7 +205,9 @@ static VALUE reader_attribute(VALUE self, VALUE name)
|
|
193
205
|
}
|
194
206
|
if(value == NULL) return Qnil;
|
195
207
|
|
196
|
-
VALUE
|
208
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
209
|
+
VALUE rb_value = NOKOGIRI_STR_NEW2(value,
|
210
|
+
RTEST(enc) ? StringValuePtr(enc) : NULL);
|
197
211
|
xmlFree(value);
|
198
212
|
return rb_value;
|
199
213
|
}
|
@@ -230,22 +244,6 @@ static VALUE depth(VALUE self)
|
|
230
244
|
return INT2NUM(depth);
|
231
245
|
}
|
232
246
|
|
233
|
-
/*
|
234
|
-
* call-seq:
|
235
|
-
* encoding
|
236
|
-
*
|
237
|
-
* Get the encoding for the document
|
238
|
-
*/
|
239
|
-
static VALUE encoding(VALUE self)
|
240
|
-
{
|
241
|
-
xmlTextReaderPtr reader;
|
242
|
-
Data_Get_Struct(self, xmlTextReader, reader);
|
243
|
-
const char * encoding = (const char *)xmlTextReaderConstEncoding(reader);
|
244
|
-
if(encoding == NULL) return Qnil;
|
245
|
-
|
246
|
-
return rb_str_new2(encoding);
|
247
|
-
}
|
248
|
-
|
249
247
|
/*
|
250
248
|
* call-seq:
|
251
249
|
* xml_version
|
@@ -259,7 +257,7 @@ static VALUE xml_version(VALUE self)
|
|
259
257
|
const char * version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
260
258
|
if(version == NULL) return Qnil;
|
261
259
|
|
262
|
-
return
|
260
|
+
return NOKOGIRI_STR_NEW2(version, "UTF-8");
|
263
261
|
}
|
264
262
|
|
265
263
|
/*
|
@@ -275,7 +273,9 @@ static VALUE lang(VALUE self)
|
|
275
273
|
const char * lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
276
274
|
if(lang == NULL) return Qnil;
|
277
275
|
|
278
|
-
|
276
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
277
|
+
return NOKOGIRI_STR_NEW2(lang,
|
278
|
+
RTEST(enc) ? StringValuePtr(enc) : NULL);
|
279
279
|
}
|
280
280
|
|
281
281
|
/*
|
@@ -291,7 +291,9 @@ static VALUE value(VALUE self)
|
|
291
291
|
const char * value = (const char *)xmlTextReaderConstValue(reader);
|
292
292
|
if(value == NULL) return Qnil;
|
293
293
|
|
294
|
-
|
294
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
295
|
+
return NOKOGIRI_STR_NEW2(value,
|
296
|
+
RTEST(enc) ? StringValuePtr(enc) : NULL);
|
295
297
|
}
|
296
298
|
|
297
299
|
/*
|
@@ -307,7 +309,9 @@ static VALUE prefix(VALUE self)
|
|
307
309
|
const char * prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
308
310
|
if(prefix == NULL) return Qnil;
|
309
311
|
|
310
|
-
|
312
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
313
|
+
return NOKOGIRI_STR_NEW2(prefix,
|
314
|
+
RTEST(enc) ? StringValuePtr(enc) : NULL);
|
311
315
|
}
|
312
316
|
|
313
317
|
/*
|
@@ -323,7 +327,9 @@ static VALUE namespace_uri(VALUE self)
|
|
323
327
|
const char * uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
324
328
|
if(uri == NULL) return Qnil;
|
325
329
|
|
326
|
-
|
330
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
331
|
+
return NOKOGIRI_STR_NEW2(uri,
|
332
|
+
RTEST(enc) ? StringValuePtr(enc) : NULL);
|
327
333
|
}
|
328
334
|
|
329
335
|
/*
|
@@ -339,7 +345,9 @@ static VALUE local_name(VALUE self)
|
|
339
345
|
const char * name = (const char *)xmlTextReaderConstLocalName(reader);
|
340
346
|
if(name == NULL) return Qnil;
|
341
347
|
|
342
|
-
|
348
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
349
|
+
return NOKOGIRI_STR_NEW2(name,
|
350
|
+
RTEST(enc) ? StringValuePtr(enc) : NULL);
|
343
351
|
}
|
344
352
|
|
345
353
|
/*
|
@@ -355,7 +363,9 @@ static VALUE name(VALUE self)
|
|
355
363
|
const char * name = (const char *)xmlTextReaderConstName(reader);
|
356
364
|
if(name == NULL) return Qnil;
|
357
365
|
|
358
|
-
|
366
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
367
|
+
return NOKOGIRI_STR_NEW2(name,
|
368
|
+
RTEST(enc) ? StringValuePtr(enc) : NULL);
|
359
369
|
}
|
360
370
|
|
361
371
|
/*
|
@@ -437,7 +447,7 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
|
|
437
447
|
}
|
438
448
|
|
439
449
|
VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
|
440
|
-
rb_funcall(rb_reader, rb_intern("initialize"),
|
450
|
+
rb_funcall(rb_reader, rb_intern("initialize"), 2, rb_url, encoding);
|
441
451
|
|
442
452
|
return rb_reader;
|
443
453
|
}
|
@@ -468,7 +478,6 @@ void init_xml_reader()
|
|
468
478
|
rb_define_method(klass, "value", value, 0);
|
469
479
|
rb_define_method(klass, "lang", lang, 0);
|
470
480
|
rb_define_method(klass, "xml_version", xml_version, 0);
|
471
|
-
rb_define_method(klass, "encoding", encoding, 0);
|
472
481
|
rb_define_method(klass, "depth", depth, 0);
|
473
482
|
rb_define_method(klass, "attribute_count", attribute_count, 0);
|
474
483
|
rb_define_method(klass, "attribute", reader_attribute, 1);
|
@@ -81,11 +81,14 @@ static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
|
|
81
81
|
VALUE self = (VALUE)ctx;
|
82
82
|
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
83
83
|
VALUE attributes = rb_ary_new();
|
84
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
84
85
|
const xmlChar * attr;
|
85
86
|
int i = 0;
|
86
87
|
if(atts) {
|
87
88
|
while((attr = atts[i]) != NULL) {
|
88
|
-
rb_funcall(attributes, rb_intern("<<"), 1,
|
89
|
+
rb_funcall(attributes, rb_intern("<<"), 1,
|
90
|
+
NOKOGIRI_STR_NEW2(attr, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
91
|
+
);
|
89
92
|
i++;
|
90
93
|
}
|
91
94
|
}
|
@@ -93,7 +96,7 @@ static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
|
|
93
96
|
rb_funcall( doc,
|
94
97
|
rb_intern("start_element"),
|
95
98
|
2,
|
96
|
-
|
99
|
+
NOKOGIRI_STR_NEW2(name, RTEST(enc) ? StringValuePtr(enc) : NULL),
|
97
100
|
attributes
|
98
101
|
);
|
99
102
|
}
|
@@ -101,23 +104,28 @@ static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
|
|
101
104
|
static void end_element(void * ctx, const xmlChar *name)
|
102
105
|
{
|
103
106
|
VALUE self = (VALUE)ctx;
|
107
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
104
108
|
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
105
|
-
rb_funcall(doc, rb_intern("end_element"), 1,
|
109
|
+
rb_funcall(doc, rb_intern("end_element"), 1,
|
110
|
+
NOKOGIRI_STR_NEW2(name, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
111
|
+
);
|
106
112
|
}
|
107
113
|
|
108
114
|
static void characters_func(void * ctx, const xmlChar * ch, int len)
|
109
115
|
{
|
110
116
|
VALUE self = (VALUE)ctx;
|
117
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
111
118
|
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
112
|
-
VALUE str =
|
119
|
+
VALUE str = NOKOGIRI_STR_NEW(ch, len, RTEST(enc) ? StringValuePtr(enc):NULL);
|
113
120
|
rb_funcall(doc, rb_intern("characters"), 1, str);
|
114
121
|
}
|
115
122
|
|
116
123
|
static void comment_func(void * ctx, const xmlChar * value)
|
117
124
|
{
|
118
125
|
VALUE self = (VALUE)ctx;
|
126
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
119
127
|
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
120
|
-
VALUE str =
|
128
|
+
VALUE str = NOKOGIRI_STR_NEW2(value, RTEST(enc) ? StringValuePtr(enc):NULL);
|
121
129
|
rb_funcall(doc, rb_intern("comment"), 1, str);
|
122
130
|
}
|
123
131
|
|
@@ -162,6 +170,7 @@ static void warning_func(void * ctx, const char *msg, ...)
|
|
162
170
|
{
|
163
171
|
VALUE self = (VALUE)ctx;
|
164
172
|
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
173
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
165
174
|
char * message;
|
166
175
|
|
167
176
|
va_list args;
|
@@ -169,13 +178,16 @@ static void warning_func(void * ctx, const char *msg, ...)
|
|
169
178
|
vasprintf(&message, msg, args);
|
170
179
|
va_end(args);
|
171
180
|
|
172
|
-
rb_funcall(doc, rb_intern("warning"), 1,
|
181
|
+
rb_funcall(doc, rb_intern("warning"), 1,
|
182
|
+
NOKOGIRI_STR_NEW2(message, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
183
|
+
);
|
173
184
|
free(message);
|
174
185
|
}
|
175
186
|
|
176
187
|
static void error_func(void * ctx, const char *msg, ...)
|
177
188
|
{
|
178
189
|
VALUE self = (VALUE)ctx;
|
190
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
179
191
|
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
180
192
|
char * message;
|
181
193
|
|
@@ -184,15 +196,19 @@ static void error_func(void * ctx, const char *msg, ...)
|
|
184
196
|
vasprintf(&message, msg, args);
|
185
197
|
va_end(args);
|
186
198
|
|
187
|
-
rb_funcall(doc, rb_intern("error"), 1,
|
199
|
+
rb_funcall(doc, rb_intern("error"), 1,
|
200
|
+
NOKOGIRI_STR_NEW2(message, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
201
|
+
);
|
188
202
|
free(message);
|
189
203
|
}
|
190
204
|
|
191
205
|
static void cdata_block(void * ctx, const xmlChar * value, int len)
|
192
206
|
{
|
193
207
|
VALUE self = (VALUE)ctx;
|
208
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
194
209
|
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
195
|
-
VALUE string =
|
210
|
+
VALUE string =
|
211
|
+
NOKOGIRI_STR_NEW(value, len, RTEST(enc) ? StringValuePtr(enc) : NULL);
|
196
212
|
rb_funcall(doc, rb_intern("cdata_block"), 1, string);
|
197
213
|
}
|
198
214
|
|
@@ -45,7 +45,7 @@ static VALUE str3(VALUE self)
|
|
45
45
|
xmlErrorPtr error;
|
46
46
|
Data_Get_Struct(self, xmlError, error);
|
47
47
|
if(error->str3)
|
48
|
-
return
|
48
|
+
return NOKOGIRI_STR_NEW2(error->str3, "UTF-8");
|
49
49
|
return Qnil;
|
50
50
|
}
|
51
51
|
|
@@ -60,7 +60,7 @@ static VALUE str2(VALUE self)
|
|
60
60
|
xmlErrorPtr error;
|
61
61
|
Data_Get_Struct(self, xmlError, error);
|
62
62
|
if(error->str2)
|
63
|
-
return
|
63
|
+
return NOKOGIRI_STR_NEW2(error->str2, "UTF-8");
|
64
64
|
return Qnil;
|
65
65
|
}
|
66
66
|
|
@@ -75,7 +75,7 @@ static VALUE str1(VALUE self)
|
|
75
75
|
xmlErrorPtr error;
|
76
76
|
Data_Get_Struct(self, xmlError, error);
|
77
77
|
if(error->str1)
|
78
|
-
return
|
78
|
+
return NOKOGIRI_STR_NEW2(error->str1, "UTF-8");
|
79
79
|
return Qnil;
|
80
80
|
}
|
81
81
|
|
@@ -103,7 +103,7 @@ static VALUE file(VALUE self)
|
|
103
103
|
xmlErrorPtr error;
|
104
104
|
Data_Get_Struct(self, xmlError, error);
|
105
105
|
if(error->file)
|
106
|
-
return
|
106
|
+
return NOKOGIRI_STR_NEW2(error->file, "UTF-8");
|
107
107
|
|
108
108
|
return Qnil;
|
109
109
|
}
|
@@ -157,7 +157,7 @@ static VALUE message(VALUE self)
|
|
157
157
|
{
|
158
158
|
xmlErrorPtr error;
|
159
159
|
Data_Get_Struct(self, xmlError, error);
|
160
|
-
return
|
160
|
+
return NOKOGIRI_STR_NEW2(error->message, "UTF-8");
|
161
161
|
}
|
162
162
|
|
163
163
|
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error)
|
@@ -46,7 +46,7 @@ static void ruby_funcall(xmlXPathParserContextPtr ctx, int nargs)
|
|
46
46
|
obj = valuePop(ctx);
|
47
47
|
switch(obj->type) {
|
48
48
|
case XPATH_STRING:
|
49
|
-
argv[i] =
|
49
|
+
argv[i] = NOKOGIRI_STR_NEW2(obj->stringval, ctx->context->doc->encoding);
|
50
50
|
break;
|
51
51
|
case XPATH_BOOLEAN:
|
52
52
|
argv[i] = obj->boolval == 1 ? Qtrue : Qfalse;
|
@@ -58,7 +58,9 @@ static void ruby_funcall(xmlXPathParserContextPtr ctx, int nargs)
|
|
58
58
|
argv[i] = Nokogiri_wrap_xml_node_set(obj->nodesetval);
|
59
59
|
break;
|
60
60
|
default:
|
61
|
-
argv[i] =
|
61
|
+
argv[i] = NOKOGIRI_STR_NEW2(
|
62
|
+
xmlXPathCastToString(obj), ctx->context->doc->encoding
|
63
|
+
);
|
62
64
|
}
|
63
65
|
xmlXPathFreeNodeSetList(obj);
|
64
66
|
} while(i-- > 0);
|
@@ -46,7 +46,7 @@ static VALUE serialize(VALUE self, VALUE xmlobj)
|
|
46
46
|
Data_Get_Struct(xmlobj, xmlDoc, xml);
|
47
47
|
Data_Get_Struct(self, xsltStylesheet, ss);
|
48
48
|
xsltSaveResultToString(&doc_ptr, &doc_len, xml, ss);
|
49
|
-
rval =
|
49
|
+
rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len, xml->encoding);
|
50
50
|
xmlFree(doc_ptr);
|
51
51
|
return rval ;
|
52
52
|
}
|
@@ -83,13 +83,13 @@ class GeneratedTokenizer < GeneratedParser
|
|
83
83
|
when (text = ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
|
84
84
|
@rex_tokens.push action { [:EQUAL, text] }
|
85
85
|
|
86
|
-
when (text = ss.scan(/[\s\r\n\f]*\)
|
86
|
+
when (text = ss.scan(/[\s\r\n\f]*\)/))
|
87
87
|
@rex_tokens.push action { [:RPAREN, text] }
|
88
88
|
|
89
89
|
when (text = ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
|
90
90
|
@rex_tokens.push action { [:LSQUARE, text] }
|
91
91
|
|
92
|
-
when (text = ss.scan(/[\s\r\n\f]*\]
|
92
|
+
when (text = ss.scan(/[\s\r\n\f]*\]/))
|
93
93
|
@rex_tokens.push action { [:RSQUARE, text] }
|
94
94
|
|
95
95
|
when (text = ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
|
@@ -32,9 +32,9 @@ rule
|
|
32
32
|
{w}\*={w} { [:SUBSTRINGMATCH, text] }
|
33
33
|
{w}!={w} { [:NOT_EQUAL, text] }
|
34
34
|
{w}={w} { [:EQUAL, text] }
|
35
|
-
{w}\){
|
35
|
+
{w}\) { [:RPAREN, text] }
|
36
36
|
{w}\[{w} { [:LSQUARE, text] }
|
37
|
-
{w}\]{
|
37
|
+
{w}\] { [:RSQUARE, text] }
|
38
38
|
{w}\+{w} { [:PLUS, text] }
|
39
39
|
{w}>{w} { [:GREATER, text] }
|
40
40
|
{w},{w} { [:COMMA, text] }
|
data/lib/nokogiri/version.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
@@ -55,6 +55,10 @@ module Nokogiri
|
|
55
55
|
PARSE_NOXINCNODE = 1 << 15 # do not generate XINCLUDE START/END nodes
|
56
56
|
|
57
57
|
class << self
|
58
|
+
def Reader string, url = nil, encoding = nil, options = 0
|
59
|
+
Reader.from_memory(string, url, encoding, options)
|
60
|
+
end
|
61
|
+
|
58
62
|
###
|
59
63
|
# Parse an XML document. See Nokogiri.XML.
|
60
64
|
def parse string_or_io, url = nil, encoding = nil, options = 2159
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -2,8 +2,35 @@ module Nokogiri
|
|
2
2
|
module XML
|
3
3
|
module SAX
|
4
4
|
class Parser
|
5
|
+
ENCODINGS = {
|
6
|
+
'NONE' => 0, # No char encoding detected
|
7
|
+
'UTF-8' => 1, # UTF-8
|
8
|
+
'UTF16LE' => 2, # UTF-16 little endian
|
9
|
+
'UTF16BE' => 3, # UTF-16 big endian
|
10
|
+
'UCS4LE' => 4, # UCS-4 little endian
|
11
|
+
'UCS4BE' => 5, # UCS-4 big endian
|
12
|
+
'EBCDIC' => 6, # EBCDIC uh!
|
13
|
+
'UCS4-2143' => 7, # UCS-4 unusual ordering
|
14
|
+
'UCS4-3412' => 8, # UCS-4 unusual ordering
|
15
|
+
'UCS2' => 9, # UCS-2
|
16
|
+
'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
|
17
|
+
'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
|
18
|
+
'ISO-8859-3' => 12, # ISO-8859-3
|
19
|
+
'ISO-8859-4' => 13, # ISO-8859-4
|
20
|
+
'ISO-8859-5' => 14, # ISO-8859-5
|
21
|
+
'ISO-8859-6' => 15, # ISO-8859-6
|
22
|
+
'ISO-8859-7' => 16, # ISO-8859-7
|
23
|
+
'ISO-8859-8' => 17, # ISO-8859-8
|
24
|
+
'ISO-8859-9' => 18, # ISO-8859-9
|
25
|
+
'ISO-2022-JP' => 19, # ISO-2022-JP
|
26
|
+
'SHIFT-JIS' => 20, # Shift_JIS
|
27
|
+
'EUC-JP' => 21, # EUC-JP
|
28
|
+
'ASCII' => 22, # pure ASCII
|
29
|
+
}
|
30
|
+
|
5
31
|
attr_accessor :document
|
6
32
|
def initialize(doc = XML::SAX::Document.new)
|
33
|
+
@encoding = 'ASCII'
|
7
34
|
@document = doc
|
8
35
|
end
|
9
36
|
|
@@ -20,8 +47,9 @@ module Nokogiri
|
|
20
47
|
|
21
48
|
###
|
22
49
|
# Parse given +io+
|
23
|
-
def parse_io io, encoding =
|
24
|
-
|
50
|
+
def parse_io io, encoding = 'ASCII'
|
51
|
+
@encoding = encoding
|
52
|
+
native_parse_io io, ENCODINGS[@encoding] || ENCODINGS['ASCII']
|
25
53
|
end
|
26
54
|
|
27
55
|
###
|
@@ -25,8 +25,9 @@ module Nokogiri
|
|
25
25
|
class PushParser
|
26
26
|
attr_accessor :document
|
27
27
|
|
28
|
-
def initialize(doc = XML::SAX::Document.new, file_name = nil)
|
28
|
+
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'ASCII')
|
29
29
|
@document = doc
|
30
|
+
@encoding = encoding
|
30
31
|
@sax_parser = XML::SAX::Parser.new(doc)
|
31
32
|
|
32
33
|
## Create our push parser context
|
data/test/css/test_parser.rb
CHANGED
@@ -71,6 +71,12 @@ module Nokogiri
|
|
71
71
|
@parser.parse("a[@id='Boing']")
|
72
72
|
end
|
73
73
|
|
74
|
+
def test_attributes_with_at_and_stuff
|
75
|
+
## This is non standard CSS
|
76
|
+
assert_xpath "//a[@id = 'Boing']//div",
|
77
|
+
@parser.parse("a[@id='Boing'] div")
|
78
|
+
end
|
79
|
+
|
74
80
|
def test_not_equal
|
75
81
|
## This is non standard CSS
|
76
82
|
assert_xpath "//a[child::text() != 'Boing']",
|
data/test/html/test_document.rb
CHANGED
@@ -105,6 +105,20 @@ module Nokogiri
|
|
105
105
|
assert_equal 3, found.length
|
106
106
|
end
|
107
107
|
|
108
|
+
def test_find_by_css_with_square_brackets
|
109
|
+
found = @html.css("div[@id='header'] > h1")
|
110
|
+
found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
|
111
|
+
assert_equal 1, found.length
|
112
|
+
end
|
113
|
+
|
114
|
+
def test_find_with_function
|
115
|
+
found = @html.css("div:awesome() h1", Class.new {
|
116
|
+
def awesome divs
|
117
|
+
[divs.first]
|
118
|
+
end
|
119
|
+
}.new)
|
120
|
+
end
|
121
|
+
|
108
122
|
def test_dup_shallow
|
109
123
|
found = @html.search('//div/a').first
|
110
124
|
dup = found.dup(0)
|
data/test/test_reader.rb
CHANGED
@@ -136,8 +136,7 @@ class TestReader < Nokogiri::TestCase
|
|
136
136
|
</awesome>
|
137
137
|
eoxml
|
138
138
|
reader = Nokogiri::XML::Reader.from_memory(string, nil, 'UTF-8')
|
139
|
-
|
140
|
-
assert_equal [nil], reader.map { |x| x.encoding }.uniq
|
139
|
+
assert_equal ['UTF-8'], reader.map { |x| x.encoding }.uniq
|
141
140
|
end
|
142
141
|
|
143
142
|
def test_xml_version
|
data/test/xml/sax/test_parser.rb
CHANGED
@@ -22,6 +22,12 @@ module Nokogiri
|
|
22
22
|
assert @parser.document.errors
|
23
23
|
assert @parser.document.errors.length > 0
|
24
24
|
|
25
|
+
if RUBY_VERSION =~ /^1\.9/
|
26
|
+
doc.errors.each do |error|
|
27
|
+
assert_equal 'UTF-8', error.message.encoding.name
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
25
31
|
assert_equal doc.errors.length, @parser.document.errors.length
|
26
32
|
end
|
27
33
|
|
@@ -35,9 +41,45 @@ module Nokogiri
|
|
35
41
|
|
36
42
|
def test_parse_io
|
37
43
|
File.open(XML_FILE, 'rb') { |f|
|
38
|
-
@parser.parse_io(f)
|
44
|
+
@parser.parse_io(f, 'UTF-8')
|
39
45
|
}
|
40
46
|
assert(@parser.document.cdata_blocks.length > 0)
|
47
|
+
if RUBY_VERSION =~ /^1\.9/
|
48
|
+
called = false
|
49
|
+
@parser.document.start_elements.flatten.each do |thing|
|
50
|
+
assert_equal 'UTF-8', thing.encoding.name
|
51
|
+
called = true
|
52
|
+
end
|
53
|
+
assert called
|
54
|
+
|
55
|
+
called = false
|
56
|
+
@parser.document.end_elements.flatten.each do |thing|
|
57
|
+
assert_equal 'UTF-8', thing.encoding.name
|
58
|
+
called = true
|
59
|
+
end
|
60
|
+
assert called
|
61
|
+
|
62
|
+
called = false
|
63
|
+
@parser.document.data.each do |thing|
|
64
|
+
assert_equal 'UTF-8', thing.encoding.name
|
65
|
+
called = true
|
66
|
+
end
|
67
|
+
assert called
|
68
|
+
|
69
|
+
called = false
|
70
|
+
@parser.document.comments.flatten.each do |thing|
|
71
|
+
assert_equal 'UTF-8', thing.encoding.name
|
72
|
+
called = true
|
73
|
+
end
|
74
|
+
assert called
|
75
|
+
|
76
|
+
called = false
|
77
|
+
@parser.document.cdata_blocks.flatten.each do |thing|
|
78
|
+
assert_equal 'UTF-8', thing.encoding.name
|
79
|
+
called = true
|
80
|
+
end
|
81
|
+
assert called
|
82
|
+
end
|
41
83
|
end
|
42
84
|
|
43
85
|
def test_parse_file
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
if RUBY_VERSION =~ /^1\.9/
|
6
|
+
class TestDocumentEncoding < Nokogiri::TestCase
|
7
|
+
def setup
|
8
|
+
@xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE, 'UTF-8')
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_url
|
12
|
+
assert_equal @xml.encoding, @xml.url.encoding.name
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_encoding
|
16
|
+
assert_equal @xml.encoding, @xml.encoding.encoding.name
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_dotted_version
|
20
|
+
assert_equal 'UTF-8', Nokogiri::LIBXML_VERSION.encoding.name
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
if RUBY_VERSION =~ /^1\.9/
|
6
|
+
class TestDTDEncoding < Nokogiri::TestCase
|
7
|
+
def setup
|
8
|
+
@xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE, 'UTF-8')
|
9
|
+
assert @dtd = @xml.internal_subset
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_entities
|
13
|
+
@dtd.entities.each do |k,v|
|
14
|
+
assert_equal @xml.encoding, k.encoding.name
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_notations
|
19
|
+
@dtd.notations.each do |k,notation|
|
20
|
+
assert_equal 'UTF-8', k.encoding.name
|
21
|
+
%w{ name public_id system_id }.each do |attribute|
|
22
|
+
v = notation.send(:"#{attribute}") || next
|
23
|
+
assert_equal 'UTF-8', v.encoding.name
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
if RUBY_VERSION =~ /^1\.9/
|
6
|
+
class TestNodeEncoding < Nokogiri::TestCase
|
7
|
+
def setup
|
8
|
+
@html = Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_get_attribute
|
12
|
+
node = @html.css('a').first
|
13
|
+
assert_equal @html.encoding, node['href'].encoding.name
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_encode_special_chars
|
17
|
+
foo = @html.css('a').first.encode_special_chars('foo')
|
18
|
+
assert_equal @html.encoding, foo.encoding.name
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_content
|
22
|
+
node = @html.css('a').first
|
23
|
+
assert_equal @html.encoding, node.content.encoding.name
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_name
|
27
|
+
node = @html.css('a').first
|
28
|
+
assert_equal @html.encoding, node.name.encoding.name
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_path
|
32
|
+
node = @html.css('a').first
|
33
|
+
assert_equal @html.encoding, node.path.encoding.name
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_namespace
|
37
|
+
xml = <<-eoxml
|
38
|
+
<root>
|
39
|
+
<car xmlns:part="http://general-motors.com/">
|
40
|
+
<part:tire>Michelin Model XGV</part:tire>
|
41
|
+
</car>
|
42
|
+
<bicycle xmlns:part="http://schwinn.com/">
|
43
|
+
<part:tire>I'm a bicycle tire!</part:tire>
|
44
|
+
</bicycle>
|
45
|
+
</root>
|
46
|
+
eoxml
|
47
|
+
doc = Nokogiri::XML(xml, nil, 'UTF-8')
|
48
|
+
assert_equal 'UTF-8', doc.encoding
|
49
|
+
n = doc.xpath('//part:tire', { 'part' => 'http://schwinn.com/' }).first
|
50
|
+
assert n
|
51
|
+
assert_equal doc.encoding, n.namespace.encoding.name
|
52
|
+
end
|
53
|
+
|
54
|
+
def test_namespace_as_hash
|
55
|
+
xml = <<-eoxml
|
56
|
+
<root>
|
57
|
+
<car xmlns:part="http://general-motors.com/">
|
58
|
+
<part:tire>Michelin Model XGV</part:tire>
|
59
|
+
</car>
|
60
|
+
<bicycle xmlns:part="http://schwinn.com/">
|
61
|
+
<part:tire>I'm a bicycle tire!</part:tire>
|
62
|
+
</bicycle>
|
63
|
+
</root>
|
64
|
+
eoxml
|
65
|
+
doc = Nokogiri::XML(xml, nil, 'UTF-8')
|
66
|
+
assert_equal 'UTF-8', doc.encoding
|
67
|
+
assert n = doc.xpath('//car').first
|
68
|
+
n.namespaces.each do |k,v|
|
69
|
+
assert_equal doc.encoding, v.encoding.name
|
70
|
+
assert_equal doc.encoding, k.encoding.name
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
if RUBY_VERSION =~ /^1\.9/
|
7
|
+
class TestReaderEncoding < Nokogiri::TestCase
|
8
|
+
def setup
|
9
|
+
@reader = Nokogiri::XML::Reader(
|
10
|
+
File.read(XML_FILE),
|
11
|
+
XML_FILE,
|
12
|
+
'UTF-8'
|
13
|
+
)
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_attribute_at
|
17
|
+
@reader.each do |node|
|
18
|
+
next unless attribute = node.attribute_at(0)
|
19
|
+
assert_equal @reader.encoding, attribute.encoding.name
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_attributes
|
24
|
+
@reader.each do |node|
|
25
|
+
node.attributes.each do |k,v|
|
26
|
+
assert_equal @reader.encoding, k.encoding.name
|
27
|
+
assert_equal @reader.encoding, v.encoding.name
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_attribute
|
33
|
+
xml = <<-eoxml
|
34
|
+
<x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
35
|
+
<tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
36
|
+
</x>
|
37
|
+
eoxml
|
38
|
+
reader = Nokogiri::XML::Reader(xml, nil, 'UTF-8')
|
39
|
+
reader.each do |node|
|
40
|
+
next unless attribute = node.attribute('awesome')
|
41
|
+
assert_equal reader.encoding, attribute.encoding.name
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_xml_version
|
46
|
+
@reader.each do |node|
|
47
|
+
next unless version = node.xml_version
|
48
|
+
assert_equal @reader.encoding, version.encoding.name
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_lang
|
53
|
+
xml = <<-eoxml
|
54
|
+
<awesome>
|
55
|
+
<p xml:lang="en">The quick brown fox jumps over the lazy dog.</p>
|
56
|
+
<p xml:lang="ja">日本語が上手です</p>
|
57
|
+
</awesome>
|
58
|
+
eoxml
|
59
|
+
|
60
|
+
reader = Nokogiri::XML::Reader(xml, nil, 'UTF-8')
|
61
|
+
reader.each do |node|
|
62
|
+
next unless lang = node.lang
|
63
|
+
assert_equal reader.encoding, lang.encoding.name
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_value
|
68
|
+
called = false
|
69
|
+
@reader.each do |node|
|
70
|
+
next unless value = node.value
|
71
|
+
assert_equal @reader.encoding, value.encoding.name
|
72
|
+
called = true
|
73
|
+
end
|
74
|
+
assert called
|
75
|
+
end
|
76
|
+
|
77
|
+
def test_prefix
|
78
|
+
xml = <<-eoxml
|
79
|
+
<x xmlns:edi='http://ecommerce.example.org/schema'>
|
80
|
+
<edi:foo>hello</edi:foo>
|
81
|
+
</x>
|
82
|
+
eoxml
|
83
|
+
reader = Nokogiri::XML::Reader(xml, nil, 'UTF-8')
|
84
|
+
reader.each do |node|
|
85
|
+
next unless prefix = node.prefix
|
86
|
+
assert_equal reader.encoding, prefix.encoding.name
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def test_ns_uri
|
91
|
+
xml = <<-eoxml
|
92
|
+
<x xmlns:edi='http://ecommerce.example.org/schema'>
|
93
|
+
<edi:foo>hello</edi:foo>
|
94
|
+
</x>
|
95
|
+
eoxml
|
96
|
+
reader = Nokogiri::XML::Reader(xml, nil, 'UTF-8')
|
97
|
+
reader.each do |node|
|
98
|
+
next unless uri = node.namespace_uri
|
99
|
+
assert_equal reader.encoding, uri.encoding.name
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_local_name
|
104
|
+
xml = <<-eoxml
|
105
|
+
<x xmlns:edi='http://ecommerce.example.org/schema'>
|
106
|
+
<edi:foo>hello</edi:foo>
|
107
|
+
</x>
|
108
|
+
eoxml
|
109
|
+
reader = Nokogiri::XML::Reader(xml, nil, 'UTF-8')
|
110
|
+
reader.each do |node|
|
111
|
+
next unless lname = node.local_name
|
112
|
+
assert_equal reader.encoding, lname.encoding.name
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def test_name
|
117
|
+
@reader.each do |node|
|
118
|
+
next unless name = node.name
|
119
|
+
assert_equal @reader.encoding, name.encoding.name
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aaron Patterson
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2009-02-
|
13
|
+
date: 2009-02-23 00:00:00 -08:00
|
14
14
|
default_executable:
|
15
15
|
dependencies: []
|
16
16
|
|
@@ -182,12 +182,16 @@ files:
|
|
182
182
|
- test/xml/test_cdata.rb
|
183
183
|
- test/xml/test_comment.rb
|
184
184
|
- test/xml/test_document.rb
|
185
|
+
- test/xml/test_document_encoding.rb
|
185
186
|
- test/xml/test_document_fragment.rb
|
186
187
|
- test/xml/test_dtd.rb
|
188
|
+
- test/xml/test_dtd_encoding.rb
|
187
189
|
- test/xml/test_entity_reference.rb
|
188
190
|
- test/xml/test_node.rb
|
191
|
+
- test/xml/test_node_encoding.rb
|
189
192
|
- test/xml/test_node_set.rb
|
190
193
|
- test/xml/test_processing_instruction.rb
|
194
|
+
- test/xml/test_reader_encoding.rb
|
191
195
|
- test/xml/test_text.rb
|
192
196
|
- test/xml/test_xpath.rb
|
193
197
|
- vendor/hoe.rb
|
@@ -249,11 +253,15 @@ test_files:
|
|
249
253
|
- test/xml/test_cdata.rb
|
250
254
|
- test/xml/test_comment.rb
|
251
255
|
- test/xml/test_document.rb
|
256
|
+
- test/xml/test_document_encoding.rb
|
252
257
|
- test/xml/test_document_fragment.rb
|
253
258
|
- test/xml/test_dtd.rb
|
259
|
+
- test/xml/test_dtd_encoding.rb
|
254
260
|
- test/xml/test_entity_reference.rb
|
255
261
|
- test/xml/test_node.rb
|
262
|
+
- test/xml/test_node_encoding.rb
|
256
263
|
- test/xml/test_node_set.rb
|
257
264
|
- test/xml/test_processing_instruction.rb
|
265
|
+
- test/xml/test_reader_encoding.rb
|
258
266
|
- test/xml/test_text.rb
|
259
267
|
- test/xml/test_xpath.rb
|