nokogiri 1.11.1 → 1.11.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +20 -15
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +253 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +66 -65
- data/ext/nokogiri/html_sax_push_parser.c +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +171 -63
- data/ext/nokogiri/nokogiri.h +158 -75
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +221 -164
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +338 -286
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +100 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +1 -22
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +31 -8
- data/lib/nokogiri/xml/document.rb +31 -11
- data/lib/nokogiri/xml/node.rb +38 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +70 -0
- metadata +8 -41
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -25
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -1,4 +1,6 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtmlDocument ;
|
2
4
|
|
3
5
|
static ID id_encoding_found;
|
4
6
|
static ID id_to_s;
|
@@ -23,8 +25,7 @@ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
|
|
23
25
|
RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
|
24
26
|
RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
|
25
27
|
);
|
26
|
-
rb_doc =
|
27
|
-
rb_obj_call_init(rb_doc, argc, argv);
|
28
|
+
rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
|
28
29
|
return rb_doc ;
|
29
30
|
}
|
30
31
|
|
@@ -47,7 +48,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco
|
|
47
48
|
|
48
49
|
xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher);
|
49
50
|
|
50
|
-
c_doc = htmlReadIO(
|
51
|
+
c_doc = htmlReadIO(noko_io_read, noko_io_close, (void *)rb_io, c_url, c_encoding, options);
|
51
52
|
|
52
53
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
53
54
|
|
@@ -81,7 +82,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco
|
|
81
82
|
return Qnil;
|
82
83
|
}
|
83
84
|
|
84
|
-
rb_doc =
|
85
|
+
rb_doc = noko_xml_document_wrap(klass, c_doc);
|
85
86
|
rb_iv_set(rb_doc, "@errors", rb_error_list);
|
86
87
|
return rb_doc;
|
87
88
|
}
|
@@ -129,7 +130,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r
|
|
129
130
|
return Qnil;
|
130
131
|
}
|
131
132
|
|
132
|
-
rb_doc =
|
133
|
+
rb_doc = noko_xml_document_wrap(klass, c_doc);
|
133
134
|
rb_iv_set(rb_doc, "@errors", rb_error_list);
|
134
135
|
return rb_doc;
|
135
136
|
}
|
@@ -148,17 +149,11 @@ rb_html_document_type(VALUE self)
|
|
148
149
|
return INT2NUM((long)doc->type);
|
149
150
|
}
|
150
151
|
|
151
|
-
VALUE cNokogiriHtmlDocument ;
|
152
|
-
|
153
152
|
void
|
154
|
-
|
153
|
+
noko_init_html_document()
|
155
154
|
{
|
156
|
-
|
157
|
-
|
158
|
-
VALUE nokogiri_xml_node = rb_define_class_under(nokogiri_xml, "Node", rb_cObject);
|
159
|
-
VALUE nokogiri_xml_document = rb_define_class_under(nokogiri_xml, "Document", nokogiri_xml_node);
|
160
|
-
VALUE nokogiri_html = rb_define_module_under(nokogiri, "HTML");
|
161
|
-
cNokogiriHtmlDocument = rb_define_class_under(nokogiri_html, "Document", nokogiri_xml_document);
|
155
|
+
assert(cNokogiriXmlDocument);
|
156
|
+
cNokogiriHtmlDocument = rb_define_class_under(mNokogiriHtml, "Document", cNokogiriXmlDocument);
|
162
157
|
|
163
158
|
rb_define_singleton_method(cNokogiriHtmlDocument, "read_memory", rb_html_document_s_read_memory, 4);
|
164
159
|
rb_define_singleton_method(cNokogiriHtmlDocument, "read_io", rb_html_document_s_read_io, 4);
|
@@ -1,4 +1,6 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtmlElementDescription ;
|
2
4
|
|
3
5
|
/*
|
4
6
|
* call-seq:
|
@@ -6,9 +8,10 @@
|
|
6
8
|
*
|
7
9
|
* A list of required attributes for this element
|
8
10
|
*/
|
9
|
-
static VALUE
|
11
|
+
static VALUE
|
12
|
+
required_attributes(VALUE self)
|
10
13
|
{
|
11
|
-
const htmlElemDesc *
|
14
|
+
const htmlElemDesc *description;
|
12
15
|
VALUE list;
|
13
16
|
int i;
|
14
17
|
|
@@ -16,9 +19,9 @@ static VALUE required_attributes(VALUE self)
|
|
16
19
|
|
17
20
|
list = rb_ary_new();
|
18
21
|
|
19
|
-
if(NULL == description->attrs_req) return list;
|
22
|
+
if (NULL == description->attrs_req) { return list; }
|
20
23
|
|
21
|
-
for(i = 0; description->attrs_depr[i]; i++) {
|
24
|
+
for (i = 0; description->attrs_depr[i]; i++) {
|
22
25
|
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
|
23
26
|
}
|
24
27
|
|
@@ -31,9 +34,10 @@ static VALUE required_attributes(VALUE self)
|
|
31
34
|
*
|
32
35
|
* A list of deprecated attributes for this element
|
33
36
|
*/
|
34
|
-
static VALUE
|
37
|
+
static VALUE
|
38
|
+
deprecated_attributes(VALUE self)
|
35
39
|
{
|
36
|
-
const htmlElemDesc *
|
40
|
+
const htmlElemDesc *description;
|
37
41
|
VALUE list;
|
38
42
|
int i;
|
39
43
|
|
@@ -41,9 +45,9 @@ static VALUE deprecated_attributes(VALUE self)
|
|
41
45
|
|
42
46
|
list = rb_ary_new();
|
43
47
|
|
44
|
-
if(NULL == description->attrs_depr) return list;
|
48
|
+
if (NULL == description->attrs_depr) { return list; }
|
45
49
|
|
46
|
-
for(i = 0; description->attrs_depr[i]; i++) {
|
50
|
+
for (i = 0; description->attrs_depr[i]; i++) {
|
47
51
|
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
|
48
52
|
}
|
49
53
|
|
@@ -56,9 +60,10 @@ static VALUE deprecated_attributes(VALUE self)
|
|
56
60
|
*
|
57
61
|
* A list of optional attributes for this element
|
58
62
|
*/
|
59
|
-
static VALUE
|
63
|
+
static VALUE
|
64
|
+
optional_attributes(VALUE self)
|
60
65
|
{
|
61
|
-
const htmlElemDesc *
|
66
|
+
const htmlElemDesc *description;
|
62
67
|
VALUE list;
|
63
68
|
int i;
|
64
69
|
|
@@ -66,9 +71,9 @@ static VALUE optional_attributes(VALUE self)
|
|
66
71
|
|
67
72
|
list = rb_ary_new();
|
68
73
|
|
69
|
-
if(NULL == description->attrs_opt) return list;
|
74
|
+
if (NULL == description->attrs_opt) { return list; }
|
70
75
|
|
71
|
-
for(i = 0; description->attrs_opt[i]; i++) {
|
76
|
+
for (i = 0; description->attrs_opt[i]; i++) {
|
72
77
|
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
|
73
78
|
}
|
74
79
|
|
@@ -81,13 +86,15 @@ static VALUE optional_attributes(VALUE self)
|
|
81
86
|
*
|
82
87
|
* The default sub element for this element
|
83
88
|
*/
|
84
|
-
static VALUE
|
89
|
+
static VALUE
|
90
|
+
default_sub_element(VALUE self)
|
85
91
|
{
|
86
|
-
const htmlElemDesc *
|
92
|
+
const htmlElemDesc *description;
|
87
93
|
Data_Get_Struct(self, htmlElemDesc, description);
|
88
94
|
|
89
|
-
if (description->defaultsubelt)
|
95
|
+
if (description->defaultsubelt) {
|
90
96
|
return NOKOGIRI_STR_NEW2(description->defaultsubelt);
|
97
|
+
}
|
91
98
|
|
92
99
|
return Qnil;
|
93
100
|
}
|
@@ -98,9 +105,10 @@ static VALUE default_sub_element(VALUE self)
|
|
98
105
|
*
|
99
106
|
* A list of allowed sub elements for this element.
|
100
107
|
*/
|
101
|
-
static VALUE
|
108
|
+
static VALUE
|
109
|
+
sub_elements(VALUE self)
|
102
110
|
{
|
103
|
-
const htmlElemDesc *
|
111
|
+
const htmlElemDesc *description;
|
104
112
|
VALUE list;
|
105
113
|
int i;
|
106
114
|
|
@@ -108,9 +116,9 @@ static VALUE sub_elements(VALUE self)
|
|
108
116
|
|
109
117
|
list = rb_ary_new();
|
110
118
|
|
111
|
-
if(NULL == description->subelts) return list;
|
119
|
+
if (NULL == description->subelts) { return list; }
|
112
120
|
|
113
|
-
for(i = 0; description->subelts[i]; i++) {
|
121
|
+
for (i = 0; description->subelts[i]; i++) {
|
114
122
|
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
|
115
123
|
}
|
116
124
|
|
@@ -123,9 +131,10 @@ static VALUE sub_elements(VALUE self)
|
|
123
131
|
*
|
124
132
|
* The description for this element
|
125
133
|
*/
|
126
|
-
static VALUE
|
134
|
+
static VALUE
|
135
|
+
description(VALUE self)
|
127
136
|
{
|
128
|
-
const htmlElemDesc *
|
137
|
+
const htmlElemDesc *description;
|
129
138
|
Data_Get_Struct(self, htmlElemDesc, description);
|
130
139
|
|
131
140
|
return NOKOGIRI_STR_NEW2(description->desc);
|
@@ -137,12 +146,13 @@ static VALUE description(VALUE self)
|
|
137
146
|
*
|
138
147
|
* Is this element an inline element?
|
139
148
|
*/
|
140
|
-
static VALUE
|
149
|
+
static VALUE
|
150
|
+
inline_eh(VALUE self)
|
141
151
|
{
|
142
|
-
const htmlElemDesc *
|
152
|
+
const htmlElemDesc *description;
|
143
153
|
Data_Get_Struct(self, htmlElemDesc, description);
|
144
154
|
|
145
|
-
if(description->isinline) return Qtrue;
|
155
|
+
if (description->isinline) { return Qtrue; }
|
146
156
|
return Qfalse;
|
147
157
|
}
|
148
158
|
|
@@ -152,12 +162,13 @@ static VALUE inline_eh(VALUE self)
|
|
152
162
|
*
|
153
163
|
* Is this element deprecated?
|
154
164
|
*/
|
155
|
-
static VALUE
|
165
|
+
static VALUE
|
166
|
+
deprecated_eh(VALUE self)
|
156
167
|
{
|
157
|
-
const htmlElemDesc *
|
168
|
+
const htmlElemDesc *description;
|
158
169
|
Data_Get_Struct(self, htmlElemDesc, description);
|
159
170
|
|
160
|
-
if(description->depr) return Qtrue;
|
171
|
+
if (description->depr) { return Qtrue; }
|
161
172
|
return Qfalse;
|
162
173
|
}
|
163
174
|
|
@@ -167,12 +178,13 @@ static VALUE deprecated_eh(VALUE self)
|
|
167
178
|
*
|
168
179
|
* Is this an empty element?
|
169
180
|
*/
|
170
|
-
static VALUE
|
181
|
+
static VALUE
|
182
|
+
empty_eh(VALUE self)
|
171
183
|
{
|
172
|
-
const htmlElemDesc *
|
184
|
+
const htmlElemDesc *description;
|
173
185
|
Data_Get_Struct(self, htmlElemDesc, description);
|
174
186
|
|
175
|
-
if(description->empty) return Qtrue;
|
187
|
+
if (description->empty) { return Qtrue; }
|
176
188
|
return Qfalse;
|
177
189
|
}
|
178
190
|
|
@@ -182,12 +194,13 @@ static VALUE empty_eh(VALUE self)
|
|
182
194
|
*
|
183
195
|
* Should the end tag be saved?
|
184
196
|
*/
|
185
|
-
static VALUE
|
197
|
+
static VALUE
|
198
|
+
save_end_tag_eh(VALUE self)
|
186
199
|
{
|
187
|
-
const htmlElemDesc *
|
200
|
+
const htmlElemDesc *description;
|
188
201
|
Data_Get_Struct(self, htmlElemDesc, description);
|
189
202
|
|
190
|
-
if(description->saveEndTag) return Qtrue;
|
203
|
+
if (description->saveEndTag) { return Qtrue; }
|
191
204
|
return Qfalse;
|
192
205
|
}
|
193
206
|
|
@@ -197,12 +210,13 @@ static VALUE save_end_tag_eh(VALUE self)
|
|
197
210
|
*
|
198
211
|
* Can the end tag be implied for this tag?
|
199
212
|
*/
|
200
|
-
static VALUE
|
213
|
+
static VALUE
|
214
|
+
implied_end_tag_eh(VALUE self)
|
201
215
|
{
|
202
|
-
const htmlElemDesc *
|
216
|
+
const htmlElemDesc *description;
|
203
217
|
Data_Get_Struct(self, htmlElemDesc, description);
|
204
218
|
|
205
|
-
if(description->endTag) return Qtrue;
|
219
|
+
if (description->endTag) { return Qtrue; }
|
206
220
|
return Qfalse;
|
207
221
|
}
|
208
222
|
|
@@ -212,12 +226,13 @@ static VALUE implied_end_tag_eh(VALUE self)
|
|
212
226
|
*
|
213
227
|
* Can the start tag be implied for this tag?
|
214
228
|
*/
|
215
|
-
static VALUE
|
229
|
+
static VALUE
|
230
|
+
implied_start_tag_eh(VALUE self)
|
216
231
|
{
|
217
|
-
const htmlElemDesc *
|
232
|
+
const htmlElemDesc *description;
|
218
233
|
Data_Get_Struct(self, htmlElemDesc, description);
|
219
234
|
|
220
|
-
if(description->startTag) return Qtrue;
|
235
|
+
if (description->startTag) { return Qtrue; }
|
221
236
|
return Qfalse;
|
222
237
|
}
|
223
238
|
|
@@ -227,12 +242,13 @@ static VALUE implied_start_tag_eh(VALUE self)
|
|
227
242
|
*
|
228
243
|
* Get the tag name for this ElementDescription
|
229
244
|
*/
|
230
|
-
static VALUE
|
245
|
+
static VALUE
|
246
|
+
name(VALUE self)
|
231
247
|
{
|
232
|
-
const htmlElemDesc *
|
248
|
+
const htmlElemDesc *description;
|
233
249
|
Data_Get_Struct(self, htmlElemDesc, description);
|
234
250
|
|
235
|
-
if(NULL == description->name) return Qnil;
|
251
|
+
if (NULL == description->name) { return Qnil; }
|
236
252
|
return NOKOGIRI_STR_NEW2(description->name);
|
237
253
|
}
|
238
254
|
|
@@ -242,38 +258,35 @@ static VALUE name(VALUE self)
|
|
242
258
|
*
|
243
259
|
* Get ElementDescription for +tag_name+
|
244
260
|
*/
|
245
|
-
static VALUE
|
261
|
+
static VALUE
|
262
|
+
get_description(VALUE klass, VALUE tag_name)
|
246
263
|
{
|
247
|
-
const htmlElemDesc *
|
248
|
-
|
249
|
-
|
264
|
+
const htmlElemDesc *description = htmlTagLookup(
|
265
|
+
(const xmlChar *)StringValueCStr(tag_name)
|
266
|
+
);
|
250
267
|
|
251
|
-
if(NULL == description) return Qnil;
|
268
|
+
if (NULL == description) { return Qnil; }
|
252
269
|
return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description);
|
253
270
|
}
|
254
271
|
|
255
|
-
|
256
|
-
|
272
|
+
void
|
273
|
+
noko_init_html_element_description()
|
257
274
|
{
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
cNokogiriHtmlElementDescription
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
rb_define_method(
|
267
|
-
rb_define_method(
|
268
|
-
rb_define_method(
|
269
|
-
rb_define_method(
|
270
|
-
rb_define_method(
|
271
|
-
rb_define_method(
|
272
|
-
rb_define_method(
|
273
|
-
rb_define_method(
|
274
|
-
rb_define_method(
|
275
|
-
rb_define_method(klass, "default_sub_element", default_sub_element, 0);
|
276
|
-
rb_define_method(klass, "optional_attributes", optional_attributes, 0);
|
277
|
-
rb_define_method(klass, "deprecated_attributes", deprecated_attributes, 0);
|
278
|
-
rb_define_method(klass, "required_attributes", required_attributes, 0);
|
275
|
+
cNokogiriHtmlElementDescription = rb_define_class_under(mNokogiriHtml, "ElementDescription", rb_cObject);
|
276
|
+
|
277
|
+
rb_define_singleton_method(cNokogiriHtmlElementDescription, "[]", get_description, 1);
|
278
|
+
|
279
|
+
rb_define_method(cNokogiriHtmlElementDescription, "name", name, 0);
|
280
|
+
rb_define_method(cNokogiriHtmlElementDescription, "implied_start_tag?", implied_start_tag_eh, 0);
|
281
|
+
rb_define_method(cNokogiriHtmlElementDescription, "implied_end_tag?", implied_end_tag_eh, 0);
|
282
|
+
rb_define_method(cNokogiriHtmlElementDescription, "save_end_tag?", save_end_tag_eh, 0);
|
283
|
+
rb_define_method(cNokogiriHtmlElementDescription, "empty?", empty_eh, 0);
|
284
|
+
rb_define_method(cNokogiriHtmlElementDescription, "deprecated?", deprecated_eh, 0);
|
285
|
+
rb_define_method(cNokogiriHtmlElementDescription, "inline?", inline_eh, 0);
|
286
|
+
rb_define_method(cNokogiriHtmlElementDescription, "description", description, 0);
|
287
|
+
rb_define_method(cNokogiriHtmlElementDescription, "sub_elements", sub_elements, 0);
|
288
|
+
rb_define_method(cNokogiriHtmlElementDescription, "default_sub_element", default_sub_element, 0);
|
289
|
+
rb_define_method(cNokogiriHtmlElementDescription, "optional_attributes", optional_attributes, 0);
|
290
|
+
rb_define_method(cNokogiriHtmlElementDescription, "deprecated_attributes", deprecated_attributes, 0);
|
291
|
+
rb_define_method(cNokogiriHtmlElementDescription, "required_attributes", required_attributes, 0);
|
279
292
|
}
|
@@ -1,4 +1,6 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
static VALUE cNokogiriHtmlEntityLookup;
|
2
4
|
|
3
5
|
/*
|
4
6
|
* call-seq:
|
@@ -6,27 +8,30 @@
|
|
6
8
|
*
|
7
9
|
* Get the HTML::EntityDescription for +key+
|
8
10
|
*/
|
9
|
-
static VALUE
|
11
|
+
static VALUE
|
12
|
+
get(VALUE _, VALUE rb_entity_name)
|
10
13
|
{
|
11
|
-
|
12
|
-
|
13
|
-
VALUE
|
14
|
+
VALUE cNokogiriHtmlEntityDescription;
|
15
|
+
const htmlEntityDesc *c_entity_desc;
|
16
|
+
VALUE rb_constructor_args[3];
|
14
17
|
|
15
|
-
|
16
|
-
|
18
|
+
c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name));
|
19
|
+
if (NULL == c_entity_desc) {
|
20
|
+
return Qnil;
|
21
|
+
}
|
17
22
|
|
18
|
-
|
19
|
-
|
20
|
-
|
23
|
+
rb_constructor_args[0] = INT2NUM((long)c_entity_desc->value);
|
24
|
+
rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
|
25
|
+
rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
|
21
26
|
|
22
|
-
|
27
|
+
cNokogiriHtmlEntityDescription = rb_const_get_at(mNokogiriHtml, rb_intern("EntityDescription"));
|
28
|
+
return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtmlEntityDescription);
|
23
29
|
}
|
24
30
|
|
25
|
-
void
|
31
|
+
void
|
32
|
+
noko_init_html_entity_lookup()
|
26
33
|
{
|
27
|
-
|
28
|
-
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
29
|
-
VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject);
|
34
|
+
cNokogiriHtmlEntityLookup = rb_define_class_under(mNokogiriHtml, "EntityLookup", rb_cObject);
|
30
35
|
|
31
|
-
rb_define_method(
|
36
|
+
rb_define_method(cNokogiriHtmlEntityLookup, "get", get, 1);
|
32
37
|
}
|
@@ -1,8 +1,9 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE cNokogiriHtmlSaxParserContext ;
|
4
4
|
|
5
|
-
static void
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctxt)
|
6
7
|
{
|
7
8
|
NOKOGIRI_DEBUG_START(ctxt);
|
8
9
|
|
@@ -16,103 +17,103 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
16
17
|
static VALUE
|
17
18
|
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
18
19
|
{
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
htmlParserCtxtPtr ctxt;
|
21
|
+
|
22
|
+
if (NIL_P(data)) {
|
23
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
24
|
+
}
|
25
|
+
if (!(int)RSTRING_LEN(data)) {
|
26
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
27
|
+
}
|
28
|
+
|
29
|
+
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
|
30
|
+
(int)RSTRING_LEN(data));
|
31
|
+
if (ctxt->sax) {
|
32
|
+
xmlFree(ctxt->sax);
|
33
|
+
ctxt->sax = NULL;
|
34
|
+
}
|
35
|
+
|
36
|
+
if (RTEST(encoding)) {
|
37
|
+
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
|
38
|
+
if (enc != NULL) {
|
39
|
+
xmlSwitchToEncoding(ctxt, enc);
|
40
|
+
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
41
|
+
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
42
|
+
StringValueCStr(encoding));
|
43
|
+
}
|
31
44
|
}
|
45
|
+
}
|
32
46
|
|
33
|
-
|
34
|
-
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
|
35
|
-
if (enc != NULL) {
|
36
|
-
xmlSwitchToEncoding(ctxt, enc);
|
37
|
-
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
38
|
-
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
39
|
-
StringValueCStr(encoding));
|
40
|
-
}
|
41
|
-
}
|
42
|
-
}
|
43
|
-
|
44
|
-
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
47
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
45
48
|
}
|
46
49
|
|
47
|
-
static VALUE
|
50
|
+
static VALUE
|
51
|
+
parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
48
52
|
{
|
49
53
|
htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
|
50
|
-
|
51
|
-
|
52
|
-
|
54
|
+
StringValueCStr(filename),
|
55
|
+
StringValueCStr(encoding)
|
56
|
+
);
|
53
57
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
54
58
|
}
|
55
59
|
|
56
60
|
static VALUE
|
57
61
|
parse_doc(VALUE ctxt_val)
|
58
62
|
{
|
59
|
-
|
60
|
-
|
61
|
-
|
63
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
64
|
+
htmlParseDocument(ctxt);
|
65
|
+
return Qnil;
|
62
66
|
}
|
63
67
|
|
64
68
|
static VALUE
|
65
69
|
parse_doc_finalize(VALUE ctxt_val)
|
66
70
|
{
|
67
|
-
|
71
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
68
72
|
|
69
|
-
|
70
|
-
|
73
|
+
if (ctxt->myDoc) {
|
74
|
+
xmlFreeDoc(ctxt->myDoc);
|
75
|
+
}
|
71
76
|
|
72
|
-
|
73
|
-
|
77
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
78
|
+
return Qnil;
|
74
79
|
}
|
75
80
|
|
76
81
|
static VALUE
|
77
82
|
parse_with(VALUE self, VALUE sax_handler)
|
78
83
|
{
|
79
|
-
|
80
|
-
|
84
|
+
htmlParserCtxtPtr ctxt;
|
85
|
+
htmlSAXHandlerPtr sax;
|
81
86
|
|
82
|
-
|
83
|
-
|
87
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
88
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
89
|
+
}
|
84
90
|
|
85
|
-
|
86
|
-
|
91
|
+
Data_Get_Struct(self, htmlParserCtxt, ctxt);
|
92
|
+
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
|
87
93
|
|
88
|
-
|
89
|
-
|
90
|
-
|
94
|
+
/* Free the sax handler since we'll assign our own */
|
95
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
96
|
+
xmlFree(ctxt->sax);
|
97
|
+
}
|
91
98
|
|
92
|
-
|
93
|
-
|
99
|
+
ctxt->sax = sax;
|
100
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
94
101
|
|
95
|
-
|
102
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
96
103
|
|
97
|
-
|
104
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
98
105
|
|
99
|
-
|
106
|
+
return self;
|
100
107
|
}
|
101
108
|
|
102
|
-
void
|
109
|
+
void
|
110
|
+
noko_init_html_sax_parser_context()
|
103
111
|
{
|
104
|
-
|
105
|
-
|
106
|
-
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
107
|
-
VALUE sax = rb_define_module_under(xml, "SAX");
|
108
|
-
VALUE hsax = rb_define_module_under(html, "SAX");
|
109
|
-
VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject);
|
110
|
-
VALUE klass = rb_define_class_under(hsax, "ParserContext", pc);
|
111
|
-
|
112
|
-
cNokogiriHtmlSaxParserContext = klass;
|
112
|
+
assert(cNokogiriXmlSaxParserContext);
|
113
|
+
cNokogiriHtmlSaxParserContext = rb_define_class_under(mNokogiriHtmlSax, "ParserContext", cNokogiriXmlSaxParserContext);
|
113
114
|
|
114
|
-
rb_define_singleton_method(
|
115
|
-
rb_define_singleton_method(
|
115
|
+
rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "memory", parse_memory, 2);
|
116
|
+
rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "file", parse_file, 2);
|
116
117
|
|
117
|
-
rb_define_method(
|
118
|
+
rb_define_method(cNokogiriHtmlSaxParserContext, "parse_with", parse_with, 1);
|
118
119
|
}
|