nokogiri 1.9.1 → 1.15.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +45 -0
- data/LICENSE-DEPENDENCIES.md +1636 -1024
- data/LICENSE.md +5 -28
- data/README.md +203 -89
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -61
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +864 -418
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +215 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +40 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +401 -240
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +135 -61
- data/ext/nokogiri/xml_node.c +1346 -677
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +347 -212
- data/ext/nokogiri/xml_relax_ng.c +86 -77
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +145 -103
- data/ext/nokogiri/xml_sax_push_parser.c +64 -36
- data/ext/nokogiri/xml_schema.c +138 -81
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +36 -26
- data/ext/nokogiri/xml_xpath_context.c +366 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +224 -95
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +392 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +98 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -126
- data/lib/nokogiri/xml/document_fragment.rb +93 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +45 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1088 -418
- data/lib/nokogiri/xml/node_set.rb +173 -63
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
- metadata +128 -265
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
- data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
- data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
@@ -1,279 +0,0 @@
|
|
1
|
-
#include <html_element_description.h>
|
2
|
-
|
3
|
-
/*
|
4
|
-
* call-seq:
|
5
|
-
* required_attributes
|
6
|
-
*
|
7
|
-
* A list of required attributes for this element
|
8
|
-
*/
|
9
|
-
static VALUE required_attributes(VALUE self)
|
10
|
-
{
|
11
|
-
const htmlElemDesc * description;
|
12
|
-
VALUE list;
|
13
|
-
int i;
|
14
|
-
|
15
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
16
|
-
|
17
|
-
list = rb_ary_new();
|
18
|
-
|
19
|
-
if(NULL == description->attrs_req) return list;
|
20
|
-
|
21
|
-
for(i = 0; description->attrs_depr[i]; i++) {
|
22
|
-
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
|
23
|
-
}
|
24
|
-
|
25
|
-
return list;
|
26
|
-
}
|
27
|
-
|
28
|
-
/*
|
29
|
-
* call-seq:
|
30
|
-
* deprecated_attributes
|
31
|
-
*
|
32
|
-
* A list of deprecated attributes for this element
|
33
|
-
*/
|
34
|
-
static VALUE deprecated_attributes(VALUE self)
|
35
|
-
{
|
36
|
-
const htmlElemDesc * description;
|
37
|
-
VALUE list;
|
38
|
-
int i;
|
39
|
-
|
40
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
41
|
-
|
42
|
-
list = rb_ary_new();
|
43
|
-
|
44
|
-
if(NULL == description->attrs_depr) return list;
|
45
|
-
|
46
|
-
for(i = 0; description->attrs_depr[i]; i++) {
|
47
|
-
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
|
48
|
-
}
|
49
|
-
|
50
|
-
return list;
|
51
|
-
}
|
52
|
-
|
53
|
-
/*
|
54
|
-
* call-seq:
|
55
|
-
* optional_attributes
|
56
|
-
*
|
57
|
-
* A list of optional attributes for this element
|
58
|
-
*/
|
59
|
-
static VALUE optional_attributes(VALUE self)
|
60
|
-
{
|
61
|
-
const htmlElemDesc * description;
|
62
|
-
VALUE list;
|
63
|
-
int i;
|
64
|
-
|
65
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
66
|
-
|
67
|
-
list = rb_ary_new();
|
68
|
-
|
69
|
-
if(NULL == description->attrs_opt) return list;
|
70
|
-
|
71
|
-
for(i = 0; description->attrs_opt[i]; i++) {
|
72
|
-
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
|
73
|
-
}
|
74
|
-
|
75
|
-
return list;
|
76
|
-
}
|
77
|
-
|
78
|
-
/*
|
79
|
-
* call-seq:
|
80
|
-
* default_sub_element
|
81
|
-
*
|
82
|
-
* The default sub element for this element
|
83
|
-
*/
|
84
|
-
static VALUE default_sub_element(VALUE self)
|
85
|
-
{
|
86
|
-
const htmlElemDesc * description;
|
87
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
88
|
-
|
89
|
-
if (description->defaultsubelt)
|
90
|
-
return NOKOGIRI_STR_NEW2(description->defaultsubelt);
|
91
|
-
|
92
|
-
return Qnil;
|
93
|
-
}
|
94
|
-
|
95
|
-
/*
|
96
|
-
* call-seq:
|
97
|
-
* sub_elements
|
98
|
-
*
|
99
|
-
* A list of allowed sub elements for this element.
|
100
|
-
*/
|
101
|
-
static VALUE sub_elements(VALUE self)
|
102
|
-
{
|
103
|
-
const htmlElemDesc * description;
|
104
|
-
VALUE list;
|
105
|
-
int i;
|
106
|
-
|
107
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
108
|
-
|
109
|
-
list = rb_ary_new();
|
110
|
-
|
111
|
-
if(NULL == description->subelts) return list;
|
112
|
-
|
113
|
-
for(i = 0; description->subelts[i]; i++) {
|
114
|
-
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
|
115
|
-
}
|
116
|
-
|
117
|
-
return list;
|
118
|
-
}
|
119
|
-
|
120
|
-
/*
|
121
|
-
* call-seq:
|
122
|
-
* description
|
123
|
-
*
|
124
|
-
* The description for this element
|
125
|
-
*/
|
126
|
-
static VALUE description(VALUE self)
|
127
|
-
{
|
128
|
-
const htmlElemDesc * description;
|
129
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
130
|
-
|
131
|
-
return NOKOGIRI_STR_NEW2(description->desc);
|
132
|
-
}
|
133
|
-
|
134
|
-
/*
|
135
|
-
* call-seq:
|
136
|
-
* inline?
|
137
|
-
*
|
138
|
-
* Is this element an inline element?
|
139
|
-
*/
|
140
|
-
static VALUE inline_eh(VALUE self)
|
141
|
-
{
|
142
|
-
const htmlElemDesc * description;
|
143
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
144
|
-
|
145
|
-
if(description->isinline) return Qtrue;
|
146
|
-
return Qfalse;
|
147
|
-
}
|
148
|
-
|
149
|
-
/*
|
150
|
-
* call-seq:
|
151
|
-
* deprecated?
|
152
|
-
*
|
153
|
-
* Is this element deprecated?
|
154
|
-
*/
|
155
|
-
static VALUE deprecated_eh(VALUE self)
|
156
|
-
{
|
157
|
-
const htmlElemDesc * description;
|
158
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
159
|
-
|
160
|
-
if(description->depr) return Qtrue;
|
161
|
-
return Qfalse;
|
162
|
-
}
|
163
|
-
|
164
|
-
/*
|
165
|
-
* call-seq:
|
166
|
-
* empty?
|
167
|
-
*
|
168
|
-
* Is this an empty element?
|
169
|
-
*/
|
170
|
-
static VALUE empty_eh(VALUE self)
|
171
|
-
{
|
172
|
-
const htmlElemDesc * description;
|
173
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
174
|
-
|
175
|
-
if(description->empty) return Qtrue;
|
176
|
-
return Qfalse;
|
177
|
-
}
|
178
|
-
|
179
|
-
/*
|
180
|
-
* call-seq:
|
181
|
-
* save_end_tag?
|
182
|
-
*
|
183
|
-
* Should the end tag be saved?
|
184
|
-
*/
|
185
|
-
static VALUE save_end_tag_eh(VALUE self)
|
186
|
-
{
|
187
|
-
const htmlElemDesc * description;
|
188
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
189
|
-
|
190
|
-
if(description->saveEndTag) return Qtrue;
|
191
|
-
return Qfalse;
|
192
|
-
}
|
193
|
-
|
194
|
-
/*
|
195
|
-
* call-seq:
|
196
|
-
* implied_end_tag?
|
197
|
-
*
|
198
|
-
* Can the end tag be implied for this tag?
|
199
|
-
*/
|
200
|
-
static VALUE implied_end_tag_eh(VALUE self)
|
201
|
-
{
|
202
|
-
const htmlElemDesc * description;
|
203
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
204
|
-
|
205
|
-
if(description->endTag) return Qtrue;
|
206
|
-
return Qfalse;
|
207
|
-
}
|
208
|
-
|
209
|
-
/*
|
210
|
-
* call-seq:
|
211
|
-
* implied_start_tag?
|
212
|
-
*
|
213
|
-
* Can the start tag be implied for this tag?
|
214
|
-
*/
|
215
|
-
static VALUE implied_start_tag_eh(VALUE self)
|
216
|
-
{
|
217
|
-
const htmlElemDesc * description;
|
218
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
219
|
-
|
220
|
-
if(description->startTag) return Qtrue;
|
221
|
-
return Qfalse;
|
222
|
-
}
|
223
|
-
|
224
|
-
/*
|
225
|
-
* call-seq:
|
226
|
-
* name
|
227
|
-
*
|
228
|
-
* Get the tag name for this ElemementDescription
|
229
|
-
*/
|
230
|
-
static VALUE name(VALUE self)
|
231
|
-
{
|
232
|
-
const htmlElemDesc * description;
|
233
|
-
Data_Get_Struct(self, htmlElemDesc, description);
|
234
|
-
|
235
|
-
if(NULL == description->name) return Qnil;
|
236
|
-
return NOKOGIRI_STR_NEW2(description->name);
|
237
|
-
}
|
238
|
-
|
239
|
-
/*
|
240
|
-
* call-seq:
|
241
|
-
* [](tag_name)
|
242
|
-
*
|
243
|
-
* Get ElemementDescription for +tag_name+
|
244
|
-
*/
|
245
|
-
static VALUE get_description(VALUE klass, VALUE tag_name)
|
246
|
-
{
|
247
|
-
const htmlElemDesc * description = htmlTagLookup(
|
248
|
-
(const xmlChar *)StringValueCStr(tag_name)
|
249
|
-
);
|
250
|
-
|
251
|
-
if(NULL == description) return Qnil;
|
252
|
-
return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description);
|
253
|
-
}
|
254
|
-
|
255
|
-
VALUE cNokogiriHtmlElementDescription ;
|
256
|
-
void init_html_element_description()
|
257
|
-
{
|
258
|
-
VALUE nokogiri = rb_define_module("Nokogiri");
|
259
|
-
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
260
|
-
VALUE klass = rb_define_class_under(html, "ElementDescription",rb_cObject);
|
261
|
-
|
262
|
-
cNokogiriHtmlElementDescription = klass;
|
263
|
-
|
264
|
-
rb_define_singleton_method(klass, "[]", get_description, 1);
|
265
|
-
|
266
|
-
rb_define_method(klass, "name", name, 0);
|
267
|
-
rb_define_method(klass, "implied_start_tag?", implied_start_tag_eh, 0);
|
268
|
-
rb_define_method(klass, "implied_end_tag?", implied_end_tag_eh, 0);
|
269
|
-
rb_define_method(klass, "save_end_tag?", save_end_tag_eh, 0);
|
270
|
-
rb_define_method(klass, "empty?", empty_eh, 0);
|
271
|
-
rb_define_method(klass, "deprecated?", deprecated_eh, 0);
|
272
|
-
rb_define_method(klass, "inline?", inline_eh, 0);
|
273
|
-
rb_define_method(klass, "description", description, 0);
|
274
|
-
rb_define_method(klass, "sub_elements", sub_elements, 0);
|
275
|
-
rb_define_method(klass, "default_sub_element", default_sub_element, 0);
|
276
|
-
rb_define_method(klass, "optional_attributes", optional_attributes, 0);
|
277
|
-
rb_define_method(klass, "deprecated_attributes", deprecated_attributes, 0);
|
278
|
-
rb_define_method(klass, "required_attributes", required_attributes, 0);
|
279
|
-
}
|
@@ -1,32 +0,0 @@
|
|
1
|
-
#include <html_entity_lookup.h>
|
2
|
-
|
3
|
-
/*
|
4
|
-
* call-seq:
|
5
|
-
* get(key)
|
6
|
-
*
|
7
|
-
* Get the HTML::EntityDescription for +key+
|
8
|
-
*/
|
9
|
-
static VALUE get(VALUE self, VALUE key)
|
10
|
-
{
|
11
|
-
const htmlEntityDesc * desc =
|
12
|
-
htmlEntityLookup((const xmlChar *)StringValueCStr(key));
|
13
|
-
VALUE klass, args[3];
|
14
|
-
|
15
|
-
if(NULL == desc) return Qnil;
|
16
|
-
klass = rb_const_get(mNokogiriHtml, rb_intern("EntityDescription"));
|
17
|
-
|
18
|
-
args[0] = INT2NUM((long)desc->value);
|
19
|
-
args[1] = NOKOGIRI_STR_NEW2(desc->name);
|
20
|
-
args[2] = NOKOGIRI_STR_NEW2(desc->desc);
|
21
|
-
|
22
|
-
return rb_class_new_instance(3, args, klass);
|
23
|
-
}
|
24
|
-
|
25
|
-
void init_html_entity_lookup()
|
26
|
-
{
|
27
|
-
VALUE nokogiri = rb_define_module("Nokogiri");
|
28
|
-
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
29
|
-
VALUE klass = rb_define_class_under(html, "EntityLookup", rb_cObject);
|
30
|
-
|
31
|
-
rb_define_method(klass, "get", get, 1);
|
32
|
-
}
|
@@ -1,116 +0,0 @@
|
|
1
|
-
#include <html_sax_parser_context.h>
|
2
|
-
|
3
|
-
/* Ruby class object for Nokogiri::HTML::SAX::ParserContext; assigned in init_html_sax_parser_context(). */
VALUE cNokogiriHtmlSaxParserContext ;
|
4
|
-
|
5
|
-
/*
 * Free callback registered with Data_Wrap_Struct() for parser contexts
 * created in this file.
 *
 * The sax pointer is cleared before the context is freed so that
 * htmlFreeParserCtxt() does not see a handler through ctxt->sax.
 * NOTE(review): presumably the handler belongs to the Ruby SAX parser object
 * installed by parse_with(); confirm against that object's own free function.
 */
static void deallocate(xmlParserCtxtPtr ctxt)
{
  /*
   * Pass the object actually being released to the debug macros. The
   * previous argument, `handler`, is not declared anywhere in this function.
   */
  NOKOGIRI_DEBUG_START(ctxt);

  ctxt->sax = NULL;

  htmlFreeParserCtxt(ctxt);

  NOKOGIRI_DEBUG_END(ctxt);
}
|
15
|
-
|
16
|
-
static VALUE
|
17
|
-
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
18
|
-
{
|
19
|
-
htmlParserCtxtPtr ctxt;
|
20
|
-
|
21
|
-
if (NIL_P(data))
|
22
|
-
rb_raise(rb_eArgError, "data cannot be nil");
|
23
|
-
if (!(int)RSTRING_LEN(data))
|
24
|
-
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
25
|
-
|
26
|
-
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
|
27
|
-
(int)RSTRING_LEN(data));
|
28
|
-
if (ctxt->sax) {
|
29
|
-
xmlFree(ctxt->sax);
|
30
|
-
ctxt->sax = NULL;
|
31
|
-
}
|
32
|
-
|
33
|
-
if (RTEST(encoding)) {
|
34
|
-
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
|
35
|
-
if (enc != NULL) {
|
36
|
-
xmlSwitchToEncoding(ctxt, enc);
|
37
|
-
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
38
|
-
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
39
|
-
StringValueCStr(encoding));
|
40
|
-
}
|
41
|
-
}
|
42
|
-
}
|
43
|
-
|
44
|
-
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
45
|
-
}
|
46
|
-
|
47
|
-
/*
 * call-seq:
 *  file(filename, encoding)
 *
 * Creates a parser context that reads HTML from the file +filename+ using
 * +encoding+. Both arguments must be Strings without embedded NUL bytes.
 *
 * Raises RuntimeError when libxml2 cannot create the context, instead of
 * wrapping a NULL pointer that parse_with() would later dereference.
 */
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
{
  /* Convert both arguments first; either conversion may raise. */
  const char *path = StringValueCStr(filename);
  const char *encoding_name = StringValueCStr(encoding);
  htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(path, encoding_name);

  if (ctxt == NULL)
    rb_raise(rb_eRuntimeError, "Could not create a parser context");

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
|
55
|
-
|
56
|
-
static VALUE
|
57
|
-
parse_doc(VALUE ctxt_val)
|
58
|
-
{
|
59
|
-
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
60
|
-
htmlParseDocument(ctxt);
|
61
|
-
return Qnil;
|
62
|
-
}
|
63
|
-
|
64
|
-
static VALUE
|
65
|
-
parse_doc_finalize(VALUE ctxt_val)
|
66
|
-
{
|
67
|
-
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
68
|
-
|
69
|
-
if (ctxt->myDoc)
|
70
|
-
xmlFreeDoc(ctxt->myDoc);
|
71
|
-
|
72
|
-
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
73
|
-
return Qnil;
|
74
|
-
}
|
75
|
-
|
76
|
-
static VALUE
|
77
|
-
parse_with(VALUE self, VALUE sax_handler)
|
78
|
-
{
|
79
|
-
htmlParserCtxtPtr ctxt;
|
80
|
-
htmlSAXHandlerPtr sax;
|
81
|
-
|
82
|
-
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
|
83
|
-
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
84
|
-
|
85
|
-
Data_Get_Struct(self, htmlParserCtxt, ctxt);
|
86
|
-
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
|
87
|
-
|
88
|
-
/* Free the sax handler since we'll assign our own */
|
89
|
-
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
|
90
|
-
xmlFree(ctxt->sax);
|
91
|
-
|
92
|
-
ctxt->sax = sax;
|
93
|
-
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
94
|
-
|
95
|
-
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
96
|
-
|
97
|
-
return self;
|
98
|
-
}
|
99
|
-
|
100
|
-
void init_html_sax_parser_context()
|
101
|
-
{
|
102
|
-
VALUE nokogiri = rb_define_module("Nokogiri");
|
103
|
-
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
104
|
-
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
105
|
-
VALUE sax = rb_define_module_under(xml, "SAX");
|
106
|
-
VALUE hsax = rb_define_module_under(html, "SAX");
|
107
|
-
VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject);
|
108
|
-
VALUE klass = rb_define_class_under(hsax, "ParserContext", pc);
|
109
|
-
|
110
|
-
cNokogiriHtmlSaxParserContext = klass;
|
111
|
-
|
112
|
-
rb_define_singleton_method(klass, "memory", parse_memory, 2);
|
113
|
-
rb_define_singleton_method(klass, "file", parse_file, 2);
|
114
|
-
|
115
|
-
rb_define_method(klass, "parse_with", parse_with, 1);
|
116
|
-
}
|
@@ -1,87 +0,0 @@
|
|
1
|
-
#include <html_sax_push_parser.h>
|
2
|
-
|
3
|
-
/*
|
4
|
-
* call-seq:
|
5
|
-
* native_write(chunk, last_chunk)
|
6
|
-
*
|
7
|
-
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
|
8
|
-
*/
|
9
|
-
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
10
|
-
{
|
11
|
-
xmlParserCtxtPtr ctx;
|
12
|
-
const char * chunk = NULL;
|
13
|
-
int size = 0;
|
14
|
-
|
15
|
-
|
16
|
-
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
17
|
-
|
18
|
-
if(Qnil != _chunk) {
|
19
|
-
chunk = StringValuePtr(_chunk);
|
20
|
-
size = (int)RSTRING_LEN(_chunk);
|
21
|
-
}
|
22
|
-
|
23
|
-
if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
24
|
-
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
25
|
-
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
26
|
-
Nokogiri_error_raise(NULL, e);
|
27
|
-
}
|
28
|
-
}
|
29
|
-
|
30
|
-
return self;
|
31
|
-
}
|
32
|
-
|
33
|
-
/*
|
34
|
-
* call-seq:
|
35
|
-
* initialize_native(xml_sax, filename)
|
36
|
-
*
|
37
|
-
* Initialize the push parser with +xml_sax+ using +filename+
|
38
|
-
*/
|
39
|
-
static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
40
|
-
VALUE encoding)
|
41
|
-
{
|
42
|
-
htmlSAXHandlerPtr sax;
|
43
|
-
const char * filename = NULL;
|
44
|
-
htmlParserCtxtPtr ctx;
|
45
|
-
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
46
|
-
|
47
|
-
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
48
|
-
|
49
|
-
if(_filename != Qnil) filename = StringValueCStr(_filename);
|
50
|
-
|
51
|
-
if (!NIL_P(encoding)) {
|
52
|
-
enc = xmlParseCharEncoding(StringValueCStr(encoding));
|
53
|
-
if (enc == XML_CHAR_ENCODING_ERROR)
|
54
|
-
rb_raise(rb_eArgError, "Unsupported Encoding");
|
55
|
-
}
|
56
|
-
|
57
|
-
ctx = htmlCreatePushParserCtxt(
|
58
|
-
sax,
|
59
|
-
NULL,
|
60
|
-
NULL,
|
61
|
-
0,
|
62
|
-
filename,
|
63
|
-
enc
|
64
|
-
);
|
65
|
-
if(ctx == NULL)
|
66
|
-
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
67
|
-
|
68
|
-
ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
|
69
|
-
|
70
|
-
ctx->sax2 = 1;
|
71
|
-
DATA_PTR(self) = ctx;
|
72
|
-
return self;
|
73
|
-
}
|
74
|
-
|
75
|
-
VALUE cNokogiriHtmlSaxPushParser;
|
76
|
-
void init_html_sax_push_parser()
|
77
|
-
{
|
78
|
-
VALUE nokogiri = rb_define_module("Nokogiri");
|
79
|
-
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
80
|
-
VALUE sax = rb_define_module_under(html, "SAX");
|
81
|
-
VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
|
82
|
-
|
83
|
-
cNokogiriHtmlSaxPushParser = klass;
|
84
|
-
|
85
|
-
rb_define_private_method(klass, "initialize_native", initialize_native, 3);
|
86
|
-
rb_define_private_method(klass, "native_write", native_write, 2);
|
87
|
-
}
|
data/ext/nokogiri/xml_attr.h
DELETED
data/ext/nokogiri/xml_cdata.h
DELETED
data/ext/nokogiri/xml_comment.h
DELETED
data/ext/nokogiri/xml_document.h
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
#ifndef NOKOGIRI_XML_DOCUMENT
#define NOKOGIRI_XML_DOCUMENT

#include <nokogiri.h>

/*
 * Per-document bookkeeping reached through a document's _private pointer
 * (see the DOC_* accessor macros below).
 * NOTE(review): field meanings are inferred from the macro names; confirm
 * against xml_document.c.
 */
struct _nokogiriTuple {
  VALUE         doc;            /* read by DOC_RUBY_OBJECT */
  st_table     *unlinkedNodes;  /* read by DOC_UNLINKED_NODE_HASH */
  VALUE         node_cache;     /* read by DOC_NODE_CACHE */
};
typedef struct _nokogiriTuple nokogiriTuple;
typedef nokogiriTuple * nokogiriTuplePtr;

void init_xml_document(void);
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);

/*
 * Accessors for the tuple stored in x->_private. The argument is
 * parenthesized so that any pointer-valued expression (not only a plain
 * identifier) expands correctly.
 */
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)((x)->_private))
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)((x)->_private))->doc)
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)((x)->_private))->unlinkedNodes)
#define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)((x)->_private))->node_cache)

extern VALUE cNokogiriXmlDocument ;
#endif
|
data/ext/nokogiri/xml_dtd.h
DELETED