nokogiri 1.11.1 → 1.12.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +232 -11
- data/LICENSE.md +1 -1
- data/README.md +27 -21
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +35 -474
- data/ext/nokogiri/extconf.rb +391 -243
- data/ext/nokogiri/gumbo.c +611 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +18 -23
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +206 -66
- data/ext/nokogiri/nokogiri.h +166 -76
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +258 -200
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +28 -17
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +58 -49
- data/ext/nokogiri/xml_node.c +473 -414
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +193 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +111 -106
- data/ext/nokogiri/xml_sax_parser_context.c +102 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +49 -41
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +86 -77
- data/ext/nokogiri/xslt_stylesheet.c +157 -156
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +17 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri.rb +31 -50
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +2 -2
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +17 -17
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +42 -9
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/document.rb +74 -28
- data/lib/nokogiri/xml/node.rb +45 -47
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +3 -5
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
- data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +117 -109
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -118
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -25
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,4 +1,6 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtml4Document ;
|
2
4
|
|
3
5
|
static ID id_encoding_found;
|
4
6
|
static ID id_to_s;
|
@@ -23,8 +25,7 @@ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
|
|
23
25
|
RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
|
24
26
|
RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
|
25
27
|
);
|
26
|
-
rb_doc =
|
27
|
-
rb_obj_call_init(rb_doc, argc, argv);
|
28
|
+
rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
|
28
29
|
return rb_doc ;
|
29
30
|
}
|
30
31
|
|
@@ -33,7 +34,7 @@ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
|
|
33
34
|
* read_io(io, url, encoding, options)
|
34
35
|
*
|
35
36
|
* Read the HTML document from +io+ with given +url+, +encoding+,
|
36
|
-
* and +options+. See Nokogiri::
|
37
|
+
* and +options+. See Nokogiri::HTML4.parse
|
37
38
|
*/
|
38
39
|
static VALUE
|
39
40
|
rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_encoding, VALUE rb_options)
|
@@ -47,7 +48,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco
|
|
47
48
|
|
48
49
|
xmlSetStructuredErrorFunc((void *)rb_error_list, Nokogiri_error_array_pusher);
|
49
50
|
|
50
|
-
c_doc = htmlReadIO(
|
51
|
+
c_doc = htmlReadIO(noko_io_read, noko_io_close, (void *)rb_io, c_url, c_encoding, options);
|
51
52
|
|
52
53
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
53
54
|
|
@@ -81,7 +82,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco
|
|
81
82
|
return Qnil;
|
82
83
|
}
|
83
84
|
|
84
|
-
rb_doc =
|
85
|
+
rb_doc = noko_xml_document_wrap(klass, c_doc);
|
85
86
|
rb_iv_set(rb_doc, "@errors", rb_error_list);
|
86
87
|
return rb_doc;
|
87
88
|
}
|
@@ -91,7 +92,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco
|
|
91
92
|
* read_memory(string, url, encoding, options)
|
92
93
|
*
|
93
94
|
* Read the HTML document contained in +string+ with given +url+, +encoding+,
|
94
|
-
* and +options+. See Nokogiri::
|
95
|
+
* and +options+. See Nokogiri::HTML4.parse
|
95
96
|
*/
|
96
97
|
static VALUE
|
97
98
|
rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE rb_encoding, VALUE rb_options)
|
@@ -129,7 +130,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r
|
|
129
130
|
return Qnil;
|
130
131
|
}
|
131
132
|
|
132
|
-
rb_doc =
|
133
|
+
rb_doc = noko_xml_document_wrap(klass, c_doc);
|
133
134
|
rb_iv_set(rb_doc, "@errors", rb_error_list);
|
134
135
|
return rb_doc;
|
135
136
|
}
|
@@ -148,23 +149,17 @@ rb_html_document_type(VALUE self)
|
|
148
149
|
return INT2NUM((long)doc->type);
|
149
150
|
}
|
150
151
|
|
151
|
-
VALUE cNokogiriHtmlDocument ;
|
152
|
-
|
153
152
|
void
|
154
|
-
|
153
|
+
noko_init_html_document()
|
155
154
|
{
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
rb_define_singleton_method(cNokogiriHtmlDocument, "read_io", rb_html_document_s_read_io, 4);
|
165
|
-
rb_define_singleton_method(cNokogiriHtmlDocument, "new", rb_html_document_s_new, -1);
|
166
|
-
|
167
|
-
rb_define_method(cNokogiriHtmlDocument, "type", rb_html_document_type, 0);
|
155
|
+
assert(cNokogiriXmlDocument);
|
156
|
+
cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument);
|
157
|
+
|
158
|
+
rb_define_singleton_method(cNokogiriHtml4Document, "read_memory", rb_html_document_s_read_memory, 4);
|
159
|
+
rb_define_singleton_method(cNokogiriHtml4Document, "read_io", rb_html_document_s_read_io, 4);
|
160
|
+
rb_define_singleton_method(cNokogiriHtml4Document, "new", rb_html_document_s_new, -1);
|
161
|
+
|
162
|
+
rb_define_method(cNokogiriHtml4Document, "type", rb_html_document_type, 0);
|
168
163
|
|
169
164
|
id_encoding_found = rb_intern("encoding_found");
|
170
165
|
id_to_s = rb_intern("to_s");
|
@@ -0,0 +1,294 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtml4ElementDescription ;
|
4
|
+
|
5
|
+
/*
|
6
|
+
* call-seq:
|
7
|
+
* required_attributes
|
8
|
+
*
|
9
|
+
* A list of required attributes for this element
|
10
|
+
*/
|
11
|
+
static VALUE
|
12
|
+
required_attributes(VALUE self)
|
13
|
+
{
|
14
|
+
const htmlElemDesc *description;
|
15
|
+
VALUE list;
|
16
|
+
int i;
|
17
|
+
|
18
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
19
|
+
|
20
|
+
list = rb_ary_new();
|
21
|
+
|
22
|
+
if (NULL == description->attrs_req) { return list; }
|
23
|
+
|
24
|
+
for (i = 0; description->attrs_depr[i]; i++) {
|
25
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_req[i]));
|
26
|
+
}
|
27
|
+
|
28
|
+
return list;
|
29
|
+
}
|
30
|
+
|
31
|
+
/*
|
32
|
+
* call-seq:
|
33
|
+
* deprecated_attributes
|
34
|
+
*
|
35
|
+
* A list of deprecated attributes for this element
|
36
|
+
*/
|
37
|
+
static VALUE
|
38
|
+
deprecated_attributes(VALUE self)
|
39
|
+
{
|
40
|
+
const htmlElemDesc *description;
|
41
|
+
VALUE list;
|
42
|
+
int i;
|
43
|
+
|
44
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
45
|
+
|
46
|
+
list = rb_ary_new();
|
47
|
+
|
48
|
+
if (NULL == description->attrs_depr) { return list; }
|
49
|
+
|
50
|
+
for (i = 0; description->attrs_depr[i]; i++) {
|
51
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_depr[i]));
|
52
|
+
}
|
53
|
+
|
54
|
+
return list;
|
55
|
+
}
|
56
|
+
|
57
|
+
/*
|
58
|
+
* call-seq:
|
59
|
+
* optional_attributes
|
60
|
+
*
|
61
|
+
* A list of optional attributes for this element
|
62
|
+
*/
|
63
|
+
static VALUE
|
64
|
+
optional_attributes(VALUE self)
|
65
|
+
{
|
66
|
+
const htmlElemDesc *description;
|
67
|
+
VALUE list;
|
68
|
+
int i;
|
69
|
+
|
70
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
71
|
+
|
72
|
+
list = rb_ary_new();
|
73
|
+
|
74
|
+
if (NULL == description->attrs_opt) { return list; }
|
75
|
+
|
76
|
+
for (i = 0; description->attrs_opt[i]; i++) {
|
77
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->attrs_opt[i]));
|
78
|
+
}
|
79
|
+
|
80
|
+
return list;
|
81
|
+
}
|
82
|
+
|
83
|
+
/*
|
84
|
+
* call-seq:
|
85
|
+
* default_sub_element
|
86
|
+
*
|
87
|
+
* The default sub element for this element
|
88
|
+
*/
|
89
|
+
static VALUE
|
90
|
+
default_sub_element(VALUE self)
|
91
|
+
{
|
92
|
+
const htmlElemDesc *description;
|
93
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
94
|
+
|
95
|
+
if (description->defaultsubelt) {
|
96
|
+
return NOKOGIRI_STR_NEW2(description->defaultsubelt);
|
97
|
+
}
|
98
|
+
|
99
|
+
return Qnil;
|
100
|
+
}
|
101
|
+
|
102
|
+
/*
|
103
|
+
* call-seq:
|
104
|
+
* sub_elements
|
105
|
+
*
|
106
|
+
* A list of allowed sub elements for this element.
|
107
|
+
*/
|
108
|
+
static VALUE
|
109
|
+
sub_elements(VALUE self)
|
110
|
+
{
|
111
|
+
const htmlElemDesc *description;
|
112
|
+
VALUE list;
|
113
|
+
int i;
|
114
|
+
|
115
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
116
|
+
|
117
|
+
list = rb_ary_new();
|
118
|
+
|
119
|
+
if (NULL == description->subelts) { return list; }
|
120
|
+
|
121
|
+
for (i = 0; description->subelts[i]; i++) {
|
122
|
+
rb_ary_push(list, NOKOGIRI_STR_NEW2(description->subelts[i]));
|
123
|
+
}
|
124
|
+
|
125
|
+
return list;
|
126
|
+
}
|
127
|
+
|
128
|
+
/*
|
129
|
+
* call-seq:
|
130
|
+
* description
|
131
|
+
*
|
132
|
+
* The description for this element
|
133
|
+
*/
|
134
|
+
static VALUE
|
135
|
+
description(VALUE self)
|
136
|
+
{
|
137
|
+
const htmlElemDesc *description;
|
138
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
139
|
+
|
140
|
+
return NOKOGIRI_STR_NEW2(description->desc);
|
141
|
+
}
|
142
|
+
|
143
|
+
/*
|
144
|
+
* call-seq:
|
145
|
+
* inline?
|
146
|
+
*
|
147
|
+
* Is this element an inline element?
|
148
|
+
*/
|
149
|
+
static VALUE
|
150
|
+
inline_eh(VALUE self)
|
151
|
+
{
|
152
|
+
const htmlElemDesc *description;
|
153
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
154
|
+
|
155
|
+
if (description->isinline) { return Qtrue; }
|
156
|
+
return Qfalse;
|
157
|
+
}
|
158
|
+
|
159
|
+
/*
|
160
|
+
* call-seq:
|
161
|
+
* deprecated?
|
162
|
+
*
|
163
|
+
* Is this element deprecated?
|
164
|
+
*/
|
165
|
+
static VALUE
|
166
|
+
deprecated_eh(VALUE self)
|
167
|
+
{
|
168
|
+
const htmlElemDesc *description;
|
169
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
170
|
+
|
171
|
+
if (description->depr) { return Qtrue; }
|
172
|
+
return Qfalse;
|
173
|
+
}
|
174
|
+
|
175
|
+
/*
|
176
|
+
* call-seq:
|
177
|
+
* empty?
|
178
|
+
*
|
179
|
+
* Is this an empty element?
|
180
|
+
*/
|
181
|
+
static VALUE
|
182
|
+
empty_eh(VALUE self)
|
183
|
+
{
|
184
|
+
const htmlElemDesc *description;
|
185
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
186
|
+
|
187
|
+
if (description->empty) { return Qtrue; }
|
188
|
+
return Qfalse;
|
189
|
+
}
|
190
|
+
|
191
|
+
/*
|
192
|
+
* call-seq:
|
193
|
+
* save_end_tag?
|
194
|
+
*
|
195
|
+
* Should the end tag be saved?
|
196
|
+
*/
|
197
|
+
static VALUE
|
198
|
+
save_end_tag_eh(VALUE self)
|
199
|
+
{
|
200
|
+
const htmlElemDesc *description;
|
201
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
202
|
+
|
203
|
+
if (description->saveEndTag) { return Qtrue; }
|
204
|
+
return Qfalse;
|
205
|
+
}
|
206
|
+
|
207
|
+
/*
|
208
|
+
* call-seq:
|
209
|
+
* implied_end_tag?
|
210
|
+
*
|
211
|
+
* Can the end tag be implied for this tag?
|
212
|
+
*/
|
213
|
+
static VALUE
|
214
|
+
implied_end_tag_eh(VALUE self)
|
215
|
+
{
|
216
|
+
const htmlElemDesc *description;
|
217
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
218
|
+
|
219
|
+
if (description->endTag) { return Qtrue; }
|
220
|
+
return Qfalse;
|
221
|
+
}
|
222
|
+
|
223
|
+
/*
|
224
|
+
* call-seq:
|
225
|
+
* implied_start_tag?
|
226
|
+
*
|
227
|
+
* Can the start tag be implied for this tag?
|
228
|
+
*/
|
229
|
+
static VALUE
|
230
|
+
implied_start_tag_eh(VALUE self)
|
231
|
+
{
|
232
|
+
const htmlElemDesc *description;
|
233
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
234
|
+
|
235
|
+
if (description->startTag) { return Qtrue; }
|
236
|
+
return Qfalse;
|
237
|
+
}
|
238
|
+
|
239
|
+
/*
|
240
|
+
* call-seq:
|
241
|
+
* name
|
242
|
+
*
|
243
|
+
* Get the tag name for this ElemementDescription
|
244
|
+
*/
|
245
|
+
static VALUE
|
246
|
+
name(VALUE self)
|
247
|
+
{
|
248
|
+
const htmlElemDesc *description;
|
249
|
+
Data_Get_Struct(self, htmlElemDesc, description);
|
250
|
+
|
251
|
+
if (NULL == description->name) { return Qnil; }
|
252
|
+
return NOKOGIRI_STR_NEW2(description->name);
|
253
|
+
}
|
254
|
+
|
255
|
+
/*
|
256
|
+
* call-seq:
|
257
|
+
* [](tag_name)
|
258
|
+
*
|
259
|
+
* Get ElemementDescription for +tag_name+
|
260
|
+
*/
|
261
|
+
static VALUE
|
262
|
+
get_description(VALUE klass, VALUE tag_name)
|
263
|
+
{
|
264
|
+
const htmlElemDesc *description = htmlTagLookup(
|
265
|
+
(const xmlChar *)StringValueCStr(tag_name)
|
266
|
+
);
|
267
|
+
|
268
|
+
if (NULL == description) { return Qnil; }
|
269
|
+
return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description);
|
270
|
+
}
|
271
|
+
|
272
|
+
void
|
273
|
+
noko_init_html_element_description()
|
274
|
+
{
|
275
|
+
cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject);
|
276
|
+
|
277
|
+
rb_undef_alloc_func(cNokogiriHtml4ElementDescription);
|
278
|
+
|
279
|
+
rb_define_singleton_method(cNokogiriHtml4ElementDescription, "[]", get_description, 1);
|
280
|
+
|
281
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "name", name, 0);
|
282
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "implied_start_tag?", implied_start_tag_eh, 0);
|
283
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "implied_end_tag?", implied_end_tag_eh, 0);
|
284
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "save_end_tag?", save_end_tag_eh, 0);
|
285
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "empty?", empty_eh, 0);
|
286
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "deprecated?", deprecated_eh, 0);
|
287
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "inline?", inline_eh, 0);
|
288
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "description", description, 0);
|
289
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "sub_elements", sub_elements, 0);
|
290
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "default_sub_element", default_sub_element, 0);
|
291
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "optional_attributes", optional_attributes, 0);
|
292
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "deprecated_attributes", deprecated_attributes, 0);
|
293
|
+
rb_define_method(cNokogiriHtml4ElementDescription, "required_attributes", required_attributes, 0);
|
294
|
+
}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
static VALUE cNokogiriHtml4EntityLookup;
|
4
|
+
|
5
|
+
/*
|
6
|
+
* call-seq:
|
7
|
+
* get(key)
|
8
|
+
*
|
9
|
+
* Get the HTML4::EntityDescription for +key+
|
10
|
+
*/
|
11
|
+
static VALUE
|
12
|
+
get(VALUE _, VALUE rb_entity_name)
|
13
|
+
{
|
14
|
+
VALUE cNokogiriHtml4EntityDescription;
|
15
|
+
const htmlEntityDesc *c_entity_desc;
|
16
|
+
VALUE rb_constructor_args[3];
|
17
|
+
|
18
|
+
c_entity_desc = htmlEntityLookup((const xmlChar *)StringValueCStr(rb_entity_name));
|
19
|
+
if (NULL == c_entity_desc) {
|
20
|
+
return Qnil;
|
21
|
+
}
|
22
|
+
|
23
|
+
rb_constructor_args[0] = INT2NUM((long)c_entity_desc->value);
|
24
|
+
rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name);
|
25
|
+
rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc);
|
26
|
+
|
27
|
+
cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription"));
|
28
|
+
return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription);
|
29
|
+
}
|
30
|
+
|
31
|
+
void
|
32
|
+
noko_init_html_entity_lookup()
|
33
|
+
{
|
34
|
+
cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject);
|
35
|
+
|
36
|
+
rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1);
|
37
|
+
}
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriHtml4SaxParserContext ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctxt)
|
7
|
+
{
|
8
|
+
NOKOGIRI_DEBUG_START(ctxt);
|
9
|
+
|
10
|
+
ctxt->sax = NULL;
|
11
|
+
|
12
|
+
htmlFreeParserCtxt(ctxt);
|
13
|
+
|
14
|
+
NOKOGIRI_DEBUG_END(ctxt);
|
15
|
+
}
|
16
|
+
|
17
|
+
static VALUE
|
18
|
+
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
19
|
+
{
|
20
|
+
htmlParserCtxtPtr ctxt;
|
21
|
+
|
22
|
+
if (NIL_P(data)) {
|
23
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
24
|
+
}
|
25
|
+
if (!(int)RSTRING_LEN(data)) {
|
26
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
27
|
+
}
|
28
|
+
|
29
|
+
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
|
30
|
+
(int)RSTRING_LEN(data));
|
31
|
+
if (ctxt->sax) {
|
32
|
+
xmlFree(ctxt->sax);
|
33
|
+
ctxt->sax = NULL;
|
34
|
+
}
|
35
|
+
|
36
|
+
if (RTEST(encoding)) {
|
37
|
+
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
|
38
|
+
if (enc != NULL) {
|
39
|
+
xmlSwitchToEncoding(ctxt, enc);
|
40
|
+
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
41
|
+
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
42
|
+
StringValueCStr(encoding));
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
48
|
+
}
|
49
|
+
|
50
|
+
static VALUE
|
51
|
+
parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
52
|
+
{
|
53
|
+
htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
|
54
|
+
StringValueCStr(filename),
|
55
|
+
StringValueCStr(encoding)
|
56
|
+
);
|
57
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
58
|
+
}
|
59
|
+
|
60
|
+
static VALUE
|
61
|
+
parse_doc(VALUE ctxt_val)
|
62
|
+
{
|
63
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
64
|
+
htmlParseDocument(ctxt);
|
65
|
+
return Qnil;
|
66
|
+
}
|
67
|
+
|
68
|
+
static VALUE
|
69
|
+
parse_doc_finalize(VALUE ctxt_val)
|
70
|
+
{
|
71
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
72
|
+
|
73
|
+
if (ctxt->myDoc) {
|
74
|
+
xmlFreeDoc(ctxt->myDoc);
|
75
|
+
}
|
76
|
+
|
77
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
78
|
+
return Qnil;
|
79
|
+
}
|
80
|
+
|
81
|
+
static VALUE
|
82
|
+
parse_with(VALUE self, VALUE sax_handler)
|
83
|
+
{
|
84
|
+
htmlParserCtxtPtr ctxt;
|
85
|
+
htmlSAXHandlerPtr sax;
|
86
|
+
|
87
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
88
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
89
|
+
}
|
90
|
+
|
91
|
+
Data_Get_Struct(self, htmlParserCtxt, ctxt);
|
92
|
+
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
|
93
|
+
|
94
|
+
/* Free the sax handler since we'll assign our own */
|
95
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
96
|
+
xmlFree(ctxt->sax);
|
97
|
+
}
|
98
|
+
|
99
|
+
ctxt->sax = sax;
|
100
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
101
|
+
|
102
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
103
|
+
|
104
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
105
|
+
|
106
|
+
return self;
|
107
|
+
}
|
108
|
+
|
109
|
+
void
|
110
|
+
noko_init_html_sax_parser_context()
|
111
|
+
{
|
112
|
+
assert(cNokogiriXmlSaxParserContext);
|
113
|
+
cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext", cNokogiriXmlSaxParserContext);
|
114
|
+
|
115
|
+
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2);
|
116
|
+
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2);
|
117
|
+
|
118
|
+
rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1);
|
119
|
+
}
|