nokogiri 1.10.10 → 1.13.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +178 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -64
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +761 -424
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +199 -88
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +21 -21
- data/ext/nokogiri/xml_cdata.c +14 -19
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +296 -220
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +25 -25
- data/ext/nokogiri/xml_encoding_handler.c +43 -18
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +98 -53
- data/ext/nokogiri/xml_node.c +1065 -653
- data/ext/nokogiri/xml_node_set.c +178 -166
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +277 -175
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +112 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +98 -48
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +226 -115
- data/ext/nokogiri/xslt_stylesheet.c +265 -173
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +218 -91
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/{html → html4}/document.rb +103 -105
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +91 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +100 -0
- data/lib/nokogiri/html5.rb +478 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +222 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +74 -33
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +224 -86
- data/lib/nokogiri/xml/document_fragment.rb +46 -44
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +10 -5
- data/lib/nokogiri/xml/node.rb +884 -378
- data/lib/nokogiri/xml/node_set.rb +51 -54
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +22 -8
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +38 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +112 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +49 -65
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +189 -142
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlRelaxNG;
|
4
|
+
|
5
|
+
static void
|
6
|
+
dealloc(xmlRelaxNGPtr schema)
|
4
7
|
{
|
5
8
|
NOKOGIRI_DEBUG_START(schema);
|
6
9
|
xmlRelaxNGFree(schema);
|
@@ -13,7 +16,8 @@ static void dealloc(xmlRelaxNGPtr schema)
|
|
13
16
|
*
|
14
17
|
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
15
18
|
*/
|
16
|
-
static VALUE
|
19
|
+
static VALUE
|
20
|
+
validate_document(VALUE self, VALUE document)
|
17
21
|
{
|
18
22
|
xmlDocPtr doc;
|
19
23
|
xmlRelaxNGPtr schema;
|
@@ -27,7 +31,7 @@ static VALUE validate_document(VALUE self, VALUE document)
|
|
27
31
|
|
28
32
|
valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
|
29
33
|
|
30
|
-
if(NULL == valid_ctxt) {
|
34
|
+
if (NULL == valid_ctxt) {
|
31
35
|
/* we have a problem */
|
32
36
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
33
37
|
}
|
@@ -53,16 +57,25 @@ static VALUE validate_document(VALUE self, VALUE document)
|
|
53
57
|
*
|
54
58
|
* Create a new RelaxNG from the contents of +string+
|
55
59
|
*/
|
56
|
-
static VALUE
|
60
|
+
static VALUE
|
61
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
57
62
|
{
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
);
|
63
|
+
VALUE content;
|
64
|
+
VALUE parse_options;
|
65
|
+
xmlRelaxNGParserCtxtPtr ctx;
|
62
66
|
xmlRelaxNGPtr schema;
|
63
|
-
VALUE errors
|
67
|
+
VALUE errors;
|
64
68
|
VALUE rb_schema;
|
69
|
+
int scanned_args = 0;
|
70
|
+
|
71
|
+
scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
|
72
|
+
if (scanned_args == 1) {
|
73
|
+
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
74
|
+
}
|
65
75
|
|
76
|
+
ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
|
77
|
+
|
78
|
+
errors = rb_ary_new();
|
66
79
|
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
67
80
|
|
68
81
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
@@ -78,18 +91,20 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
|
78
91
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
79
92
|
xmlRelaxNGFreeParserCtxt(ctx);
|
80
93
|
|
81
|
-
if(NULL == schema) {
|
94
|
+
if (NULL == schema) {
|
82
95
|
xmlErrorPtr error = xmlGetLastError();
|
83
|
-
if(error)
|
96
|
+
if (error) {
|
84
97
|
Nokogiri_error_raise(NULL, error);
|
85
|
-
else
|
98
|
+
} else {
|
86
99
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
100
|
+
}
|
87
101
|
|
88
102
|
return Qnil;
|
89
103
|
}
|
90
104
|
|
91
105
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
92
106
|
rb_iv_set(rb_schema, "@errors", errors);
|
107
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
93
108
|
|
94
109
|
return rb_schema;
|
95
110
|
}
|
@@ -100,18 +115,26 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
|
100
115
|
*
|
101
116
|
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
102
117
|
*/
|
103
|
-
static VALUE
|
118
|
+
static VALUE
|
119
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
104
120
|
{
|
121
|
+
VALUE document;
|
122
|
+
VALUE parse_options;
|
105
123
|
xmlDocPtr doc;
|
106
124
|
xmlRelaxNGParserCtxtPtr ctx;
|
107
125
|
xmlRelaxNGPtr schema;
|
108
126
|
VALUE errors;
|
109
127
|
VALUE rb_schema;
|
128
|
+
int scanned_args = 0;
|
129
|
+
|
130
|
+
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
110
131
|
|
111
132
|
Data_Get_Struct(document, xmlDoc, doc);
|
133
|
+
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
112
134
|
|
113
|
-
|
114
|
-
|
135
|
+
if (scanned_args == 1) {
|
136
|
+
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
137
|
+
}
|
115
138
|
|
116
139
|
ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
117
140
|
|
@@ -129,33 +152,34 @@ static VALUE from_document(VALUE klass, VALUE document)
|
|
129
152
|
schema = xmlRelaxNGParse(ctx);
|
130
153
|
|
131
154
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
155
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
132
156
|
|
133
|
-
if(NULL == schema) {
|
157
|
+
if (NULL == schema) {
|
134
158
|
xmlErrorPtr error = xmlGetLastError();
|
135
|
-
if(error)
|
159
|
+
if (error) {
|
136
160
|
Nokogiri_error_raise(NULL, error);
|
137
|
-
else
|
161
|
+
} else {
|
138
162
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
163
|
+
}
|
139
164
|
|
140
165
|
return Qnil;
|
141
166
|
}
|
142
167
|
|
143
168
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
144
169
|
rb_iv_set(rb_schema, "@errors", errors);
|
170
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
145
171
|
|
146
172
|
return rb_schema;
|
147
173
|
}
|
148
174
|
|
149
|
-
|
150
|
-
|
175
|
+
void
|
176
|
+
noko_init_xml_relax_ng()
|
151
177
|
{
|
152
|
-
|
153
|
-
|
154
|
-
VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
|
178
|
+
assert(cNokogiriXmlSchema);
|
179
|
+
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
|
155
180
|
|
156
|
-
cNokogiriXmlRelaxNG
|
181
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1);
|
182
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
|
157
183
|
|
158
|
-
|
159
|
-
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
160
|
-
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
184
|
+
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1);
|
161
185
|
}
|
@@ -1,23 +1,23 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
4
|
-
void vasprintf_free (void *p);
|
3
|
+
VALUE cNokogiriXmlSaxParser ;
|
5
4
|
|
6
5
|
static ID id_start_document, id_end_document, id_start_element, id_end_element;
|
7
6
|
static ID id_start_element_namespace, id_end_element_namespace;
|
8
7
|
static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
|
9
|
-
static ID id_cdata_block
|
8
|
+
static ID id_cdata_block;
|
10
9
|
static ID id_processing_instruction;
|
11
10
|
|
12
|
-
static void
|
11
|
+
static void
|
12
|
+
start_document(void *ctx)
|
13
13
|
{
|
14
14
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
15
15
|
VALUE doc = rb_iv_get(self, "@document");
|
16
16
|
|
17
17
|
xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
|
18
18
|
|
19
|
-
if(NULL != ctxt && ctxt->html != 1) {
|
20
|
-
if(ctxt->standalone != -1) {
|
19
|
+
if (NULL != ctxt && ctxt->html != 1) {
|
20
|
+
if (ctxt->standalone != -1) { /* -1 means there was no declaration */
|
21
21
|
VALUE encoding = Qnil ;
|
22
22
|
VALUE standalone = Qnil;
|
23
23
|
VALUE version;
|
@@ -29,8 +29,7 @@ static void start_document(void * ctx)
|
|
29
29
|
|
30
30
|
version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
|
31
31
|
|
32
|
-
switch(ctxt->standalone)
|
33
|
-
{
|
32
|
+
switch (ctxt->standalone) {
|
34
33
|
case 0:
|
35
34
|
standalone = NOKOGIRI_STR_NEW2("no");
|
36
35
|
break;
|
@@ -46,138 +45,140 @@ static void start_document(void * ctx)
|
|
46
45
|
rb_funcall(doc, id_start_document, 0);
|
47
46
|
}
|
48
47
|
|
49
|
-
static void
|
48
|
+
static void
|
49
|
+
end_document(void *ctx)
|
50
50
|
{
|
51
51
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
52
52
|
VALUE doc = rb_iv_get(self, "@document");
|
53
53
|
rb_funcall(doc, id_end_document, 0);
|
54
54
|
}
|
55
55
|
|
56
|
-
static void
|
56
|
+
static void
|
57
|
+
start_element(void *ctx, const xmlChar *name, const xmlChar **atts)
|
57
58
|
{
|
58
59
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
59
60
|
VALUE doc = rb_iv_get(self, "@document");
|
60
61
|
VALUE attributes = rb_ary_new();
|
61
|
-
const xmlChar *
|
62
|
+
const xmlChar *attr;
|
62
63
|
int i = 0;
|
63
|
-
if(atts) {
|
64
|
-
while((attr = atts[i]) != NULL) {
|
65
|
-
const xmlChar *
|
64
|
+
if (atts) {
|
65
|
+
while ((attr = atts[i]) != NULL) {
|
66
|
+
const xmlChar *val = atts[i + 1];
|
66
67
|
VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
|
67
68
|
rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
|
68
|
-
i+=2;
|
69
|
+
i += 2;
|
69
70
|
}
|
70
71
|
}
|
71
72
|
|
72
|
-
rb_funcall(
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
73
|
+
rb_funcall(doc,
|
74
|
+
id_start_element,
|
75
|
+
2,
|
76
|
+
NOKOGIRI_STR_NEW2(name),
|
77
|
+
attributes
|
78
|
+
);
|
78
79
|
}
|
79
80
|
|
80
|
-
static void
|
81
|
+
static void
|
82
|
+
end_element(void *ctx, const xmlChar *name)
|
81
83
|
{
|
82
84
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
83
85
|
VALUE doc = rb_iv_get(self, "@document");
|
84
86
|
rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
|
85
87
|
}
|
86
88
|
|
87
|
-
static VALUE
|
88
|
-
|
89
|
-
int nb_attributes,
|
90
|
-
const xmlChar ** attributes)
|
89
|
+
static VALUE
|
90
|
+
attributes_as_array(int attributes_len, const xmlChar **c_attributes)
|
91
91
|
{
|
92
|
-
VALUE
|
92
|
+
VALUE rb_array = rb_ary_new2((long)attributes_len);
|
93
|
+
VALUE cNokogiriXmlSaxParserAttribute;
|
93
94
|
|
94
|
-
|
95
|
-
if (
|
95
|
+
cNokogiriXmlSaxParserAttribute = rb_const_get_at(cNokogiriXmlSaxParser, rb_intern("Attribute"));
|
96
|
+
if (c_attributes) {
|
96
97
|
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
97
98
|
int i;
|
98
|
-
for (i = 0; i <
|
99
|
-
VALUE
|
99
|
+
for (i = 0; i < attributes_len * 5; i += 5) {
|
100
|
+
VALUE rb_constructor_args[4], rb_attribute;
|
100
101
|
|
101
|
-
|
102
|
-
|
103
|
-
|
102
|
+
rb_constructor_args[0] = RBSTR_OR_QNIL(c_attributes[i + 0]); /* localname */
|
103
|
+
rb_constructor_args[1] = RBSTR_OR_QNIL(c_attributes[i + 1]); /* prefix */
|
104
|
+
rb_constructor_args[2] = RBSTR_OR_QNIL(c_attributes[i + 2]); /* URI */
|
104
105
|
|
105
106
|
/* value */
|
106
|
-
|
107
|
-
|
107
|
+
rb_constructor_args[3] = NOKOGIRI_STR_NEW((const char *)c_attributes[i + 3],
|
108
|
+
(c_attributes[i + 4] - c_attributes[i + 3]));
|
108
109
|
|
109
|
-
|
110
|
-
rb_ary_push(
|
110
|
+
rb_attribute = rb_class_new_instance(4, rb_constructor_args, cNokogiriXmlSaxParserAttribute);
|
111
|
+
rb_ary_push(rb_array, rb_attribute);
|
111
112
|
}
|
112
113
|
}
|
113
114
|
|
114
|
-
return
|
115
|
+
return rb_array;
|
115
116
|
}
|
116
117
|
|
117
118
|
static void
|
118
|
-
start_element_ns
|
119
|
-
void *
|
120
|
-
const xmlChar *
|
121
|
-
const xmlChar *
|
122
|
-
const xmlChar *
|
119
|
+
start_element_ns(
|
120
|
+
void *ctx,
|
121
|
+
const xmlChar *localname,
|
122
|
+
const xmlChar *prefix,
|
123
|
+
const xmlChar *uri,
|
123
124
|
int nb_namespaces,
|
124
|
-
const xmlChar **
|
125
|
+
const xmlChar **namespaces,
|
125
126
|
int nb_attributes,
|
126
127
|
int nb_defaulted,
|
127
|
-
const xmlChar **
|
128
|
+
const xmlChar **attributes)
|
128
129
|
{
|
129
130
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
130
131
|
VALUE doc = rb_iv_get(self, "@document");
|
131
132
|
|
132
|
-
VALUE
|
133
|
+
VALUE attribute_ary = attributes_as_array(nb_attributes, attributes);
|
133
134
|
|
134
135
|
VALUE ns_list = rb_ary_new2((long)nb_namespaces);
|
135
136
|
|
136
137
|
if (namespaces) {
|
137
138
|
int i;
|
138
|
-
for (i = 0; i < nb_namespaces * 2; i += 2)
|
139
|
-
{
|
139
|
+
for (i = 0; i < nb_namespaces * 2; i += 2) {
|
140
140
|
rb_ary_push(ns_list,
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
141
|
+
rb_ary_new3((long)2,
|
142
|
+
RBSTR_OR_QNIL(namespaces[i + 0]),
|
143
|
+
RBSTR_OR_QNIL(namespaces[i + 1])
|
144
|
+
)
|
145
|
+
);
|
146
146
|
}
|
147
147
|
}
|
148
148
|
|
149
|
-
rb_funcall(
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
149
|
+
rb_funcall(doc,
|
150
|
+
id_start_element_namespace,
|
151
|
+
5,
|
152
|
+
NOKOGIRI_STR_NEW2(localname),
|
153
|
+
attribute_ary,
|
154
|
+
RBSTR_OR_QNIL(prefix),
|
155
|
+
RBSTR_OR_QNIL(uri),
|
156
|
+
ns_list
|
157
|
+
);
|
158
158
|
}
|
159
159
|
|
160
160
|
/**
|
161
161
|
* end_element_ns was borrowed heavily from libxml-ruby.
|
162
162
|
*/
|
163
163
|
static void
|
164
|
-
end_element_ns
|
165
|
-
void *
|
166
|
-
const xmlChar *
|
167
|
-
const xmlChar *
|
168
|
-
const xmlChar *
|
164
|
+
end_element_ns(
|
165
|
+
void *ctx,
|
166
|
+
const xmlChar *localname,
|
167
|
+
const xmlChar *prefix,
|
168
|
+
const xmlChar *uri)
|
169
169
|
{
|
170
170
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
171
171
|
VALUE doc = rb_iv_get(self, "@document");
|
172
172
|
|
173
173
|
rb_funcall(doc, id_end_element_namespace, 3,
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
174
|
+
NOKOGIRI_STR_NEW2(localname),
|
175
|
+
RBSTR_OR_QNIL(prefix),
|
176
|
+
RBSTR_OR_QNIL(uri)
|
177
|
+
);
|
178
178
|
}
|
179
179
|
|
180
|
-
static void
|
180
|
+
static void
|
181
|
+
characters_func(void *ctx, const xmlChar *ch, int len)
|
181
182
|
{
|
182
183
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
183
184
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -185,7 +186,8 @@ static void characters_func(void * ctx, const xmlChar * ch, int len)
|
|
185
186
|
rb_funcall(doc, id_characters, 1, str);
|
186
187
|
}
|
187
188
|
|
188
|
-
static void
|
189
|
+
static void
|
190
|
+
comment_func(void *ctx, const xmlChar *value)
|
189
191
|
{
|
190
192
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
191
193
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -193,11 +195,12 @@ static void comment_func(void * ctx, const xmlChar * value)
|
|
193
195
|
rb_funcall(doc, id_comment, 1, str);
|
194
196
|
}
|
195
197
|
|
196
|
-
static void
|
198
|
+
static void
|
199
|
+
warning_func(void *ctx, const char *msg, ...)
|
197
200
|
{
|
198
201
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
199
202
|
VALUE doc = rb_iv_get(self, "@document");
|
200
|
-
char *
|
203
|
+
char *message;
|
201
204
|
VALUE ruby_message;
|
202
205
|
|
203
206
|
va_list args;
|
@@ -206,15 +209,16 @@ static void warning_func(void * ctx, const char *msg, ...)
|
|
206
209
|
va_end(args);
|
207
210
|
|
208
211
|
ruby_message = NOKOGIRI_STR_NEW2(message);
|
209
|
-
|
212
|
+
free(message);
|
210
213
|
rb_funcall(doc, id_warning, 1, ruby_message);
|
211
214
|
}
|
212
215
|
|
213
|
-
static void
|
216
|
+
static void
|
217
|
+
error_func(void *ctx, const char *msg, ...)
|
214
218
|
{
|
215
219
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
216
220
|
VALUE doc = rb_iv_get(self, "@document");
|
217
|
-
char *
|
221
|
+
char *message;
|
218
222
|
VALUE ruby_message;
|
219
223
|
|
220
224
|
va_list args;
|
@@ -223,11 +227,12 @@ static void error_func(void * ctx, const char *msg, ...)
|
|
223
227
|
va_end(args);
|
224
228
|
|
225
229
|
ruby_message = NOKOGIRI_STR_NEW2(message);
|
226
|
-
|
230
|
+
free(message);
|
227
231
|
rb_funcall(doc, id_error, 1, ruby_message);
|
228
232
|
}
|
229
233
|
|
230
|
-
static void
|
234
|
+
static void
|
235
|
+
cdata_block(void *ctx, const xmlChar *value, int len)
|
231
236
|
{
|
232
237
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
233
238
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -235,7 +240,8 @@ static void cdata_block(void * ctx, const xmlChar * value, int len)
|
|
235
240
|
rb_funcall(doc, id_cdata_block, 1, string);
|
236
241
|
}
|
237
242
|
|
238
|
-
static void
|
243
|
+
static void
|
244
|
+
processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
|
239
245
|
{
|
240
246
|
VALUE rb_content;
|
241
247
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
@@ -243,27 +249,27 @@ static void processing_instruction(void * ctx, const xmlChar * name, const xmlCh
|
|
243
249
|
|
244
250
|
rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
|
245
251
|
|
246
|
-
rb_funcall(
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
+
rb_funcall(doc,
|
253
|
+
id_processing_instruction,
|
254
|
+
2,
|
255
|
+
NOKOGIRI_STR_NEW2(name),
|
256
|
+
rb_content
|
257
|
+
);
|
252
258
|
}
|
253
259
|
|
254
|
-
static void
|
260
|
+
static void
|
261
|
+
deallocate(xmlSAXHandlerPtr handler)
|
255
262
|
{
|
256
263
|
NOKOGIRI_DEBUG_START(handler);
|
257
264
|
free(handler);
|
258
265
|
NOKOGIRI_DEBUG_END(handler);
|
259
266
|
}
|
260
267
|
|
261
|
-
static VALUE
|
268
|
+
static VALUE
|
269
|
+
allocate(VALUE klass)
|
262
270
|
{
|
263
271
|
xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
|
264
272
|
|
265
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
266
|
-
|
267
273
|
handler->startDocument = start_document;
|
268
274
|
handler->endDocument = end_document;
|
269
275
|
handler->startElement = start_element;
|
@@ -281,29 +287,23 @@ static VALUE allocate(VALUE klass)
|
|
281
287
|
return Data_Wrap_Struct(klass, NULL, deallocate, handler);
|
282
288
|
}
|
283
289
|
|
284
|
-
|
285
|
-
|
290
|
+
void
|
291
|
+
noko_init_xml_sax_parser()
|
286
292
|
{
|
287
|
-
|
288
|
-
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
289
|
-
VALUE sax = rb_define_module_under(xml, "SAX");
|
290
|
-
VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
|
291
|
-
|
292
|
-
cNokogiriXmlSaxParser = klass;
|
293
|
+
cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
|
293
294
|
|
294
|
-
rb_define_alloc_func(
|
295
|
+
rb_define_alloc_func(cNokogiriXmlSaxParser, allocate);
|
295
296
|
|
296
297
|
id_start_document = rb_intern("start_document");
|
297
|
-
id_end_document
|
298
|
-
id_start_element
|
299
|
-
id_end_element
|
300
|
-
id_comment
|
301
|
-
id_characters
|
302
|
-
id_xmldecl
|
303
|
-
id_error
|
304
|
-
id_warning
|
305
|
-
id_cdata_block
|
306
|
-
id_cAttribute = rb_intern("Attribute");
|
298
|
+
id_end_document = rb_intern("end_document");
|
299
|
+
id_start_element = rb_intern("start_element");
|
300
|
+
id_end_element = rb_intern("end_element");
|
301
|
+
id_comment = rb_intern("comment");
|
302
|
+
id_characters = rb_intern("characters");
|
303
|
+
id_xmldecl = rb_intern("xmldecl");
|
304
|
+
id_error = rb_intern("error");
|
305
|
+
id_warning = rb_intern("warning");
|
306
|
+
id_cdata_block = rb_intern("cdata_block");
|
307
307
|
id_start_element_namespace = rb_intern("start_element_namespace");
|
308
308
|
id_end_element_namespace = rb_intern("end_element_namespace");
|
309
309
|
id_processing_instruction = rb_intern("processing_instruction");
|