nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,310 +1,403 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
4
|
-
void vasprintf_free (void *p);
|
3
|
+
VALUE cNokogiriXmlSaxParser ;
|
5
4
|
|
6
|
-
static ID id_start_document
|
7
|
-
static ID
|
8
|
-
static ID
|
9
|
-
static ID
|
5
|
+
static ID id_start_document;
|
6
|
+
static ID id_end_document;
|
7
|
+
static ID id_start_element;
|
8
|
+
static ID id_end_element;
|
9
|
+
static ID id_start_element_namespace;
|
10
|
+
static ID id_end_element_namespace;
|
11
|
+
static ID id_comment;
|
12
|
+
static ID id_characters;
|
13
|
+
static ID id_xmldecl;
|
14
|
+
static ID id_error;
|
15
|
+
static ID id_warning;
|
16
|
+
static ID id_cdata_block;
|
10
17
|
static ID id_processing_instruction;
|
18
|
+
static ID id_reference;
|
19
|
+
|
20
|
+
static size_t
|
21
|
+
xml_sax_parser_memsize(const void *data)
|
22
|
+
{
|
23
|
+
return sizeof(xmlSAXHandler);
|
24
|
+
}
|
25
|
+
|
26
|
+
/* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
|
27
|
+
static const rb_data_type_t xml_sax_parser_type = {
|
28
|
+
.wrap_struct_name = "xmlSAXHandler",
|
29
|
+
.function = {
|
30
|
+
.dfree = RUBY_TYPED_DEFAULT_FREE,
|
31
|
+
.dsize = xml_sax_parser_memsize
|
32
|
+
},
|
33
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
34
|
+
};
|
11
35
|
|
12
|
-
static void
|
36
|
+
static void
|
37
|
+
noko_xml_sax_parser_start_document_callback(void *ctx)
|
13
38
|
{
|
14
|
-
|
39
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
40
|
+
VALUE self = (VALUE)ctxt->_private;
|
15
41
|
VALUE doc = rb_iv_get(self, "@document");
|
16
42
|
|
17
|
-
|
18
|
-
|
19
|
-
if(
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
}
|
41
|
-
|
42
|
-
rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
|
43
|
+
xmlSAX2StartDocument(ctx);
|
44
|
+
|
45
|
+
if (ctxt->standalone != -1) { /* -1 means there was no declaration */
|
46
|
+
VALUE encoding = Qnil ;
|
47
|
+
VALUE standalone = Qnil;
|
48
|
+
VALUE version;
|
49
|
+
|
50
|
+
if (ctxt->encoding) {
|
51
|
+
encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
|
52
|
+
} else if (ctxt->input && ctxt->input->encoding) { // unnecessary after v2.12.0 / gnome/libxml2@ec7be506
|
53
|
+
encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
|
54
|
+
}
|
55
|
+
|
56
|
+
version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
|
57
|
+
|
58
|
+
/* TODO try using xmlSAX2IsStandalone */
|
59
|
+
switch (ctxt->standalone) {
|
60
|
+
case 0:
|
61
|
+
standalone = NOKOGIRI_STR_NEW2("no");
|
62
|
+
break;
|
63
|
+
case 1:
|
64
|
+
standalone = NOKOGIRI_STR_NEW2("yes");
|
65
|
+
break;
|
43
66
|
}
|
67
|
+
|
68
|
+
rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
|
44
69
|
}
|
45
70
|
|
46
71
|
rb_funcall(doc, id_start_document, 0);
|
47
72
|
}
|
48
73
|
|
49
|
-
static void
|
74
|
+
static void
|
75
|
+
noko_xml_sax_parser_end_document_callback(void *ctx)
|
50
76
|
{
|
51
|
-
|
77
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
78
|
+
VALUE self = (VALUE)ctxt->_private;
|
52
79
|
VALUE doc = rb_iv_get(self, "@document");
|
80
|
+
|
53
81
|
rb_funcall(doc, id_end_document, 0);
|
54
82
|
}
|
55
83
|
|
56
|
-
static void
|
84
|
+
static void
|
85
|
+
noko_xml_sax_parser_start_element_callback(void *ctx, const xmlChar *name, const xmlChar **atts)
|
57
86
|
{
|
58
|
-
|
87
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
88
|
+
VALUE self = (VALUE)ctxt->_private;
|
59
89
|
VALUE doc = rb_iv_get(self, "@document");
|
90
|
+
|
60
91
|
VALUE attributes = rb_ary_new();
|
61
|
-
const xmlChar *
|
92
|
+
const xmlChar *attr;
|
62
93
|
int i = 0;
|
63
|
-
if(atts) {
|
64
|
-
while((attr = atts[i]) != NULL) {
|
65
|
-
const xmlChar *
|
94
|
+
if (atts) {
|
95
|
+
while ((attr = atts[i]) != NULL) {
|
96
|
+
const xmlChar *val = atts[i + 1];
|
66
97
|
VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
|
67
98
|
rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
|
68
|
-
i+=2;
|
99
|
+
i += 2;
|
69
100
|
}
|
70
101
|
}
|
71
102
|
|
72
|
-
rb_funcall(
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
103
|
+
rb_funcall(doc,
|
104
|
+
id_start_element,
|
105
|
+
2,
|
106
|
+
NOKOGIRI_STR_NEW2(name),
|
107
|
+
attributes
|
108
|
+
);
|
78
109
|
}
|
79
110
|
|
80
|
-
static void
|
111
|
+
static void
|
112
|
+
noko_xml_sax_parser_end_element_callback(void *ctx, const xmlChar *name)
|
81
113
|
{
|
82
|
-
|
114
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
115
|
+
VALUE self = (VALUE)ctxt->_private;
|
83
116
|
VALUE doc = rb_iv_get(self, "@document");
|
117
|
+
|
84
118
|
rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
|
85
119
|
}
|
86
120
|
|
87
|
-
static VALUE
|
88
|
-
|
89
|
-
int nb_attributes,
|
90
|
-
const xmlChar ** attributes)
|
121
|
+
static VALUE
|
122
|
+
xml_sax_parser_marshal_attributes(int attributes_len, const xmlChar **c_attributes)
|
91
123
|
{
|
92
|
-
VALUE
|
124
|
+
VALUE rb_array = rb_ary_new2((long)attributes_len);
|
125
|
+
VALUE cNokogiriXmlSaxParserAttribute;
|
93
126
|
|
94
|
-
|
95
|
-
if (
|
127
|
+
cNokogiriXmlSaxParserAttribute = rb_const_get_at(cNokogiriXmlSaxParser, rb_intern("Attribute"));
|
128
|
+
if (c_attributes) {
|
96
129
|
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
97
130
|
int i;
|
98
|
-
for (i = 0; i <
|
99
|
-
VALUE
|
131
|
+
for (i = 0; i < attributes_len * 5; i += 5) {
|
132
|
+
VALUE rb_constructor_args[4], rb_attribute;
|
100
133
|
|
101
|
-
|
102
|
-
|
103
|
-
|
134
|
+
rb_constructor_args[0] = RBSTR_OR_QNIL(c_attributes[i + 0]); /* localname */
|
135
|
+
rb_constructor_args[1] = RBSTR_OR_QNIL(c_attributes[i + 1]); /* prefix */
|
136
|
+
rb_constructor_args[2] = RBSTR_OR_QNIL(c_attributes[i + 2]); /* URI */
|
104
137
|
|
105
138
|
/* value */
|
106
|
-
|
107
|
-
|
139
|
+
rb_constructor_args[3] = NOKOGIRI_STR_NEW((const char *)c_attributes[i + 3],
|
140
|
+
(c_attributes[i + 4] - c_attributes[i + 3]));
|
108
141
|
|
109
|
-
|
110
|
-
rb_ary_push(
|
142
|
+
rb_attribute = rb_class_new_instance(4, rb_constructor_args, cNokogiriXmlSaxParserAttribute);
|
143
|
+
rb_ary_push(rb_array, rb_attribute);
|
111
144
|
}
|
112
145
|
}
|
113
146
|
|
114
|
-
return
|
147
|
+
return rb_array;
|
115
148
|
}
|
116
149
|
|
117
150
|
static void
|
118
|
-
|
119
|
-
void *
|
120
|
-
const xmlChar *
|
121
|
-
const xmlChar *
|
122
|
-
const xmlChar *
|
151
|
+
noko_xml_sax_parser_start_element_ns_callback(
|
152
|
+
void *ctx,
|
153
|
+
const xmlChar *localname,
|
154
|
+
const xmlChar *prefix,
|
155
|
+
const xmlChar *uri,
|
123
156
|
int nb_namespaces,
|
124
|
-
const xmlChar **
|
157
|
+
const xmlChar **namespaces,
|
125
158
|
int nb_attributes,
|
126
159
|
int nb_defaulted,
|
127
|
-
const xmlChar **
|
160
|
+
const xmlChar **attributes)
|
128
161
|
{
|
129
|
-
|
162
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
163
|
+
VALUE self = (VALUE)ctxt->_private;
|
130
164
|
VALUE doc = rb_iv_get(self, "@document");
|
131
165
|
|
132
|
-
VALUE
|
166
|
+
VALUE attribute_ary = xml_sax_parser_marshal_attributes(nb_attributes, attributes);
|
133
167
|
|
134
168
|
VALUE ns_list = rb_ary_new2((long)nb_namespaces);
|
135
169
|
|
136
170
|
if (namespaces) {
|
137
171
|
int i;
|
138
|
-
for (i = 0; i < nb_namespaces * 2; i += 2)
|
139
|
-
{
|
172
|
+
for (i = 0; i < nb_namespaces * 2; i += 2) {
|
140
173
|
rb_ary_push(ns_list,
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
174
|
+
rb_ary_new3((long)2,
|
175
|
+
RBSTR_OR_QNIL(namespaces[i + 0]),
|
176
|
+
RBSTR_OR_QNIL(namespaces[i + 1])
|
177
|
+
)
|
178
|
+
);
|
146
179
|
}
|
147
180
|
}
|
148
181
|
|
149
|
-
rb_funcall(
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
182
|
+
rb_funcall(doc,
|
183
|
+
id_start_element_namespace,
|
184
|
+
5,
|
185
|
+
NOKOGIRI_STR_NEW2(localname),
|
186
|
+
attribute_ary,
|
187
|
+
RBSTR_OR_QNIL(prefix),
|
188
|
+
RBSTR_OR_QNIL(uri),
|
189
|
+
ns_list
|
190
|
+
);
|
158
191
|
}
|
159
192
|
|
160
193
|
/**
|
161
194
|
* end_element_ns was borrowed heavily from libxml-ruby.
|
162
195
|
*/
|
163
196
|
static void
|
164
|
-
|
165
|
-
void *
|
166
|
-
const xmlChar *
|
167
|
-
const xmlChar *
|
168
|
-
const xmlChar *
|
197
|
+
noko_xml_sax_parser_end_element_ns_callback(
|
198
|
+
void *ctx,
|
199
|
+
const xmlChar *localname,
|
200
|
+
const xmlChar *prefix,
|
201
|
+
const xmlChar *uri)
|
169
202
|
{
|
170
|
-
|
203
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
204
|
+
VALUE self = (VALUE)ctxt->_private;
|
171
205
|
VALUE doc = rb_iv_get(self, "@document");
|
172
206
|
|
173
207
|
rb_funcall(doc, id_end_element_namespace, 3,
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
208
|
+
NOKOGIRI_STR_NEW2(localname),
|
209
|
+
RBSTR_OR_QNIL(prefix),
|
210
|
+
RBSTR_OR_QNIL(uri)
|
211
|
+
);
|
178
212
|
}
|
179
213
|
|
180
|
-
static void
|
214
|
+
static void
|
215
|
+
noko_xml_sax_parser_characters_callback(void *ctx, const xmlChar *ch, int len)
|
181
216
|
{
|
182
|
-
|
217
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
218
|
+
VALUE self = (VALUE)ctxt->_private;
|
183
219
|
VALUE doc = rb_iv_get(self, "@document");
|
220
|
+
|
184
221
|
VALUE str = NOKOGIRI_STR_NEW(ch, len);
|
185
222
|
rb_funcall(doc, id_characters, 1, str);
|
186
223
|
}
|
187
224
|
|
188
|
-
static void
|
225
|
+
static void
|
226
|
+
noko_xml_sax_parser_comment_callback(void *ctx, const xmlChar *value)
|
189
227
|
{
|
190
|
-
|
228
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
229
|
+
VALUE self = (VALUE)ctxt->_private;
|
191
230
|
VALUE doc = rb_iv_get(self, "@document");
|
231
|
+
|
192
232
|
VALUE str = NOKOGIRI_STR_NEW2(value);
|
193
233
|
rb_funcall(doc, id_comment, 1, str);
|
194
234
|
}
|
195
235
|
|
196
|
-
|
236
|
+
PRINTFLIKE_DECL(2, 3)
|
237
|
+
static void
|
238
|
+
noko_xml_sax_parser_warning_callback(void *ctx, const char *msg, ...)
|
197
239
|
{
|
198
|
-
|
240
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
241
|
+
VALUE self = (VALUE)ctxt->_private;
|
199
242
|
VALUE doc = rb_iv_get(self, "@document");
|
200
|
-
char * message;
|
201
|
-
VALUE ruby_message;
|
202
243
|
|
244
|
+
VALUE rb_message;
|
245
|
+
|
246
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
247
|
+
/* It is not currently possible to pass var args from native
|
248
|
+
functions to sulong, so we work around the issue here. */
|
249
|
+
rb_message = rb_sprintf("warning_func: %s", msg);
|
250
|
+
#else
|
203
251
|
va_list args;
|
204
252
|
va_start(args, msg);
|
205
|
-
|
253
|
+
rb_message = rb_vsprintf(msg, args);
|
206
254
|
va_end(args);
|
255
|
+
#endif
|
207
256
|
|
208
|
-
|
209
|
-
vasprintf_free(message);
|
210
|
-
rb_funcall(doc, id_warning, 1, ruby_message);
|
257
|
+
rb_funcall(doc, id_warning, 1, rb_message);
|
211
258
|
}
|
212
259
|
|
213
|
-
|
260
|
+
PRINTFLIKE_DECL(2, 3)
|
261
|
+
static void
|
262
|
+
noko_xml_sax_parser_error_callback(void *ctx, const char *msg, ...)
|
214
263
|
{
|
215
|
-
|
264
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
265
|
+
VALUE self = (VALUE)ctxt->_private;
|
216
266
|
VALUE doc = rb_iv_get(self, "@document");
|
217
|
-
char * message;
|
218
|
-
VALUE ruby_message;
|
219
267
|
|
268
|
+
VALUE rb_message;
|
269
|
+
|
270
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
271
|
+
/* It is not currently possible to pass var args from native
|
272
|
+
functions to sulong, so we work around the issue here. */
|
273
|
+
rb_message = rb_sprintf("error_func: %s", msg);
|
274
|
+
#else
|
220
275
|
va_list args;
|
221
276
|
va_start(args, msg);
|
222
|
-
|
277
|
+
rb_message = rb_vsprintf(msg, args);
|
223
278
|
va_end(args);
|
279
|
+
#endif
|
224
280
|
|
225
|
-
|
226
|
-
vasprintf_free(message);
|
227
|
-
rb_funcall(doc, id_error, 1, ruby_message);
|
281
|
+
rb_funcall(doc, id_error, 1, rb_message);
|
228
282
|
}
|
229
283
|
|
230
|
-
static void
|
284
|
+
static void
|
285
|
+
noko_xml_sax_parser_cdata_block_callback(void *ctx, const xmlChar *value, int len)
|
231
286
|
{
|
232
|
-
|
287
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
288
|
+
VALUE self = (VALUE)ctxt->_private;
|
233
289
|
VALUE doc = rb_iv_get(self, "@document");
|
290
|
+
|
234
291
|
VALUE string = NOKOGIRI_STR_NEW(value, len);
|
235
292
|
rb_funcall(doc, id_cdata_block, 1, string);
|
236
293
|
}
|
237
294
|
|
238
|
-
static void
|
295
|
+
static void
|
296
|
+
noko_xml_sax_parser_processing_instruction_callback(void *ctx, const xmlChar *name, const xmlChar *content)
|
239
297
|
{
|
240
|
-
|
241
|
-
VALUE self =
|
298
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
299
|
+
VALUE self = (VALUE)ctxt->_private;
|
242
300
|
VALUE doc = rb_iv_get(self, "@document");
|
243
301
|
|
244
|
-
rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
|
302
|
+
VALUE rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
|
245
303
|
|
246
|
-
rb_funcall(
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
304
|
+
rb_funcall(doc,
|
305
|
+
id_processing_instruction,
|
306
|
+
2,
|
307
|
+
NOKOGIRI_STR_NEW2(name),
|
308
|
+
rb_content
|
309
|
+
);
|
252
310
|
}
|
253
311
|
|
254
|
-
static void
|
312
|
+
static void
|
313
|
+
noko_xml_sax_parser_reference_callback(void *ctx, const xmlChar *name)
|
255
314
|
{
|
256
|
-
|
257
|
-
|
258
|
-
|
315
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctx;
|
316
|
+
xmlEntityPtr entity = xmlSAX2GetEntity(ctxt, name);
|
317
|
+
|
318
|
+
VALUE self = (VALUE)ctxt->_private;
|
319
|
+
VALUE doc = rb_iv_get(self, "@document");
|
320
|
+
|
321
|
+
if (entity && entity->content) {
|
322
|
+
rb_funcall(doc, id_reference, 2, NOKOGIRI_STR_NEW2(entity->name), NOKOGIRI_STR_NEW2(entity->content));
|
323
|
+
} else {
|
324
|
+
rb_funcall(doc, id_reference, 2, NOKOGIRI_STR_NEW2(name), Qnil);
|
325
|
+
}
|
259
326
|
}
|
260
327
|
|
261
|
-
static VALUE
|
328
|
+
static VALUE
|
329
|
+
noko_xml_sax_parser__initialize_native(VALUE self)
|
262
330
|
{
|
263
|
-
xmlSAXHandlerPtr handler =
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
handler->
|
268
|
-
handler->
|
269
|
-
handler->
|
270
|
-
handler->
|
271
|
-
handler->
|
272
|
-
handler->
|
273
|
-
handler->
|
274
|
-
handler->
|
275
|
-
handler->
|
276
|
-
handler->
|
277
|
-
handler->
|
278
|
-
|
331
|
+
xmlSAXHandlerPtr handler = noko_xml_sax_parser_unwrap(self);
|
332
|
+
|
333
|
+
handler->startDocument = noko_xml_sax_parser_start_document_callback;
|
334
|
+
handler->endDocument = noko_xml_sax_parser_end_document_callback;
|
335
|
+
handler->startElement = noko_xml_sax_parser_start_element_callback;
|
336
|
+
handler->endElement = noko_xml_sax_parser_end_element_callback;
|
337
|
+
handler->startElementNs = noko_xml_sax_parser_start_element_ns_callback;
|
338
|
+
handler->endElementNs = noko_xml_sax_parser_end_element_ns_callback;
|
339
|
+
handler->characters = noko_xml_sax_parser_characters_callback;
|
340
|
+
handler->comment = noko_xml_sax_parser_comment_callback;
|
341
|
+
handler->warning = noko_xml_sax_parser_warning_callback;
|
342
|
+
handler->error = noko_xml_sax_parser_error_callback;
|
343
|
+
handler->cdataBlock = noko_xml_sax_parser_cdata_block_callback;
|
344
|
+
handler->processingInstruction = noko_xml_sax_parser_processing_instruction_callback;
|
345
|
+
handler->reference = noko_xml_sax_parser_reference_callback;
|
346
|
+
|
347
|
+
/* use some of libxml2's default callbacks to managed DTDs and entities */
|
348
|
+
handler->getEntity = xmlSAX2GetEntity;
|
349
|
+
handler->internalSubset = xmlSAX2InternalSubset;
|
350
|
+
handler->externalSubset = xmlSAX2ExternalSubset;
|
351
|
+
handler->isStandalone = xmlSAX2IsStandalone;
|
352
|
+
handler->hasInternalSubset = xmlSAX2HasInternalSubset;
|
353
|
+
handler->hasExternalSubset = xmlSAX2HasExternalSubset;
|
354
|
+
handler->resolveEntity = xmlSAX2ResolveEntity;
|
355
|
+
handler->getParameterEntity = xmlSAX2GetParameterEntity;
|
356
|
+
handler->entityDecl = xmlSAX2EntityDecl;
|
357
|
+
handler->unparsedEntityDecl = xmlSAX2UnparsedEntityDecl;
|
358
|
+
|
279
359
|
handler->initialized = XML_SAX2_MAGIC;
|
280
360
|
|
281
|
-
return
|
361
|
+
return self;
|
282
362
|
}
|
283
363
|
|
284
|
-
VALUE
|
285
|
-
|
364
|
+
static VALUE
|
365
|
+
noko_xml_sax_parser_allocate(VALUE klass)
|
366
|
+
{
|
367
|
+
xmlSAXHandlerPtr handler;
|
368
|
+
return TypedData_Make_Struct(klass, xmlSAXHandler, &xml_sax_parser_type, handler);
|
369
|
+
}
|
370
|
+
|
371
|
+
xmlSAXHandlerPtr
|
372
|
+
noko_xml_sax_parser_unwrap(VALUE rb_sax_handler)
|
373
|
+
{
|
374
|
+
xmlSAXHandlerPtr c_sax_handler;
|
375
|
+
TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &xml_sax_parser_type, c_sax_handler);
|
376
|
+
return c_sax_handler;
|
377
|
+
}
|
378
|
+
|
379
|
+
void
|
380
|
+
noko_init_xml_sax_parser(void)
|
286
381
|
{
|
287
|
-
|
288
|
-
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
289
|
-
VALUE sax = rb_define_module_under(xml, "SAX");
|
290
|
-
VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
|
382
|
+
cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
|
291
383
|
|
292
|
-
cNokogiriXmlSaxParser
|
384
|
+
rb_define_alloc_func(cNokogiriXmlSaxParser, noko_xml_sax_parser_allocate);
|
293
385
|
|
294
|
-
|
386
|
+
rb_define_private_method(cNokogiriXmlSaxParser, "initialize_native",
|
387
|
+
noko_xml_sax_parser__initialize_native, 0);
|
295
388
|
|
296
389
|
id_start_document = rb_intern("start_document");
|
297
|
-
id_end_document
|
298
|
-
id_start_element
|
299
|
-
id_end_element
|
300
|
-
id_comment
|
301
|
-
id_characters
|
302
|
-
id_xmldecl
|
303
|
-
id_error
|
304
|
-
id_warning
|
305
|
-
id_cdata_block
|
306
|
-
id_cAttribute = rb_intern("Attribute");
|
390
|
+
id_end_document = rb_intern("end_document");
|
391
|
+
id_start_element = rb_intern("start_element");
|
392
|
+
id_end_element = rb_intern("end_element");
|
393
|
+
id_comment = rb_intern("comment");
|
394
|
+
id_characters = rb_intern("characters");
|
395
|
+
id_xmldecl = rb_intern("xmldecl");
|
396
|
+
id_error = rb_intern("error");
|
397
|
+
id_warning = rb_intern("warning");
|
398
|
+
id_cdata_block = rb_intern("cdata_block");
|
307
399
|
id_start_element_namespace = rb_intern("start_element_namespace");
|
308
400
|
id_end_element_namespace = rb_intern("end_element_namespace");
|
309
401
|
id_processing_instruction = rb_intern("processing_instruction");
|
402
|
+
id_reference = rb_intern("reference");
|
310
403
|
}
|