nokogiri 1.10.7 → 1.16.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +42 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +188 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +862 -421
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +222 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +39 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +408 -243
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1343 -674
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +305 -213
- data/ext/nokogiri/xml_relax_ng.c +87 -78
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +149 -103
- data/ext/nokogiri/xml_sax_push_parser.c +65 -37
- data/ext/nokogiri/xml_schema.c +138 -82
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +35 -26
- data/ext/nokogiri/xml_xpath_context.c +363 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +126 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +205 -96
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +326 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +75 -34
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -127
- data/lib/nokogiri/xml/document_fragment.rb +93 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1096 -419
- data/lib/nokogiri/xml/node_set.rb +137 -61
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +7 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +39 -38
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +121 -291
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -1,33 +1,44 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlRelaxNG;
|
4
|
+
|
5
|
+
static void
|
6
|
+
xml_relax_ng_deallocate(void *data)
|
4
7
|
{
|
5
|
-
|
8
|
+
xmlRelaxNGPtr schema = data;
|
6
9
|
xmlRelaxNGFree(schema);
|
7
|
-
NOKOGIRI_DEBUG_END(schema);
|
8
10
|
}
|
9
11
|
|
12
|
+
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::RelaxNG",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_relax_ng_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
10
20
|
/*
|
11
21
|
* call-seq:
|
12
22
|
* validate_document(document)
|
13
23
|
*
|
14
24
|
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
15
25
|
*/
|
16
|
-
static VALUE
|
26
|
+
static VALUE
|
27
|
+
validate_document(VALUE self, VALUE document)
|
17
28
|
{
|
18
29
|
xmlDocPtr doc;
|
19
30
|
xmlRelaxNGPtr schema;
|
20
31
|
VALUE errors;
|
21
32
|
xmlRelaxNGValidCtxtPtr valid_ctxt;
|
22
33
|
|
23
|
-
|
24
|
-
|
34
|
+
TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
|
35
|
+
doc = noko_xml_document_unwrap(document);
|
25
36
|
|
26
37
|
errors = rb_ary_new();
|
27
38
|
|
28
39
|
valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
|
29
40
|
|
30
|
-
if(NULL == valid_ctxt) {
|
41
|
+
if (NULL == valid_ctxt) {
|
31
42
|
/* we have a problem */
|
32
43
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
33
44
|
}
|
@@ -47,115 +58,113 @@ static VALUE validate_document(VALUE self, VALUE document)
|
|
47
58
|
return errors;
|
48
59
|
}
|
49
60
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
static VALUE read_memory(VALUE klass, VALUE content)
|
61
|
+
static VALUE
|
62
|
+
xml_relax_ng_parse_schema(
|
63
|
+
VALUE klass,
|
64
|
+
xmlRelaxNGParserCtxtPtr c_parser_context,
|
65
|
+
VALUE rb_parse_options
|
66
|
+
)
|
57
67
|
{
|
58
|
-
|
59
|
-
(const char *)StringValuePtr(content),
|
60
|
-
(int)RSTRING_LEN(content)
|
61
|
-
);
|
62
|
-
xmlRelaxNGPtr schema;
|
63
|
-
VALUE errors = rb_ary_new();
|
68
|
+
VALUE rb_errors;
|
64
69
|
VALUE rb_schema;
|
70
|
+
xmlRelaxNGPtr c_schema;
|
71
|
+
|
72
|
+
if (NIL_P(rb_parse_options)) {
|
73
|
+
rb_parse_options = rb_const_get_at(
|
74
|
+
rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
|
75
|
+
rb_intern("DEFAULT_SCHEMA")
|
76
|
+
);
|
77
|
+
}
|
65
78
|
|
66
|
-
|
79
|
+
rb_errors = rb_ary_new();
|
80
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
67
81
|
|
68
82
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
69
83
|
xmlRelaxNGSetParserStructuredErrors(
|
70
|
-
|
84
|
+
c_parser_context,
|
71
85
|
Nokogiri_error_array_pusher,
|
72
|
-
(void *)
|
86
|
+
(void *)rb_errors
|
73
87
|
);
|
74
88
|
#endif
|
75
89
|
|
76
|
-
|
90
|
+
c_schema = xmlRelaxNGParse(c_parser_context);
|
77
91
|
|
78
92
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
79
|
-
xmlRelaxNGFreeParserCtxt(
|
93
|
+
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
80
94
|
|
81
|
-
if(NULL ==
|
82
|
-
|
83
|
-
if(error)
|
95
|
+
if (NULL == c_schema) {
|
96
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
97
|
+
if (error) {
|
84
98
|
Nokogiri_error_raise(NULL, error);
|
85
|
-
else
|
99
|
+
} else {
|
86
100
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
101
|
+
}
|
87
102
|
|
88
103
|
return Qnil;
|
89
104
|
}
|
90
105
|
|
91
|
-
rb_schema =
|
92
|
-
rb_iv_set(rb_schema, "@errors",
|
106
|
+
rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
|
107
|
+
rb_iv_set(rb_schema, "@errors", rb_errors);
|
108
|
+
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
93
109
|
|
94
110
|
return rb_schema;
|
95
111
|
}
|
96
112
|
|
97
113
|
/*
|
98
114
|
* call-seq:
|
99
|
-
*
|
115
|
+
* read_memory(string)
|
100
116
|
*
|
101
|
-
* Create a new RelaxNG
|
117
|
+
* Create a new RelaxNG from the contents of +string+
|
102
118
|
*/
|
103
|
-
static VALUE
|
119
|
+
static VALUE
|
120
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
104
121
|
{
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
VALUE errors;
|
109
|
-
VALUE rb_schema;
|
110
|
-
|
111
|
-
Data_Get_Struct(document, xmlDoc, doc);
|
122
|
+
VALUE rb_content;
|
123
|
+
VALUE rb_parse_options;
|
124
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
112
125
|
|
113
|
-
|
114
|
-
doc = doc->doc;
|
126
|
+
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
115
127
|
|
116
|
-
|
128
|
+
c_parser_context = xmlRelaxNGNewMemParserCtxt(
|
129
|
+
(const char *)StringValuePtr(rb_content),
|
130
|
+
(int)RSTRING_LEN(rb_content)
|
131
|
+
);
|
117
132
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
122
|
-
xmlRelaxNGSetParserStructuredErrors(
|
123
|
-
ctx,
|
124
|
-
Nokogiri_error_array_pusher,
|
125
|
-
(void *)errors
|
126
|
-
);
|
127
|
-
#endif
|
128
|
-
|
129
|
-
schema = xmlRelaxNGParse(ctx);
|
133
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
134
|
+
}
|
130
135
|
|
131
|
-
|
136
|
+
/*
|
137
|
+
* call-seq:
|
138
|
+
* from_document(doc)
|
139
|
+
*
|
140
|
+
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
141
|
+
*/
|
142
|
+
static VALUE
|
143
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
144
|
+
{
|
145
|
+
VALUE rb_document;
|
146
|
+
VALUE rb_parse_options;
|
147
|
+
xmlDocPtr c_document;
|
148
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
132
149
|
|
133
|
-
|
134
|
-
xmlErrorPtr error = xmlGetLastError();
|
135
|
-
if(error)
|
136
|
-
Nokogiri_error_raise(NULL, error);
|
137
|
-
else
|
138
|
-
rb_raise(rb_eRuntimeError, "Could not parse document");
|
150
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
|
139
151
|
|
140
|
-
|
141
|
-
|
152
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
153
|
+
c_document = c_document->doc; /* In case someone passes us a node. ugh. */
|
142
154
|
|
143
|
-
|
144
|
-
rb_iv_set(rb_schema, "@errors", errors);
|
155
|
+
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
145
156
|
|
146
|
-
return
|
157
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
147
158
|
}
|
148
159
|
|
149
|
-
|
150
|
-
void
|
160
|
+
void
|
161
|
+
noko_init_xml_relax_ng(void)
|
151
162
|
{
|
152
|
-
|
153
|
-
|
154
|
-
VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
|
163
|
+
assert(cNokogiriXmlSchema);
|
164
|
+
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
|
155
165
|
|
156
|
-
cNokogiriXmlRelaxNG
|
166
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1);
|
167
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
|
157
168
|
|
158
|
-
|
159
|
-
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
160
|
-
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
169
|
+
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1);
|
161
170
|
}
|
@@ -1,23 +1,23 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
4
|
-
void vasprintf_free (void *p);
|
3
|
+
VALUE cNokogiriXmlSaxParser ;
|
5
4
|
|
6
5
|
static ID id_start_document, id_end_document, id_start_element, id_end_element;
|
7
6
|
static ID id_start_element_namespace, id_end_element_namespace;
|
8
7
|
static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
|
9
|
-
static ID id_cdata_block
|
8
|
+
static ID id_cdata_block;
|
10
9
|
static ID id_processing_instruction;
|
11
10
|
|
12
|
-
static void
|
11
|
+
static void
|
12
|
+
start_document(void *ctx)
|
13
13
|
{
|
14
14
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
15
15
|
VALUE doc = rb_iv_get(self, "@document");
|
16
16
|
|
17
17
|
xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
|
18
18
|
|
19
|
-
if(NULL != ctxt && ctxt->html != 1) {
|
20
|
-
if(ctxt->standalone != -1) {
|
19
|
+
if (NULL != ctxt && ctxt->html != 1) {
|
20
|
+
if (ctxt->standalone != -1) { /* -1 means there was no declaration */
|
21
21
|
VALUE encoding = Qnil ;
|
22
22
|
VALUE standalone = Qnil;
|
23
23
|
VALUE version;
|
@@ -29,8 +29,7 @@ static void start_document(void * ctx)
|
|
29
29
|
|
30
30
|
version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
|
31
31
|
|
32
|
-
switch(ctxt->standalone)
|
33
|
-
{
|
32
|
+
switch (ctxt->standalone) {
|
34
33
|
case 0:
|
35
34
|
standalone = NOKOGIRI_STR_NEW2("no");
|
36
35
|
break;
|
@@ -46,138 +45,140 @@ static void start_document(void * ctx)
|
|
46
45
|
rb_funcall(doc, id_start_document, 0);
|
47
46
|
}
|
48
47
|
|
49
|
-
static void
|
48
|
+
static void
|
49
|
+
end_document(void *ctx)
|
50
50
|
{
|
51
51
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
52
52
|
VALUE doc = rb_iv_get(self, "@document");
|
53
53
|
rb_funcall(doc, id_end_document, 0);
|
54
54
|
}
|
55
55
|
|
56
|
-
static void
|
56
|
+
static void
|
57
|
+
start_element(void *ctx, const xmlChar *name, const xmlChar **atts)
|
57
58
|
{
|
58
59
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
59
60
|
VALUE doc = rb_iv_get(self, "@document");
|
60
61
|
VALUE attributes = rb_ary_new();
|
61
|
-
const xmlChar *
|
62
|
+
const xmlChar *attr;
|
62
63
|
int i = 0;
|
63
|
-
if(atts) {
|
64
|
-
while((attr = atts[i]) != NULL) {
|
65
|
-
const xmlChar *
|
64
|
+
if (atts) {
|
65
|
+
while ((attr = atts[i]) != NULL) {
|
66
|
+
const xmlChar *val = atts[i + 1];
|
66
67
|
VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
|
67
68
|
rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
|
68
|
-
i+=2;
|
69
|
+
i += 2;
|
69
70
|
}
|
70
71
|
}
|
71
72
|
|
72
|
-
rb_funcall(
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
73
|
+
rb_funcall(doc,
|
74
|
+
id_start_element,
|
75
|
+
2,
|
76
|
+
NOKOGIRI_STR_NEW2(name),
|
77
|
+
attributes
|
78
|
+
);
|
78
79
|
}
|
79
80
|
|
80
|
-
static void
|
81
|
+
static void
|
82
|
+
end_element(void *ctx, const xmlChar *name)
|
81
83
|
{
|
82
84
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
83
85
|
VALUE doc = rb_iv_get(self, "@document");
|
84
86
|
rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
|
85
87
|
}
|
86
88
|
|
87
|
-
static VALUE
|
88
|
-
|
89
|
-
int nb_attributes,
|
90
|
-
const xmlChar ** attributes)
|
89
|
+
static VALUE
|
90
|
+
attributes_as_array(int attributes_len, const xmlChar **c_attributes)
|
91
91
|
{
|
92
|
-
VALUE
|
92
|
+
VALUE rb_array = rb_ary_new2((long)attributes_len);
|
93
|
+
VALUE cNokogiriXmlSaxParserAttribute;
|
93
94
|
|
94
|
-
|
95
|
-
if (
|
95
|
+
cNokogiriXmlSaxParserAttribute = rb_const_get_at(cNokogiriXmlSaxParser, rb_intern("Attribute"));
|
96
|
+
if (c_attributes) {
|
96
97
|
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
97
98
|
int i;
|
98
|
-
for (i = 0; i <
|
99
|
-
VALUE
|
99
|
+
for (i = 0; i < attributes_len * 5; i += 5) {
|
100
|
+
VALUE rb_constructor_args[4], rb_attribute;
|
100
101
|
|
101
|
-
|
102
|
-
|
103
|
-
|
102
|
+
rb_constructor_args[0] = RBSTR_OR_QNIL(c_attributes[i + 0]); /* localname */
|
103
|
+
rb_constructor_args[1] = RBSTR_OR_QNIL(c_attributes[i + 1]); /* prefix */
|
104
|
+
rb_constructor_args[2] = RBSTR_OR_QNIL(c_attributes[i + 2]); /* URI */
|
104
105
|
|
105
106
|
/* value */
|
106
|
-
|
107
|
-
|
107
|
+
rb_constructor_args[3] = NOKOGIRI_STR_NEW((const char *)c_attributes[i + 3],
|
108
|
+
(c_attributes[i + 4] - c_attributes[i + 3]));
|
108
109
|
|
109
|
-
|
110
|
-
rb_ary_push(
|
110
|
+
rb_attribute = rb_class_new_instance(4, rb_constructor_args, cNokogiriXmlSaxParserAttribute);
|
111
|
+
rb_ary_push(rb_array, rb_attribute);
|
111
112
|
}
|
112
113
|
}
|
113
114
|
|
114
|
-
return
|
115
|
+
return rb_array;
|
115
116
|
}
|
116
117
|
|
117
118
|
static void
|
118
|
-
start_element_ns
|
119
|
-
void *
|
120
|
-
const xmlChar *
|
121
|
-
const xmlChar *
|
122
|
-
const xmlChar *
|
119
|
+
start_element_ns(
|
120
|
+
void *ctx,
|
121
|
+
const xmlChar *localname,
|
122
|
+
const xmlChar *prefix,
|
123
|
+
const xmlChar *uri,
|
123
124
|
int nb_namespaces,
|
124
|
-
const xmlChar **
|
125
|
+
const xmlChar **namespaces,
|
125
126
|
int nb_attributes,
|
126
127
|
int nb_defaulted,
|
127
|
-
const xmlChar **
|
128
|
+
const xmlChar **attributes)
|
128
129
|
{
|
129
130
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
130
131
|
VALUE doc = rb_iv_get(self, "@document");
|
131
132
|
|
132
|
-
VALUE
|
133
|
+
VALUE attribute_ary = attributes_as_array(nb_attributes, attributes);
|
133
134
|
|
134
135
|
VALUE ns_list = rb_ary_new2((long)nb_namespaces);
|
135
136
|
|
136
137
|
if (namespaces) {
|
137
138
|
int i;
|
138
|
-
for (i = 0; i < nb_namespaces * 2; i += 2)
|
139
|
-
{
|
139
|
+
for (i = 0; i < nb_namespaces * 2; i += 2) {
|
140
140
|
rb_ary_push(ns_list,
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
141
|
+
rb_ary_new3((long)2,
|
142
|
+
RBSTR_OR_QNIL(namespaces[i + 0]),
|
143
|
+
RBSTR_OR_QNIL(namespaces[i + 1])
|
144
|
+
)
|
145
|
+
);
|
146
146
|
}
|
147
147
|
}
|
148
148
|
|
149
|
-
rb_funcall(
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
149
|
+
rb_funcall(doc,
|
150
|
+
id_start_element_namespace,
|
151
|
+
5,
|
152
|
+
NOKOGIRI_STR_NEW2(localname),
|
153
|
+
attribute_ary,
|
154
|
+
RBSTR_OR_QNIL(prefix),
|
155
|
+
RBSTR_OR_QNIL(uri),
|
156
|
+
ns_list
|
157
|
+
);
|
158
158
|
}
|
159
159
|
|
160
160
|
/**
|
161
161
|
* end_element_ns was borrowed heavily from libxml-ruby.
|
162
162
|
*/
|
163
163
|
static void
|
164
|
-
end_element_ns
|
165
|
-
void *
|
166
|
-
const xmlChar *
|
167
|
-
const xmlChar *
|
168
|
-
const xmlChar *
|
164
|
+
end_element_ns(
|
165
|
+
void *ctx,
|
166
|
+
const xmlChar *localname,
|
167
|
+
const xmlChar *prefix,
|
168
|
+
const xmlChar *uri)
|
169
169
|
{
|
170
170
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
171
171
|
VALUE doc = rb_iv_get(self, "@document");
|
172
172
|
|
173
173
|
rb_funcall(doc, id_end_element_namespace, 3,
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
174
|
+
NOKOGIRI_STR_NEW2(localname),
|
175
|
+
RBSTR_OR_QNIL(prefix),
|
176
|
+
RBSTR_OR_QNIL(uri)
|
177
|
+
);
|
178
178
|
}
|
179
179
|
|
180
|
-
static void
|
180
|
+
static void
|
181
|
+
characters_func(void *ctx, const xmlChar *ch, int len)
|
181
182
|
{
|
182
183
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
183
184
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -185,7 +186,8 @@ static void characters_func(void * ctx, const xmlChar * ch, int len)
|
|
185
186
|
rb_funcall(doc, id_characters, 1, str);
|
186
187
|
}
|
187
188
|
|
188
|
-
static void
|
189
|
+
static void
|
190
|
+
comment_func(void *ctx, const xmlChar *value)
|
189
191
|
{
|
190
192
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
191
193
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -193,41 +195,52 @@ static void comment_func(void * ctx, const xmlChar * value)
|
|
193
195
|
rb_funcall(doc, id_comment, 1, str);
|
194
196
|
}
|
195
197
|
|
196
|
-
|
198
|
+
PRINTFLIKE_DECL(2, 3)
|
199
|
+
static void
|
200
|
+
warning_func(void *ctx, const char *msg, ...)
|
197
201
|
{
|
198
202
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
199
203
|
VALUE doc = rb_iv_get(self, "@document");
|
200
|
-
|
201
|
-
VALUE ruby_message;
|
204
|
+
VALUE rb_message;
|
202
205
|
|
206
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
207
|
+
/* It is not currently possible to pass var args from native
|
208
|
+
functions to sulong, so we work around the issue here. */
|
209
|
+
rb_message = rb_sprintf("warning_func: %s", msg);
|
210
|
+
#else
|
203
211
|
va_list args;
|
204
212
|
va_start(args, msg);
|
205
|
-
|
213
|
+
rb_message = rb_vsprintf(msg, args);
|
206
214
|
va_end(args);
|
215
|
+
#endif
|
207
216
|
|
208
|
-
|
209
|
-
vasprintf_free(message);
|
210
|
-
rb_funcall(doc, id_warning, 1, ruby_message);
|
217
|
+
rb_funcall(doc, id_warning, 1, rb_message);
|
211
218
|
}
|
212
219
|
|
213
|
-
|
220
|
+
PRINTFLIKE_DECL(2, 3)
|
221
|
+
static void
|
222
|
+
error_func(void *ctx, const char *msg, ...)
|
214
223
|
{
|
215
224
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
216
225
|
VALUE doc = rb_iv_get(self, "@document");
|
217
|
-
|
218
|
-
VALUE ruby_message;
|
226
|
+
VALUE rb_message;
|
219
227
|
|
228
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
229
|
+
/* It is not currently possible to pass var args from native
|
230
|
+
functions to sulong, so we work around the issue here. */
|
231
|
+
rb_message = rb_sprintf("error_func: %s", msg);
|
232
|
+
#else
|
220
233
|
va_list args;
|
221
234
|
va_start(args, msg);
|
222
|
-
|
235
|
+
rb_message = rb_vsprintf(msg, args);
|
223
236
|
va_end(args);
|
237
|
+
#endif
|
224
238
|
|
225
|
-
|
226
|
-
vasprintf_free(message);
|
227
|
-
rb_funcall(doc, id_error, 1, ruby_message);
|
239
|
+
rb_funcall(doc, id_error, 1, rb_message);
|
228
240
|
}
|
229
241
|
|
230
|
-
static void
|
242
|
+
static void
|
243
|
+
cdata_block(void *ctx, const xmlChar *value, int len)
|
231
244
|
{
|
232
245
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
233
246
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -235,7 +248,8 @@ static void cdata_block(void * ctx, const xmlChar * value, int len)
|
|
235
248
|
rb_funcall(doc, id_cdata_block, 1, string);
|
236
249
|
}
|
237
250
|
|
238
|
-
static void
|
251
|
+
static void
|
252
|
+
processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
|
239
253
|
{
|
240
254
|
VALUE rb_content;
|
241
255
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
@@ -243,26 +257,35 @@ static void processing_instruction(void * ctx, const xmlChar * name, const xmlCh
|
|
243
257
|
|
244
258
|
rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
|
245
259
|
|
246
|
-
rb_funcall(
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
260
|
+
rb_funcall(doc,
|
261
|
+
id_processing_instruction,
|
262
|
+
2,
|
263
|
+
NOKOGIRI_STR_NEW2(name),
|
264
|
+
rb_content
|
265
|
+
);
|
252
266
|
}
|
253
267
|
|
254
|
-
static
|
268
|
+
static size_t
|
269
|
+
memsize(const void *data)
|
255
270
|
{
|
256
|
-
|
257
|
-
free(handler);
|
258
|
-
NOKOGIRI_DEBUG_END(handler);
|
271
|
+
return sizeof(xmlSAXHandler);
|
259
272
|
}
|
260
273
|
|
261
|
-
|
274
|
+
/* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
|
275
|
+
static const rb_data_type_t noko_sax_handler_type = {
|
276
|
+
.wrap_struct_name = "Nokogiri::SAXHandler",
|
277
|
+
.function = {
|
278
|
+
.dfree = RUBY_TYPED_DEFAULT_FREE,
|
279
|
+
.dsize = memsize
|
280
|
+
},
|
281
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
282
|
+
};
|
283
|
+
|
284
|
+
static VALUE
|
285
|
+
allocate(VALUE klass)
|
262
286
|
{
|
263
|
-
xmlSAXHandlerPtr handler
|
264
|
-
|
265
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
287
|
+
xmlSAXHandlerPtr handler;
|
288
|
+
VALUE self = TypedData_Make_Struct(klass, xmlSAXHandler, &noko_sax_handler_type, handler);
|
266
289
|
|
267
290
|
handler->startDocument = start_document;
|
268
291
|
handler->endDocument = end_document;
|
@@ -278,32 +301,34 @@ static VALUE allocate(VALUE klass)
|
|
278
301
|
handler->processingInstruction = processing_instruction;
|
279
302
|
handler->initialized = XML_SAX2_MAGIC;
|
280
303
|
|
281
|
-
return
|
304
|
+
return self;
|
282
305
|
}
|
283
306
|
|
284
|
-
|
285
|
-
|
307
|
+
xmlSAXHandlerPtr
|
308
|
+
noko_sax_handler_unwrap(VALUE rb_sax_handler)
|
286
309
|
{
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
310
|
+
xmlSAXHandlerPtr c_sax_handler;
|
311
|
+
TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &noko_sax_handler_type, c_sax_handler);
|
312
|
+
return c_sax_handler;
|
313
|
+
}
|
291
314
|
|
292
|
-
|
315
|
+
void
|
316
|
+
noko_init_xml_sax_parser(void)
|
317
|
+
{
|
318
|
+
cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
|
293
319
|
|
294
|
-
rb_define_alloc_func(
|
320
|
+
rb_define_alloc_func(cNokogiriXmlSaxParser, allocate);
|
295
321
|
|
296
322
|
id_start_document = rb_intern("start_document");
|
297
|
-
id_end_document
|
298
|
-
id_start_element
|
299
|
-
id_end_element
|
300
|
-
id_comment
|
301
|
-
id_characters
|
302
|
-
id_xmldecl
|
303
|
-
id_error
|
304
|
-
id_warning
|
305
|
-
id_cdata_block
|
306
|
-
id_cAttribute = rb_intern("Attribute");
|
323
|
+
id_end_document = rb_intern("end_document");
|
324
|
+
id_start_element = rb_intern("start_element");
|
325
|
+
id_end_element = rb_intern("end_element");
|
326
|
+
id_comment = rb_intern("comment");
|
327
|
+
id_characters = rb_intern("characters");
|
328
|
+
id_xmldecl = rb_intern("xmldecl");
|
329
|
+
id_error = rb_intern("error");
|
330
|
+
id_warning = rb_intern("warning");
|
331
|
+
id_cdata_block = rb_intern("cdata_block");
|
307
332
|
id_start_element_namespace = rb_intern("start_element_namespace");
|
308
333
|
id_end_element_namespace = rb_intern("end_element_namespace");
|
309
334
|
id_processing_instruction = rb_intern("processing_instruction");
|