nokogiri 1.5.10 → 1.13.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1903 -0
- data/LICENSE.md +9 -0
- data/README.md +280 -0
- data/bin/nokogiri +84 -31
- data/dependencies.yml +73 -0
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +956 -100
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +232 -87
- data/ext/nokogiri/nokogiri.h +188 -129
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +49 -40
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +24 -23
- data/ext/nokogiri/xml_comment.c +29 -21
- data/ext/nokogiri/xml_document.c +327 -223
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +45 -20
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +74 -32
- data/ext/nokogiri/xml_node.c +1290 -680
- data/ext/nokogiri/xml_node_set.c +239 -208
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +227 -189
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +123 -125
- data/ext/nokogiri/xml_sax_parser_context.c +138 -79
- data/ext/nokogiri/xml_sax_push_parser.c +88 -35
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +50 -23
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +227 -140
- data/ext/nokogiri/xslt_stylesheet.c +162 -168
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -58
- data/lib/nokogiri/css/parser.rb +327 -288
- data/lib/nokogiri/css/parser.y +67 -45
- data/lib/nokogiri/css/parser_extras.rb +52 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +7 -6
- data/lib/nokogiri/css/xpath_visitor.rb +263 -75
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +17 -8
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +331 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +221 -0
- data/lib/nokogiri/version.rb +3 -90
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +96 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +234 -95
- data/lib/nokogiri/xml/document_fragment.rb +86 -36
- data/lib/nokogiri/xml/dtd.rb +16 -4
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +947 -502
- data/lib/nokogiri/xml/node_set.rb +168 -159
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +40 -5
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +43 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +259 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -36
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +18 -16
- data/lib/nokogiri.rb +69 -69
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +382 -460
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -56
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -13
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -14
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -254
- data/lib/nokogiri/html/document_fragment.rb +0 -41
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/lib/nokogiri/html/sax/push_parser.rb +0 -16
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlRelaxNG;
|
4
|
+
|
5
|
+
static void
|
6
|
+
dealloc(xmlRelaxNGPtr schema)
|
4
7
|
{
|
5
8
|
NOKOGIRI_DEBUG_START(schema);
|
6
9
|
xmlRelaxNGFree(schema);
|
@@ -13,7 +16,8 @@ static void dealloc(xmlRelaxNGPtr schema)
|
|
13
16
|
*
|
14
17
|
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
15
18
|
*/
|
16
|
-
static VALUE
|
19
|
+
static VALUE
|
20
|
+
validate_document(VALUE self, VALUE document)
|
17
21
|
{
|
18
22
|
xmlDocPtr doc;
|
19
23
|
xmlRelaxNGPtr schema;
|
@@ -27,7 +31,7 @@ static VALUE validate_document(VALUE self, VALUE document)
|
|
27
31
|
|
28
32
|
valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
|
29
33
|
|
30
|
-
if(NULL == valid_ctxt) {
|
34
|
+
if (NULL == valid_ctxt) {
|
31
35
|
/* we have a problem */
|
32
36
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
33
37
|
}
|
@@ -53,16 +57,25 @@ static VALUE validate_document(VALUE self, VALUE document)
|
|
53
57
|
*
|
54
58
|
* Create a new RelaxNG from the contents of +string+
|
55
59
|
*/
|
56
|
-
static VALUE
|
60
|
+
static VALUE
|
61
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
57
62
|
{
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
);
|
63
|
+
VALUE content;
|
64
|
+
VALUE parse_options;
|
65
|
+
xmlRelaxNGParserCtxtPtr ctx;
|
62
66
|
xmlRelaxNGPtr schema;
|
63
|
-
VALUE errors
|
67
|
+
VALUE errors;
|
64
68
|
VALUE rb_schema;
|
69
|
+
int scanned_args = 0;
|
70
|
+
|
71
|
+
scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
|
72
|
+
if (scanned_args == 1) {
|
73
|
+
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
74
|
+
}
|
65
75
|
|
76
|
+
ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
|
77
|
+
|
78
|
+
errors = rb_ary_new();
|
66
79
|
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
67
80
|
|
68
81
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
@@ -78,18 +91,20 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
|
78
91
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
79
92
|
xmlRelaxNGFreeParserCtxt(ctx);
|
80
93
|
|
81
|
-
if(NULL == schema) {
|
94
|
+
if (NULL == schema) {
|
82
95
|
xmlErrorPtr error = xmlGetLastError();
|
83
|
-
if(error)
|
96
|
+
if (error) {
|
84
97
|
Nokogiri_error_raise(NULL, error);
|
85
|
-
else
|
98
|
+
} else {
|
86
99
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
100
|
+
}
|
87
101
|
|
88
102
|
return Qnil;
|
89
103
|
}
|
90
104
|
|
91
105
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
92
106
|
rb_iv_set(rb_schema, "@errors", errors);
|
107
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
93
108
|
|
94
109
|
return rb_schema;
|
95
110
|
}
|
@@ -100,18 +115,26 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
|
100
115
|
*
|
101
116
|
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
102
117
|
*/
|
103
|
-
static VALUE
|
118
|
+
static VALUE
|
119
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
104
120
|
{
|
121
|
+
VALUE document;
|
122
|
+
VALUE parse_options;
|
105
123
|
xmlDocPtr doc;
|
106
124
|
xmlRelaxNGParserCtxtPtr ctx;
|
107
125
|
xmlRelaxNGPtr schema;
|
108
126
|
VALUE errors;
|
109
127
|
VALUE rb_schema;
|
128
|
+
int scanned_args = 0;
|
129
|
+
|
130
|
+
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
110
131
|
|
111
132
|
Data_Get_Struct(document, xmlDoc, doc);
|
133
|
+
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
112
134
|
|
113
|
-
|
114
|
-
|
135
|
+
if (scanned_args == 1) {
|
136
|
+
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
137
|
+
}
|
115
138
|
|
116
139
|
ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
117
140
|
|
@@ -129,33 +152,34 @@ static VALUE from_document(VALUE klass, VALUE document)
|
|
129
152
|
schema = xmlRelaxNGParse(ctx);
|
130
153
|
|
131
154
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
155
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
132
156
|
|
133
|
-
if(NULL == schema) {
|
157
|
+
if (NULL == schema) {
|
134
158
|
xmlErrorPtr error = xmlGetLastError();
|
135
|
-
if(error)
|
159
|
+
if (error) {
|
136
160
|
Nokogiri_error_raise(NULL, error);
|
137
|
-
else
|
161
|
+
} else {
|
138
162
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
163
|
+
}
|
139
164
|
|
140
165
|
return Qnil;
|
141
166
|
}
|
142
167
|
|
143
168
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
144
169
|
rb_iv_set(rb_schema, "@errors", errors);
|
170
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
145
171
|
|
146
172
|
return rb_schema;
|
147
173
|
}
|
148
174
|
|
149
|
-
|
150
|
-
|
175
|
+
void
|
176
|
+
noko_init_xml_relax_ng()
|
151
177
|
{
|
152
|
-
|
153
|
-
|
154
|
-
VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
|
178
|
+
assert(cNokogiriXmlSchema);
|
179
|
+
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
|
155
180
|
|
156
|
-
cNokogiriXmlRelaxNG
|
181
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1);
|
182
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
|
157
183
|
|
158
|
-
|
159
|
-
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
160
|
-
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
184
|
+
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1);
|
161
185
|
}
|
@@ -1,38 +1,35 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
4
|
-
void vasprintf_free (void *p);
|
3
|
+
VALUE cNokogiriXmlSaxParser ;
|
5
4
|
|
6
5
|
static ID id_start_document, id_end_document, id_start_element, id_end_element;
|
7
6
|
static ID id_start_element_namespace, id_end_element_namespace;
|
8
7
|
static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
|
9
|
-
static ID id_cdata_block
|
8
|
+
static ID id_cdata_block;
|
10
9
|
static ID id_processing_instruction;
|
11
10
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
static void start_document(void * ctx)
|
11
|
+
static void
|
12
|
+
start_document(void *ctx)
|
16
13
|
{
|
17
14
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
18
15
|
VALUE doc = rb_iv_get(self, "@document");
|
19
16
|
|
20
17
|
xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
|
21
18
|
|
22
|
-
if(NULL != ctxt && ctxt->html != 1) {
|
23
|
-
if(ctxt->standalone != -1) {
|
24
|
-
VALUE encoding =
|
25
|
-
NOKOGIRI_STR_NEW2(ctxt->encoding) :
|
26
|
-
Qnil;
|
27
|
-
|
28
|
-
VALUE version = ctxt->version ?
|
29
|
-
NOKOGIRI_STR_NEW2(ctxt->version) :
|
30
|
-
Qnil;
|
31
|
-
|
19
|
+
if (NULL != ctxt && ctxt->html != 1) {
|
20
|
+
if (ctxt->standalone != -1) { /* -1 means there was no declaration */
|
21
|
+
VALUE encoding = Qnil ;
|
32
22
|
VALUE standalone = Qnil;
|
23
|
+
VALUE version;
|
24
|
+
if (ctxt->encoding) {
|
25
|
+
encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
|
26
|
+
} else if (ctxt->input && ctxt->input->encoding) {
|
27
|
+
encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
|
28
|
+
}
|
33
29
|
|
34
|
-
|
35
|
-
|
30
|
+
version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
|
31
|
+
|
32
|
+
switch (ctxt->standalone) {
|
36
33
|
case 0:
|
37
34
|
standalone = NOKOGIRI_STR_NEW2("no");
|
38
35
|
break;
|
@@ -48,138 +45,140 @@ static void start_document(void * ctx)
|
|
48
45
|
rb_funcall(doc, id_start_document, 0);
|
49
46
|
}
|
50
47
|
|
51
|
-
static void
|
48
|
+
static void
|
49
|
+
end_document(void *ctx)
|
52
50
|
{
|
53
51
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
54
52
|
VALUE doc = rb_iv_get(self, "@document");
|
55
53
|
rb_funcall(doc, id_end_document, 0);
|
56
54
|
}
|
57
55
|
|
58
|
-
static void
|
56
|
+
static void
|
57
|
+
start_element(void *ctx, const xmlChar *name, const xmlChar **atts)
|
59
58
|
{
|
60
59
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
61
60
|
VALUE doc = rb_iv_get(self, "@document");
|
62
61
|
VALUE attributes = rb_ary_new();
|
63
|
-
const xmlChar *
|
62
|
+
const xmlChar *attr;
|
64
63
|
int i = 0;
|
65
|
-
if(atts) {
|
66
|
-
while((attr = atts[i]) != NULL) {
|
67
|
-
const xmlChar *
|
64
|
+
if (atts) {
|
65
|
+
while ((attr = atts[i]) != NULL) {
|
66
|
+
const xmlChar *val = atts[i + 1];
|
68
67
|
VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
|
69
68
|
rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
|
70
|
-
i+=2;
|
69
|
+
i += 2;
|
71
70
|
}
|
72
71
|
}
|
73
72
|
|
74
|
-
rb_funcall(
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
73
|
+
rb_funcall(doc,
|
74
|
+
id_start_element,
|
75
|
+
2,
|
76
|
+
NOKOGIRI_STR_NEW2(name),
|
77
|
+
attributes
|
78
|
+
);
|
80
79
|
}
|
81
80
|
|
82
|
-
static void
|
81
|
+
static void
|
82
|
+
end_element(void *ctx, const xmlChar *name)
|
83
83
|
{
|
84
84
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
85
85
|
VALUE doc = rb_iv_get(self, "@document");
|
86
86
|
rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
|
87
87
|
}
|
88
88
|
|
89
|
-
static VALUE
|
90
|
-
|
91
|
-
int nb_attributes,
|
92
|
-
const xmlChar ** attributes)
|
89
|
+
static VALUE
|
90
|
+
attributes_as_array(int attributes_len, const xmlChar **c_attributes)
|
93
91
|
{
|
94
|
-
VALUE
|
92
|
+
VALUE rb_array = rb_ary_new2((long)attributes_len);
|
93
|
+
VALUE cNokogiriXmlSaxParserAttribute;
|
95
94
|
|
96
|
-
|
97
|
-
if (
|
95
|
+
cNokogiriXmlSaxParserAttribute = rb_const_get_at(cNokogiriXmlSaxParser, rb_intern("Attribute"));
|
96
|
+
if (c_attributes) {
|
98
97
|
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
99
98
|
int i;
|
100
|
-
for (i = 0; i <
|
101
|
-
VALUE
|
99
|
+
for (i = 0; i < attributes_len * 5; i += 5) {
|
100
|
+
VALUE rb_constructor_args[4], rb_attribute;
|
102
101
|
|
103
|
-
|
104
|
-
|
105
|
-
|
102
|
+
rb_constructor_args[0] = RBSTR_OR_QNIL(c_attributes[i + 0]); /* localname */
|
103
|
+
rb_constructor_args[1] = RBSTR_OR_QNIL(c_attributes[i + 1]); /* prefix */
|
104
|
+
rb_constructor_args[2] = RBSTR_OR_QNIL(c_attributes[i + 2]); /* URI */
|
106
105
|
|
107
106
|
/* value */
|
108
|
-
|
109
|
-
|
107
|
+
rb_constructor_args[3] = NOKOGIRI_STR_NEW((const char *)c_attributes[i + 3],
|
108
|
+
(c_attributes[i + 4] - c_attributes[i + 3]));
|
110
109
|
|
111
|
-
|
112
|
-
rb_ary_push(
|
110
|
+
rb_attribute = rb_class_new_instance(4, rb_constructor_args, cNokogiriXmlSaxParserAttribute);
|
111
|
+
rb_ary_push(rb_array, rb_attribute);
|
113
112
|
}
|
114
113
|
}
|
115
114
|
|
116
|
-
return
|
115
|
+
return rb_array;
|
117
116
|
}
|
118
117
|
|
119
118
|
static void
|
120
|
-
start_element_ns
|
121
|
-
void *
|
122
|
-
const xmlChar *
|
123
|
-
const xmlChar *
|
124
|
-
const xmlChar *
|
119
|
+
start_element_ns(
|
120
|
+
void *ctx,
|
121
|
+
const xmlChar *localname,
|
122
|
+
const xmlChar *prefix,
|
123
|
+
const xmlChar *uri,
|
125
124
|
int nb_namespaces,
|
126
|
-
const xmlChar **
|
125
|
+
const xmlChar **namespaces,
|
127
126
|
int nb_attributes,
|
128
127
|
int nb_defaulted,
|
129
|
-
const xmlChar **
|
128
|
+
const xmlChar **attributes)
|
130
129
|
{
|
131
130
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
132
131
|
VALUE doc = rb_iv_get(self, "@document");
|
133
132
|
|
134
|
-
VALUE
|
133
|
+
VALUE attribute_ary = attributes_as_array(nb_attributes, attributes);
|
135
134
|
|
136
135
|
VALUE ns_list = rb_ary_new2((long)nb_namespaces);
|
137
136
|
|
138
137
|
if (namespaces) {
|
139
138
|
int i;
|
140
|
-
for (i = 0; i < nb_namespaces * 2; i += 2)
|
141
|
-
{
|
139
|
+
for (i = 0; i < nb_namespaces * 2; i += 2) {
|
142
140
|
rb_ary_push(ns_list,
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
141
|
+
rb_ary_new3((long)2,
|
142
|
+
RBSTR_OR_QNIL(namespaces[i + 0]),
|
143
|
+
RBSTR_OR_QNIL(namespaces[i + 1])
|
144
|
+
)
|
145
|
+
);
|
148
146
|
}
|
149
147
|
}
|
150
148
|
|
151
|
-
rb_funcall(
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
149
|
+
rb_funcall(doc,
|
150
|
+
id_start_element_namespace,
|
151
|
+
5,
|
152
|
+
NOKOGIRI_STR_NEW2(localname),
|
153
|
+
attribute_ary,
|
154
|
+
RBSTR_OR_QNIL(prefix),
|
155
|
+
RBSTR_OR_QNIL(uri),
|
156
|
+
ns_list
|
157
|
+
);
|
160
158
|
}
|
161
159
|
|
162
160
|
/**
|
163
|
-
* end_element_ns was borrowed heavily from libxml-ruby.
|
161
|
+
* end_element_ns was borrowed heavily from libxml-ruby.
|
164
162
|
*/
|
165
163
|
static void
|
166
|
-
end_element_ns
|
167
|
-
void *
|
168
|
-
const xmlChar *
|
169
|
-
const xmlChar *
|
170
|
-
const xmlChar *
|
164
|
+
end_element_ns(
|
165
|
+
void *ctx,
|
166
|
+
const xmlChar *localname,
|
167
|
+
const xmlChar *prefix,
|
168
|
+
const xmlChar *uri)
|
171
169
|
{
|
172
170
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
173
171
|
VALUE doc = rb_iv_get(self, "@document");
|
174
172
|
|
175
|
-
rb_funcall(doc, id_end_element_namespace, 3,
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
173
|
+
rb_funcall(doc, id_end_element_namespace, 3,
|
174
|
+
NOKOGIRI_STR_NEW2(localname),
|
175
|
+
RBSTR_OR_QNIL(prefix),
|
176
|
+
RBSTR_OR_QNIL(uri)
|
177
|
+
);
|
180
178
|
}
|
181
179
|
|
182
|
-
static void
|
180
|
+
static void
|
181
|
+
characters_func(void *ctx, const xmlChar *ch, int len)
|
183
182
|
{
|
184
183
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
185
184
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -187,7 +186,8 @@ static void characters_func(void * ctx, const xmlChar * ch, int len)
|
|
187
186
|
rb_funcall(doc, id_characters, 1, str);
|
188
187
|
}
|
189
188
|
|
190
|
-
static void
|
189
|
+
static void
|
190
|
+
comment_func(void *ctx, const xmlChar *value)
|
191
191
|
{
|
192
192
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
193
193
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -195,11 +195,12 @@ static void comment_func(void * ctx, const xmlChar * value)
|
|
195
195
|
rb_funcall(doc, id_comment, 1, str);
|
196
196
|
}
|
197
197
|
|
198
|
-
static void
|
198
|
+
static void
|
199
|
+
warning_func(void *ctx, const char *msg, ...)
|
199
200
|
{
|
200
201
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
201
202
|
VALUE doc = rb_iv_get(self, "@document");
|
202
|
-
char *
|
203
|
+
char *message;
|
203
204
|
VALUE ruby_message;
|
204
205
|
|
205
206
|
va_list args;
|
@@ -208,15 +209,16 @@ static void warning_func(void * ctx, const char *msg, ...)
|
|
208
209
|
va_end(args);
|
209
210
|
|
210
211
|
ruby_message = NOKOGIRI_STR_NEW2(message);
|
211
|
-
|
212
|
+
free(message);
|
212
213
|
rb_funcall(doc, id_warning, 1, ruby_message);
|
213
214
|
}
|
214
215
|
|
215
|
-
static void
|
216
|
+
static void
|
217
|
+
error_func(void *ctx, const char *msg, ...)
|
216
218
|
{
|
217
219
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
218
220
|
VALUE doc = rb_iv_get(self, "@document");
|
219
|
-
char *
|
221
|
+
char *message;
|
220
222
|
VALUE ruby_message;
|
221
223
|
|
222
224
|
va_list args;
|
@@ -225,11 +227,12 @@ static void error_func(void * ctx, const char *msg, ...)
|
|
225
227
|
va_end(args);
|
226
228
|
|
227
229
|
ruby_message = NOKOGIRI_STR_NEW2(message);
|
228
|
-
|
230
|
+
free(message);
|
229
231
|
rb_funcall(doc, id_error, 1, ruby_message);
|
230
232
|
}
|
231
233
|
|
232
|
-
static void
|
234
|
+
static void
|
235
|
+
cdata_block(void *ctx, const xmlChar *value, int len)
|
233
236
|
{
|
234
237
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
235
238
|
VALUE doc = rb_iv_get(self, "@document");
|
@@ -237,7 +240,8 @@ static void cdata_block(void * ctx, const xmlChar * value, int len)
|
|
237
240
|
rb_funcall(doc, id_cdata_block, 1, string);
|
238
241
|
}
|
239
242
|
|
240
|
-
static void
|
243
|
+
static void
|
244
|
+
processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
|
241
245
|
{
|
242
246
|
VALUE rb_content;
|
243
247
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
@@ -245,27 +249,27 @@ static void processing_instruction(void * ctx, const xmlChar * name, const xmlCh
|
|
245
249
|
|
246
250
|
rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
|
247
251
|
|
248
|
-
rb_funcall(
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
252
|
+
rb_funcall(doc,
|
253
|
+
id_processing_instruction,
|
254
|
+
2,
|
255
|
+
NOKOGIRI_STR_NEW2(name),
|
256
|
+
rb_content
|
257
|
+
);
|
254
258
|
}
|
255
259
|
|
256
|
-
static void
|
260
|
+
static void
|
261
|
+
deallocate(xmlSAXHandlerPtr handler)
|
257
262
|
{
|
258
263
|
NOKOGIRI_DEBUG_START(handler);
|
259
264
|
free(handler);
|
260
265
|
NOKOGIRI_DEBUG_END(handler);
|
261
266
|
}
|
262
267
|
|
263
|
-
static VALUE
|
268
|
+
static VALUE
|
269
|
+
allocate(VALUE klass)
|
264
270
|
{
|
265
271
|
xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
|
266
272
|
|
267
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
268
|
-
|
269
273
|
handler->startDocument = start_document;
|
270
274
|
handler->endDocument = end_document;
|
271
275
|
handler->startElement = start_element;
|
@@ -283,29 +287,23 @@ static VALUE allocate(VALUE klass)
|
|
283
287
|
return Data_Wrap_Struct(klass, NULL, deallocate, handler);
|
284
288
|
}
|
285
289
|
|
286
|
-
|
287
|
-
|
290
|
+
void
|
291
|
+
noko_init_xml_sax_parser()
|
288
292
|
{
|
289
|
-
|
290
|
-
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
291
|
-
VALUE sax = rb_define_module_under(xml, "SAX");
|
292
|
-
VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
|
293
|
-
|
294
|
-
cNokogiriXmlSaxParser = klass;
|
293
|
+
cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
|
295
294
|
|
296
|
-
rb_define_alloc_func(
|
295
|
+
rb_define_alloc_func(cNokogiriXmlSaxParser, allocate);
|
297
296
|
|
298
297
|
id_start_document = rb_intern("start_document");
|
299
|
-
id_end_document
|
300
|
-
id_start_element
|
301
|
-
id_end_element
|
302
|
-
id_comment
|
303
|
-
id_characters
|
304
|
-
id_xmldecl
|
305
|
-
id_error
|
306
|
-
id_warning
|
307
|
-
id_cdata_block
|
308
|
-
id_cAttribute = rb_intern("Attribute");
|
298
|
+
id_end_document = rb_intern("end_document");
|
299
|
+
id_start_element = rb_intern("start_element");
|
300
|
+
id_end_element = rb_intern("end_element");
|
301
|
+
id_comment = rb_intern("comment");
|
302
|
+
id_characters = rb_intern("characters");
|
303
|
+
id_xmldecl = rb_intern("xmldecl");
|
304
|
+
id_error = rb_intern("error");
|
305
|
+
id_warning = rb_intern("warning");
|
306
|
+
id_cdata_block = rb_intern("cdata_block");
|
309
307
|
id_start_element_namespace = rb_intern("start_element_namespace");
|
310
308
|
id_end_element_namespace = rb_intern("end_element_namespace");
|
311
309
|
id_processing_instruction = rb_intern("processing_instruction");
|