nokogiri 1.2.3-x86-mswin32-60 → 1.4.5-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +18 -7
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +297 -3
- data/CHANGELOG.rdoc +289 -0
- data/Manifest.txt +148 -37
- data/README.ja.rdoc +20 -20
- data/README.rdoc +53 -22
- data/Rakefile +127 -211
- data/bin/nokogiri +54 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +89 -54
- data/ext/nokogiri/html_document.c +34 -27
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +276 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +7 -5
- data/ext/nokogiri/html_entity_lookup.h +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +94 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/{native.c → nokogiri.c} +31 -7
- data/ext/nokogiri/{native.h → nokogiri.h} +68 -41
- data/ext/nokogiri/xml_attr.c +20 -9
- data/ext/nokogiri/xml_attr.h +1 -1
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +21 -9
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +18 -6
- data/ext/nokogiri/xml_comment.h +1 -1
- data/ext/nokogiri/xml_document.c +247 -68
- data/ext/nokogiri/xml_document.h +5 -3
- data/ext/nokogiri/xml_document_fragment.c +15 -7
- data/ext/nokogiri/xml_document_fragment.h +1 -1
- data/ext/nokogiri/xml_dtd.c +110 -10
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +16 -5
- data/ext/nokogiri/xml_entity_reference.h +1 -1
- data/ext/nokogiri/xml_io.c +40 -8
- data/ext/nokogiri/xml_io.h +2 -1
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +84 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +782 -225
- data/ext/nokogiri/xml_node.h +2 -4
- data/ext/nokogiri/xml_node_set.c +253 -34
- data/ext/nokogiri/xml_node_set.h +2 -2
- data/ext/nokogiri/xml_processing_instruction.c +17 -5
- data/ext/nokogiri/xml_processing_instruction.h +1 -1
- data/ext/nokogiri/xml_reader.c +277 -85
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +168 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +183 -111
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +199 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +42 -12
- data/ext/nokogiri/xml_sax_push_parser.h +1 -1
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +28 -173
- data/ext/nokogiri/xml_syntax_error.h +2 -1
- data/ext/nokogiri/xml_text.c +16 -6
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +104 -47
- data/ext/nokogiri/xml_xpath_context.h +1 -1
- data/ext/nokogiri/xslt_stylesheet.c +161 -19
- data/ext/nokogiri/xslt_stylesheet.h +1 -1
- data/lib/nokogiri.rb +47 -8
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +6 -3
- data/lib/nokogiri/css/node.rb +14 -12
- data/lib/nokogiri/css/parser.rb +665 -62
- data/lib/nokogiri/css/parser.y +20 -10
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +10 -9
- data/lib/nokogiri/css/xpath_visitor.rb +47 -44
- data/lib/nokogiri/decorators/slop.rb +8 -4
- data/lib/nokogiri/ffi/encoding_handler.rb +42 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +81 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +420 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +38 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +20 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +117 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xml_parser_input.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +124 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +38 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +174 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +67 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +36 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +559 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +150 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +236 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +143 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +79 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +51 -0
- data/lib/nokogiri/ffi/xml/schema.rb +109 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +98 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +9 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +153 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +77 -0
- data/lib/nokogiri/html.rb +13 -47
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +201 -7
- data/lib/nokogiri/html/document_fragment.rb +41 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +2 -0
- data/lib/nokogiri/html/sax/parser.rb +34 -3
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/version.rb +40 -1
- data/lib/nokogiri/version_warning.rb +14 -0
- data/lib/nokogiri/xml.rb +32 -53
- data/lib/nokogiri/xml/attr.rb +5 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +349 -29
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +166 -14
- data/lib/nokogiri/xml/document_fragment.rb +76 -1
- data/lib/nokogiri/xml/dtd.rb +16 -3
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +561 -166
- data/lib/nokogiri/xml/node/save_options.rb +22 -2
- data/lib/nokogiri/xml/node_set.rb +202 -40
- data/lib/nokogiri/xml/parse_options.rb +93 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -0
- data/lib/nokogiri/xml/reader.rb +93 -8
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +1 -7
- data/lib/nokogiri/xml/sax/document.rb +107 -2
- data/lib/nokogiri/xml/sax/parser.rb +57 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +13 -1
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath.rb +1 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +3 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -0
- data/lib/nokogiri/xslt.rb +26 -2
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/xsd/xmlparser/nokogiri.rb +45 -9
- data/tasks/cross_compile.rb +173 -0
- data/tasks/test.rb +25 -69
- data/test/css/test_nthiness.rb +3 -4
- data/test/css/test_parser.rb +75 -20
- data/test/css/test_tokenizer.rb +23 -1
- data/test/css/test_xpath_visitor.rb +10 -1
- data/test/decorators/test_slop.rb +16 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +101 -23
- data/test/html/sax/test_parser.rb +81 -2
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +39 -8
- data/test/html/test_document.rb +186 -23
- data/test/html/test_document_encoding.rb +78 -1
- data/test/html/test_document_fragment.rb +253 -0
- data/test/html/test_element_description.rb +98 -0
- data/test/html/test_named_characters.rb +1 -1
- data/test/html/test_node.rb +124 -36
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +1 -52
- data/test/test_css_cache.rb +2 -13
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +88 -19
- data/test/test_nokogiri.rb +38 -5
- data/test/test_reader.rb +188 -6
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +183 -83
- data/test/xml/node/test_save_options.rb +1 -1
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +175 -4
- data/test/xml/sax/test_parser_context.rb +113 -0
- data/test/xml/sax/test_push_parser.rb +90 -2
- data/test/xml/test_attr.rb +35 -1
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +186 -1
- data/test/xml/test_cdata.rb +32 -1
- data/test/xml/test_comment.rb +13 -1
- data/test/xml/test_document.rb +415 -43
- data/test/xml/test_document_encoding.rb +1 -1
- data/test/xml/test_document_fragment.rb +173 -5
- data/test/xml/test_dtd.rb +61 -6
- data/test/xml/test_dtd_encoding.rb +3 -1
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +120 -0
- data/test/xml/test_entity_reference.rb +5 -1
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +546 -201
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +33 -3
- data/test/xml/test_node_reparenting.rb +321 -0
- data/test/xml/test_node_set.rb +538 -2
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +6 -1
- data/test/xml/test_reader_encoding.rb +1 -1
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +94 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +35 -1
- data/test/xml/test_unparented_node.rb +5 -5
- data/test/xml/test_xpath.rb +142 -11
- data/test/xslt/test_custom_functions.rb +94 -0
- metadata +328 -92
- data/ext/nokogiri/html_sax_parser.c +0 -57
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/native.so +0 -0
- data/ext/nokogiri/xml_xpath.c +0 -53
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +0 -30
- data/lib/nokogiri/css/generated_parser.rb +0 -713
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -144
- data/lib/nokogiri/decorators.rb +0 -2
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -28
- data/lib/nokogiri/hpricot.rb +0 -51
- data/lib/nokogiri/xml/comment.rb +0 -6
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/lib/nokogiri/xml/fragment_handler.rb +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -11
- data/test/hpricot/test_alter.rb +0 -68
- data/test/hpricot/test_builder.rb +0 -20
- data/test/hpricot/test_parser.rb +0 -426
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -77
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_gc.rb +0 -15
data/ext/nokogiri/xml_reader.h
CHANGED
@@ -0,0 +1,168 @@
|
|
1
|
+
#include <xml_relax_ng.h>
|
2
|
+
|
3
|
+
static void dealloc(xmlRelaxNGPtr schema)
|
4
|
+
{
|
5
|
+
NOKOGIRI_DEBUG_START(schema);
|
6
|
+
xmlRelaxNGFree(schema);
|
7
|
+
NOKOGIRI_DEBUG_END(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
/*
|
11
|
+
* call-seq:
|
12
|
+
* validate_document(document)
|
13
|
+
*
|
14
|
+
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
15
|
+
*/
|
16
|
+
static VALUE validate_document(VALUE self, VALUE document)
|
17
|
+
{
|
18
|
+
xmlDocPtr doc;
|
19
|
+
xmlRelaxNGPtr schema;
|
20
|
+
VALUE errors;
|
21
|
+
xmlRelaxNGValidCtxtPtr valid_ctxt;
|
22
|
+
|
23
|
+
Data_Get_Struct(self, xmlRelaxNG, schema);
|
24
|
+
Data_Get_Struct(document, xmlDoc, doc);
|
25
|
+
|
26
|
+
errors = rb_ary_new();
|
27
|
+
|
28
|
+
valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
|
29
|
+
|
30
|
+
if(NULL == valid_ctxt) {
|
31
|
+
/* we have a problem */
|
32
|
+
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
33
|
+
}
|
34
|
+
|
35
|
+
#ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
|
36
|
+
xmlRelaxNGSetValidStructuredErrors(
|
37
|
+
valid_ctxt,
|
38
|
+
Nokogiri_error_array_pusher,
|
39
|
+
(void *)errors
|
40
|
+
);
|
41
|
+
#endif
|
42
|
+
|
43
|
+
xmlRelaxNGValidateDoc(valid_ctxt, doc);
|
44
|
+
|
45
|
+
xmlRelaxNGFreeValidCtxt(valid_ctxt);
|
46
|
+
|
47
|
+
return errors;
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
|
51
|
+
* call-seq:
|
52
|
+
* read_memory(string)
|
53
|
+
*
|
54
|
+
* Create a new RelaxNG from the contents of +string+
|
55
|
+
*/
|
56
|
+
static VALUE read_memory(VALUE klass, VALUE content)
|
57
|
+
{
|
58
|
+
xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
|
59
|
+
(const char *)StringValuePtr(content),
|
60
|
+
(int)RSTRING_LEN(content)
|
61
|
+
);
|
62
|
+
xmlRelaxNGPtr schema;
|
63
|
+
VALUE errors = rb_ary_new();
|
64
|
+
VALUE rb_schema;
|
65
|
+
|
66
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
67
|
+
|
68
|
+
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
69
|
+
xmlRelaxNGSetParserStructuredErrors(
|
70
|
+
ctx,
|
71
|
+
Nokogiri_error_array_pusher,
|
72
|
+
(void *)errors
|
73
|
+
);
|
74
|
+
#endif
|
75
|
+
|
76
|
+
schema = xmlRelaxNGParse(ctx);
|
77
|
+
|
78
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
79
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
80
|
+
|
81
|
+
if(NULL == schema) {
|
82
|
+
xmlErrorPtr error = xmlGetLastError();
|
83
|
+
if(error)
|
84
|
+
Nokogiri_error_raise(NULL, error);
|
85
|
+
else
|
86
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
87
|
+
|
88
|
+
return Qnil;
|
89
|
+
}
|
90
|
+
|
91
|
+
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
92
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
93
|
+
|
94
|
+
return rb_schema;
|
95
|
+
}
|
96
|
+
|
97
|
+
/*
|
98
|
+
* call-seq:
|
99
|
+
* from_document(doc)
|
100
|
+
*
|
101
|
+
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
102
|
+
*/
|
103
|
+
static VALUE from_document(VALUE klass, VALUE document)
|
104
|
+
{
|
105
|
+
xmlDocPtr doc;
|
106
|
+
xmlRelaxNGParserCtxtPtr ctx;
|
107
|
+
xmlRelaxNGPtr schema;
|
108
|
+
VALUE errors;
|
109
|
+
VALUE rb_schema;
|
110
|
+
|
111
|
+
Data_Get_Struct(document, xmlDoc, doc);
|
112
|
+
|
113
|
+
/* In case someone passes us a node. ugh. */
|
114
|
+
doc = doc->doc;
|
115
|
+
|
116
|
+
ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
117
|
+
|
118
|
+
errors = rb_ary_new();
|
119
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
120
|
+
|
121
|
+
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
122
|
+
xmlRelaxNGSetParserStructuredErrors(
|
123
|
+
ctx,
|
124
|
+
Nokogiri_error_array_pusher,
|
125
|
+
(void *)errors
|
126
|
+
);
|
127
|
+
#endif
|
128
|
+
|
129
|
+
schema = xmlRelaxNGParse(ctx);
|
130
|
+
|
131
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
132
|
+
if (! is_2_6_16()) {
|
133
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
134
|
+
}
|
135
|
+
|
136
|
+
if(NULL == schema) {
|
137
|
+
xmlErrorPtr error = xmlGetLastError();
|
138
|
+
if(error)
|
139
|
+
Nokogiri_error_raise(NULL, error);
|
140
|
+
else
|
141
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
142
|
+
|
143
|
+
return Qnil;
|
144
|
+
}
|
145
|
+
|
146
|
+
if (is_2_6_16()) {
|
147
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
148
|
+
}
|
149
|
+
|
150
|
+
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
151
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
152
|
+
|
153
|
+
return rb_schema;
|
154
|
+
}
|
155
|
+
|
156
|
+
VALUE cNokogiriXmlRelaxNG;
|
157
|
+
void init_xml_relax_ng()
|
158
|
+
{
|
159
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
160
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
161
|
+
VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
|
162
|
+
|
163
|
+
cNokogiriXmlRelaxNG = klass;
|
164
|
+
|
165
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, 1);
|
166
|
+
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
167
|
+
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
168
|
+
}
|
@@ -1,139 +1,202 @@
|
|
1
|
-
#include <stdio.h>
|
2
1
|
#include <xml_sax_parser.h>
|
3
2
|
|
4
|
-
|
5
|
-
* call-seq:
|
6
|
-
* parse_memory(data)
|
7
|
-
*
|
8
|
-
* Parse the document stored in +data+
|
9
|
-
*/
|
10
|
-
static VALUE parse_memory(VALUE self, VALUE data)
|
11
|
-
{
|
12
|
-
xmlSAXHandlerPtr handler;
|
13
|
-
Data_Get_Struct(self, xmlSAXHandler, handler);
|
14
|
-
xmlSAXUserParseMemory( handler,
|
15
|
-
(void *)self,
|
16
|
-
StringValuePtr(data),
|
17
|
-
RSTRING_LEN(data)
|
18
|
-
);
|
19
|
-
return data;
|
20
|
-
}
|
3
|
+
int vasprintf (char **strp, const char *fmt, va_list ap);
|
21
4
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
* Parse the document accessable via +io+
|
27
|
-
*/
|
28
|
-
static VALUE native_parse_io(VALUE self, VALUE io, VALUE encoding)
|
29
|
-
{
|
30
|
-
xmlSAXHandlerPtr handler;
|
31
|
-
Data_Get_Struct(self, xmlSAXHandler, handler);
|
32
|
-
|
33
|
-
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
34
|
-
|
35
|
-
xmlParserCtxtPtr sax_ctx = xmlCreateIOParserCtxt(
|
36
|
-
handler,
|
37
|
-
(void *)self,
|
38
|
-
(xmlInputReadCallback)io_read_callback,
|
39
|
-
(xmlInputCloseCallback)io_close_callback,
|
40
|
-
(void *)io,
|
41
|
-
enc
|
42
|
-
);
|
43
|
-
xmlParseDocument(sax_ctx);
|
44
|
-
xmlFreeParserCtxt(sax_ctx);
|
45
|
-
return io;
|
46
|
-
}
|
5
|
+
static ID id_start_document, id_end_document, id_start_element, id_end_element;
|
6
|
+
static ID id_start_element_namespace, id_end_element_namespace;
|
7
|
+
static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
|
8
|
+
static ID id_cdata_block, id_cAttribute;
|
47
9
|
|
48
|
-
|
49
|
-
|
50
|
-
* native_parse_file(data)
|
51
|
-
*
|
52
|
-
* Parse the document stored in +data+
|
53
|
-
*/
|
54
|
-
static VALUE native_parse_file(VALUE self, VALUE data)
|
55
|
-
{
|
56
|
-
xmlSAXHandlerPtr handler;
|
57
|
-
Data_Get_Struct(self, xmlSAXHandler, handler);
|
58
|
-
xmlSAXUserParseFile( handler,
|
59
|
-
(void *)self,
|
60
|
-
StringValuePtr(data)
|
61
|
-
);
|
62
|
-
return data;
|
63
|
-
}
|
10
|
+
#define STRING_OR_NULL(str) \
|
11
|
+
(RTEST(str) ? StringValuePtr(str) : NULL)
|
64
12
|
|
65
13
|
static void start_document(void * ctx)
|
66
14
|
{
|
67
|
-
VALUE self = (
|
68
|
-
VALUE doc =
|
69
|
-
|
15
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
16
|
+
VALUE doc = rb_iv_get(self, "@document");
|
17
|
+
|
18
|
+
xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
|
19
|
+
|
20
|
+
if(NULL != ctxt && ctxt->html != 1) {
|
21
|
+
if(ctxt->standalone != -1) { /* -1 means there was no declaration */
|
22
|
+
VALUE encoding = ctxt->encoding ?
|
23
|
+
NOKOGIRI_STR_NEW2(ctxt->encoding) :
|
24
|
+
Qnil;
|
25
|
+
|
26
|
+
VALUE version = ctxt->version ?
|
27
|
+
NOKOGIRI_STR_NEW2(ctxt->version) :
|
28
|
+
Qnil;
|
29
|
+
|
30
|
+
VALUE standalone = Qnil;
|
31
|
+
|
32
|
+
switch(ctxt->standalone)
|
33
|
+
{
|
34
|
+
case 0:
|
35
|
+
standalone = NOKOGIRI_STR_NEW2("no");
|
36
|
+
break;
|
37
|
+
case 1:
|
38
|
+
standalone = NOKOGIRI_STR_NEW2("yes");
|
39
|
+
break;
|
40
|
+
}
|
41
|
+
|
42
|
+
rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
rb_funcall(doc, id_start_document, 0);
|
70
47
|
}
|
71
48
|
|
72
49
|
static void end_document(void * ctx)
|
73
50
|
{
|
74
|
-
VALUE self = (
|
75
|
-
VALUE doc =
|
76
|
-
rb_funcall(doc,
|
51
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
52
|
+
VALUE doc = rb_iv_get(self, "@document");
|
53
|
+
rb_funcall(doc, id_end_document, 0);
|
77
54
|
}
|
78
55
|
|
79
56
|
static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
|
80
57
|
{
|
81
|
-
VALUE self = (
|
82
|
-
VALUE doc =
|
58
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
59
|
+
VALUE doc = rb_iv_get(self, "@document");
|
83
60
|
VALUE attributes = rb_ary_new();
|
84
|
-
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
85
61
|
const xmlChar * attr;
|
86
62
|
int i = 0;
|
87
63
|
if(atts) {
|
88
64
|
while((attr = atts[i]) != NULL) {
|
89
|
-
|
90
|
-
|
91
|
-
);
|
92
|
-
i
|
65
|
+
const xmlChar * val = atts[i+1];
|
66
|
+
VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
|
67
|
+
rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
|
68
|
+
i+=2;
|
93
69
|
}
|
94
70
|
}
|
95
71
|
|
96
72
|
rb_funcall( doc,
|
97
|
-
|
73
|
+
id_start_element,
|
98
74
|
2,
|
99
|
-
NOKOGIRI_STR_NEW2(name
|
75
|
+
NOKOGIRI_STR_NEW2(name),
|
100
76
|
attributes
|
101
77
|
);
|
102
78
|
}
|
103
79
|
|
104
80
|
static void end_element(void * ctx, const xmlChar *name)
|
105
81
|
{
|
106
|
-
VALUE self = (
|
107
|
-
VALUE
|
108
|
-
|
109
|
-
|
110
|
-
|
82
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
83
|
+
VALUE doc = rb_iv_get(self, "@document");
|
84
|
+
rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
|
85
|
+
}
|
86
|
+
|
87
|
+
static VALUE attributes_as_list(
|
88
|
+
VALUE self,
|
89
|
+
int nb_attributes,
|
90
|
+
const xmlChar ** attributes)
|
91
|
+
{
|
92
|
+
VALUE list = rb_ary_new2((long)nb_attributes);
|
93
|
+
|
94
|
+
VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
|
95
|
+
if (attributes) {
|
96
|
+
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
97
|
+
int i;
|
98
|
+
for (i = 0; i < nb_attributes * 5; i += 5) {
|
99
|
+
VALUE argv[4], attribute;
|
100
|
+
|
101
|
+
argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */
|
102
|
+
argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */
|
103
|
+
argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */
|
104
|
+
|
105
|
+
/* value */
|
106
|
+
argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3],
|
107
|
+
(attributes[i+4] - attributes[i+3]));
|
108
|
+
|
109
|
+
attribute = rb_class_new_instance(4, argv, attr_klass);
|
110
|
+
rb_ary_push(list, attribute);
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
return list;
|
115
|
+
}
|
116
|
+
|
117
|
+
static void
|
118
|
+
start_element_ns (
|
119
|
+
void * ctx,
|
120
|
+
const xmlChar * localname,
|
121
|
+
const xmlChar * prefix,
|
122
|
+
const xmlChar * uri,
|
123
|
+
int nb_namespaces,
|
124
|
+
const xmlChar ** namespaces,
|
125
|
+
int nb_attributes,
|
126
|
+
int nb_defaulted,
|
127
|
+
const xmlChar ** attributes)
|
128
|
+
{
|
129
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
130
|
+
VALUE doc = rb_iv_get(self, "@document");
|
131
|
+
|
132
|
+
VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes);
|
133
|
+
|
134
|
+
VALUE ns_list = rb_ary_new2((long)nb_namespaces);
|
135
|
+
|
136
|
+
if (namespaces) {
|
137
|
+
int i;
|
138
|
+
for (i = 0; i < nb_namespaces * 2; i += 2)
|
139
|
+
{
|
140
|
+
rb_ary_push(ns_list,
|
141
|
+
rb_ary_new3((long)2,
|
142
|
+
RBSTR_OR_QNIL(namespaces[i + 0]),
|
143
|
+
RBSTR_OR_QNIL(namespaces[i + 1])
|
144
|
+
)
|
145
|
+
);
|
146
|
+
}
|
147
|
+
}
|
148
|
+
|
149
|
+
rb_funcall( doc,
|
150
|
+
id_start_element_namespace,
|
151
|
+
5,
|
152
|
+
NOKOGIRI_STR_NEW2(localname),
|
153
|
+
attribute_list,
|
154
|
+
RBSTR_OR_QNIL(prefix),
|
155
|
+
RBSTR_OR_QNIL(uri),
|
156
|
+
ns_list
|
157
|
+
);
|
158
|
+
}
|
159
|
+
|
160
|
+
/**
|
161
|
+
* end_element_ns was borrowed heavily from libxml-ruby.
|
162
|
+
*/
|
163
|
+
static void
|
164
|
+
end_element_ns (
|
165
|
+
void * ctx,
|
166
|
+
const xmlChar * localname,
|
167
|
+
const xmlChar * prefix,
|
168
|
+
const xmlChar * uri)
|
169
|
+
{
|
170
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
171
|
+
VALUE doc = rb_iv_get(self, "@document");
|
172
|
+
|
173
|
+
rb_funcall(doc, id_end_element_namespace, 3,
|
174
|
+
NOKOGIRI_STR_NEW2(localname),
|
175
|
+
RBSTR_OR_QNIL(prefix),
|
176
|
+
RBSTR_OR_QNIL(uri)
|
111
177
|
);
|
112
178
|
}
|
113
179
|
|
114
180
|
static void characters_func(void * ctx, const xmlChar * ch, int len)
|
115
181
|
{
|
116
|
-
VALUE self = (
|
117
|
-
VALUE
|
118
|
-
VALUE
|
119
|
-
|
120
|
-
rb_funcall(doc, rb_intern("characters"), 1, str);
|
182
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
183
|
+
VALUE doc = rb_iv_get(self, "@document");
|
184
|
+
VALUE str = NOKOGIRI_STR_NEW(ch, len);
|
185
|
+
rb_funcall(doc, id_characters, 1, str);
|
121
186
|
}
|
122
187
|
|
123
188
|
static void comment_func(void * ctx, const xmlChar * value)
|
124
189
|
{
|
125
|
-
VALUE self = (
|
126
|
-
VALUE
|
127
|
-
VALUE
|
128
|
-
|
129
|
-
rb_funcall(doc, rb_intern("comment"), 1, str);
|
190
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
191
|
+
VALUE doc = rb_iv_get(self, "@document");
|
192
|
+
VALUE str = NOKOGIRI_STR_NEW2(value);
|
193
|
+
rb_funcall(doc, id_comment, 1, str);
|
130
194
|
}
|
131
195
|
|
132
196
|
static void warning_func(void * ctx, const char *msg, ...)
|
133
197
|
{
|
134
|
-
VALUE self = (
|
135
|
-
VALUE doc =
|
136
|
-
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
198
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
199
|
+
VALUE doc = rb_iv_get(self, "@document");
|
137
200
|
char * message;
|
138
201
|
|
139
202
|
va_list args;
|
@@ -141,17 +204,14 @@ static void warning_func(void * ctx, const char *msg, ...)
|
|
141
204
|
vasprintf(&message, msg, args);
|
142
205
|
va_end(args);
|
143
206
|
|
144
|
-
rb_funcall(doc,
|
145
|
-
NOKOGIRI_STR_NEW2(message, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
146
|
-
);
|
207
|
+
rb_funcall(doc, id_warning, 1, NOKOGIRI_STR_NEW2(message));
|
147
208
|
free(message);
|
148
209
|
}
|
149
210
|
|
150
211
|
static void error_func(void * ctx, const char *msg, ...)
|
151
212
|
{
|
152
|
-
VALUE self = (
|
153
|
-
VALUE
|
154
|
-
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
213
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
214
|
+
VALUE doc = rb_iv_get(self, "@document");
|
155
215
|
char * message;
|
156
216
|
|
157
217
|
va_list args;
|
@@ -159,20 +219,16 @@ static void error_func(void * ctx, const char *msg, ...)
|
|
159
219
|
vasprintf(&message, msg, args);
|
160
220
|
va_end(args);
|
161
221
|
|
162
|
-
rb_funcall(doc,
|
163
|
-
NOKOGIRI_STR_NEW2(message, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
164
|
-
);
|
222
|
+
rb_funcall(doc, id_error, 1, NOKOGIRI_STR_NEW2(message));
|
165
223
|
free(message);
|
166
224
|
}
|
167
225
|
|
168
226
|
static void cdata_block(void * ctx, const xmlChar * value, int len)
|
169
227
|
{
|
170
|
-
VALUE self = (
|
171
|
-
VALUE
|
172
|
-
VALUE
|
173
|
-
|
174
|
-
NOKOGIRI_STR_NEW(value, len, RTEST(enc) ? StringValuePtr(enc) : NULL);
|
175
|
-
rb_funcall(doc, rb_intern("cdata_block"), 1, string);
|
228
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
229
|
+
VALUE doc = rb_iv_get(self, "@document");
|
230
|
+
VALUE string = NOKOGIRI_STR_NEW(value, len);
|
231
|
+
rb_funcall(doc, id_cdata_block, 1, string);
|
176
232
|
}
|
177
233
|
|
178
234
|
static void deallocate(xmlSAXHandlerPtr handler)
|
@@ -184,17 +240,22 @@ static void deallocate(xmlSAXHandlerPtr handler)
|
|
184
240
|
|
185
241
|
static VALUE allocate(VALUE klass)
|
186
242
|
{
|
187
|
-
xmlSAXHandlerPtr handler = calloc(1, sizeof(xmlSAXHandler));
|
243
|
+
xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
|
244
|
+
|
245
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
188
246
|
|
189
247
|
handler->startDocument = start_document;
|
190
248
|
handler->endDocument = end_document;
|
191
249
|
handler->startElement = start_element;
|
192
250
|
handler->endElement = end_element;
|
251
|
+
handler->startElementNs = start_element_ns;
|
252
|
+
handler->endElementNs = end_element_ns;
|
193
253
|
handler->characters = characters_func;
|
194
254
|
handler->comment = comment_func;
|
195
255
|
handler->warning = warning_func;
|
196
256
|
handler->error = error_func;
|
197
257
|
handler->cdataBlock = cdata_block;
|
258
|
+
handler->initialized = XML_SAX2_MAGIC;
|
198
259
|
|
199
260
|
return Data_Wrap_Struct(klass, NULL, deallocate, handler);
|
200
261
|
}
|
@@ -210,7 +271,18 @@ void init_xml_sax_parser()
|
|
210
271
|
cNokogiriXmlSaxParser = klass;
|
211
272
|
|
212
273
|
rb_define_alloc_func(klass, allocate);
|
213
|
-
|
214
|
-
|
215
|
-
|
274
|
+
|
275
|
+
id_start_document = rb_intern("start_document");
|
276
|
+
id_end_document = rb_intern("end_document");
|
277
|
+
id_start_element = rb_intern("start_element");
|
278
|
+
id_end_element = rb_intern("end_element");
|
279
|
+
id_comment = rb_intern("comment");
|
280
|
+
id_characters = rb_intern("characters");
|
281
|
+
id_xmldecl = rb_intern("xmldecl");
|
282
|
+
id_error = rb_intern("error");
|
283
|
+
id_warning = rb_intern("warning");
|
284
|
+
id_cdata_block = rb_intern("cdata_block");
|
285
|
+
id_cAttribute = rb_intern("Attribute");
|
286
|
+
id_start_element_namespace = rb_intern("start_element_namespace");
|
287
|
+
id_end_element_namespace = rb_intern("end_element_namespace");
|
216
288
|
}
|