nokogiri 1.0.0 → 1.6.8.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.autotest +26 -0
- data/.cross_rubies +9 -0
- data/.editorconfig +17 -0
- data/.gemtest +0 -0
- data/.travis.yml +51 -0
- data/CHANGELOG.rdoc +1160 -0
- data/CONTRIBUTING.md +42 -0
- data/C_CODING_STYLE.rdoc +33 -0
- data/Gemfile +22 -0
- data/LICENSE.txt +31 -0
- data/Manifest.txt +284 -40
- data/README.md +166 -0
- data/ROADMAP.md +111 -0
- data/Rakefile +310 -199
- data/STANDARD_RESPONSES.md +47 -0
- data/Y_U_NO_GEMSPEC.md +155 -0
- data/appveyor.yml +22 -0
- data/bin/nokogiri +118 -0
- data/build_all +45 -0
- data/dependencies.yml +29 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +664 -34
- data/ext/nokogiri/html_document.c +120 -33
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +145 -0
- data/ext/nokogiri/nokogiri.h +131 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +23 -19
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +69 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +501 -54
- data/ext/nokogiri/xml_document.h +14 -1
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +109 -24
- data/ext/nokogiri/xml_dtd.h +3 -1
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +60 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +117 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1285 -315
- data/ext/nokogiri/xml_node.h +4 -6
- data/ext/nokogiri/xml_node_set.c +415 -54
- data/ext/nokogiri/xml_node_set.h +6 -2
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +316 -77
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +161 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +215 -80
- data/ext/nokogiri/xml_sax_parser.h +30 -1
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +45 -175
- data/ext/nokogiri/xml_syntax_error.h +4 -2
- data/ext/nokogiri/xml_text.c +37 -14
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +230 -13
- data/ext/nokogiri/xml_xpath_context.h +2 -1
- data/ext/nokogiri/xslt_stylesheet.c +196 -34
- data/ext/nokogiri/xslt_stylesheet.h +6 -1
- data/lib/nokogiri/css/node.rb +18 -61
- data/lib/nokogiri/css/parser.rb +725 -17
- data/lib/nokogiri/css/parser.y +126 -63
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +148 -5
- data/lib/nokogiri/css/tokenizer.rex +31 -39
- data/lib/nokogiri/css/xpath_visitor.rb +109 -51
- data/lib/nokogiri/css.rb +24 -3
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +329 -3
- data/lib/nokogiri/html/document_fragment.rb +39 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +35 -4
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/html/sax/push_parser.rb +36 -0
- data/lib/nokogiri/html.rb +18 -76
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +106 -1
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +395 -31
- data/lib/nokogiri/xml/cdata.rb +4 -2
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +267 -12
- data/lib/nokogiri/xml/document_fragment.rb +149 -0
- data/lib/nokogiri/xml/dtd.rb +27 -1
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node.rb +748 -109
- data/lib/nokogiri/xml/node_set.rb +200 -72
- data/lib/nokogiri/xml/parse_options.rb +120 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +102 -4
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax/document.rb +114 -2
- data/lib/nokogiri/xml/sax/parser.rb +97 -7
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/sax.rb +2 -7
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/searchable.rb +221 -0
- data/lib/nokogiri/xml/syntax_error.rb +27 -1
- data/lib/nokogiri/xml/text.rb +4 -1
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath.rb +4 -0
- data/lib/nokogiri/xml/xpath_context.rb +3 -1
- data/lib/nokogiri/xml.rb +45 -38
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/nokogiri/xslt.rb +47 -2
- data/lib/nokogiri.rb +117 -24
- data/lib/xsd/xmlparser/nokogiri.rb +102 -0
- data/patches/sort-patches-by-date +25 -0
- data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
- data/suppressions/README.txt +1 -0
- data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +73 -6
- data/test/css/test_parser.rb +184 -39
- data/test/css/test_tokenizer.rb +72 -19
- data/test/css/test_xpath_visitor.rb +44 -2
- data/test/decorators/test_slop.rb +20 -0
- data/test/files/2ch.html +108 -0
- data/test/files/GH_1042.html +18 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/atom.xml +344 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/namespace_pressure_test.xml +1684 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/saml/saml20assertion_schema.xsd +283 -0
- data/test/files/saml/saml20protocol_schema.xsd +302 -0
- data/test/files/saml/xenc_schema.xsd +146 -0
- data/test/files/saml/xmldsig_schema.xsd +318 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/shift_jis_no_charset.html +9 -0
- data/test/files/slow-xpath.xml +25509 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/files/tlm.html +2 -1
- data/test/files/to_be_xincluded.xml +2 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/files/xinclude.xml +4 -0
- data/test/helper.rb +124 -13
- data/test/html/sax/test_parser.rb +118 -4
- data/test/html/sax/test_parser_context.rb +46 -0
- data/test/html/sax/test_push_parser.rb +87 -0
- data/test/html/test_builder.rb +94 -8
- data/test/html/test_document.rb +626 -11
- data/test/html/test_document_encoding.rb +145 -0
- data/test/html/test_document_fragment.rb +301 -0
- data/test/html/test_element_description.rb +105 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +212 -0
- data/test/html/test_node_encoding.rb +85 -0
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
- data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
- data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
- data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
- data/test/namespaces/test_namespaces_preservation.rb +31 -0
- data/test/test_convert_xpath.rb +2 -47
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +48 -0
- data/test/test_memory_leak.rb +156 -0
- data/test/test_nokogiri.rb +103 -1
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +293 -8
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +309 -8
- data/test/xml/sax/test_parser_context.rb +115 -0
- data/test/xml/sax/test_push_parser.rb +157 -0
- data/test/xml/test_attr.rb +67 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +327 -2
- data/test/xml/test_c14n.rb +180 -0
- data/test/xml/test_cdata.rb +32 -2
- data/test/xml/test_comment.rb +40 -0
- data/test/xml/test_document.rb +846 -35
- data/test/xml/test_document_encoding.rb +31 -0
- data/test/xml/test_document_fragment.rb +271 -0
- data/test/xml/test_dtd.rb +153 -9
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +251 -0
- data/test/xml/test_namespace.rb +96 -0
- data/test/xml/test_node.rb +1126 -105
- data/test/xml/test_node_attributes.rb +115 -0
- data/test/xml/test_node_encoding.rb +69 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +549 -0
- data/test/xml/test_node_set.rb +668 -9
- data/test/xml/test_parse_options.rb +64 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader.rb +589 -0
- data/test/xml/test_reader_encoding.rb +134 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +142 -0
- data/test/xml/test_syntax_error.rb +30 -0
- data/test/xml/test_text.rb +49 -2
- data/test/xml/test_unparented_node.rb +440 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +445 -0
- data/test/xslt/test_custom_functions.rb +133 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- data/test_all +107 -0
- metadata +459 -115
- data/History.txt +0 -6
- data/README.ja.txt +0 -86
- data/README.txt +0 -87
- data/ext/nokogiri/html_sax_parser.c +0 -32
- data/ext/nokogiri/html_sax_parser.h +0 -11
- data/ext/nokogiri/native.c +0 -40
- data/ext/nokogiri/native.h +0 -51
- data/ext/nokogiri/xml_xpath.c +0 -46
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/lib/nokogiri/css/generated_parser.rb +0 -653
- data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
- data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
- data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
- data/lib/nokogiri/decorators/hpricot.rb +0 -3
- data/lib/nokogiri/decorators.rb +0 -1
- data/lib/nokogiri/hpricot.rb +0 -47
- data/lib/nokogiri/xml/after_handler.rb +0 -18
- data/lib/nokogiri/xml/before_handler.rb +0 -32
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/entity_declaration.rb +0 -9
- data/nokogiri.gemspec +0 -34
- data/test/hpricot/files/basic.xhtml +0 -17
- data/test/hpricot/files/boingboing.html +0 -2266
- data/test/hpricot/files/cy0.html +0 -3653
- data/test/hpricot/files/immob.html +0 -400
- data/test/hpricot/files/pace_application.html +0 -1320
- data/test/hpricot/files/tenderlove.html +0 -16
- data/test/hpricot/files/uswebgen.html +0 -220
- data/test/hpricot/files/utf8.html +0 -1054
- data/test/hpricot/files/week9.html +0 -1723
- data/test/hpricot/files/why.xml +0 -19
- data/test/hpricot/load_files.rb +0 -7
- data/test/hpricot/test_alter.rb +0 -67
- data/test/hpricot/test_builder.rb +0 -27
- data/test/hpricot/test_parser.rb +0 -423
- data/test/hpricot/test_paths.rb +0 -15
- data/test/hpricot/test_preserved.rb +0 -78
- data/test/hpricot/test_xml.rb +0 -30
- data/test/test_reader.rb +0 -222
@@ -0,0 +1,161 @@
|
|
1
|
+
#include <xml_relax_ng.h>
|
2
|
+
|
3
|
+
static void dealloc(xmlRelaxNGPtr schema)
|
4
|
+
{
|
5
|
+
NOKOGIRI_DEBUG_START(schema);
|
6
|
+
xmlRelaxNGFree(schema);
|
7
|
+
NOKOGIRI_DEBUG_END(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
/*
|
11
|
+
* call-seq:
|
12
|
+
* validate_document(document)
|
13
|
+
*
|
14
|
+
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
15
|
+
*/
|
16
|
+
static VALUE validate_document(VALUE self, VALUE document)
|
17
|
+
{
|
18
|
+
xmlDocPtr doc;
|
19
|
+
xmlRelaxNGPtr schema;
|
20
|
+
VALUE errors;
|
21
|
+
xmlRelaxNGValidCtxtPtr valid_ctxt;
|
22
|
+
|
23
|
+
Data_Get_Struct(self, xmlRelaxNG, schema);
|
24
|
+
Data_Get_Struct(document, xmlDoc, doc);
|
25
|
+
|
26
|
+
errors = rb_ary_new();
|
27
|
+
|
28
|
+
valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
|
29
|
+
|
30
|
+
if(NULL == valid_ctxt) {
|
31
|
+
/* we have a problem */
|
32
|
+
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
33
|
+
}
|
34
|
+
|
35
|
+
#ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
|
36
|
+
xmlRelaxNGSetValidStructuredErrors(
|
37
|
+
valid_ctxt,
|
38
|
+
Nokogiri_error_array_pusher,
|
39
|
+
(void *)errors
|
40
|
+
);
|
41
|
+
#endif
|
42
|
+
|
43
|
+
xmlRelaxNGValidateDoc(valid_ctxt, doc);
|
44
|
+
|
45
|
+
xmlRelaxNGFreeValidCtxt(valid_ctxt);
|
46
|
+
|
47
|
+
return errors;
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
|
51
|
+
* call-seq:
|
52
|
+
* read_memory(string)
|
53
|
+
*
|
54
|
+
* Create a new RelaxNG from the contents of +string+
|
55
|
+
*/
|
56
|
+
static VALUE read_memory(VALUE klass, VALUE content)
|
57
|
+
{
|
58
|
+
xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
|
59
|
+
(const char *)StringValuePtr(content),
|
60
|
+
(int)RSTRING_LEN(content)
|
61
|
+
);
|
62
|
+
xmlRelaxNGPtr schema;
|
63
|
+
VALUE errors = rb_ary_new();
|
64
|
+
VALUE rb_schema;
|
65
|
+
|
66
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
67
|
+
|
68
|
+
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
69
|
+
xmlRelaxNGSetParserStructuredErrors(
|
70
|
+
ctx,
|
71
|
+
Nokogiri_error_array_pusher,
|
72
|
+
(void *)errors
|
73
|
+
);
|
74
|
+
#endif
|
75
|
+
|
76
|
+
schema = xmlRelaxNGParse(ctx);
|
77
|
+
|
78
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
79
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
80
|
+
|
81
|
+
if(NULL == schema) {
|
82
|
+
xmlErrorPtr error = xmlGetLastError();
|
83
|
+
if(error)
|
84
|
+
Nokogiri_error_raise(NULL, error);
|
85
|
+
else
|
86
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
87
|
+
|
88
|
+
return Qnil;
|
89
|
+
}
|
90
|
+
|
91
|
+
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
92
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
93
|
+
|
94
|
+
return rb_schema;
|
95
|
+
}
|
96
|
+
|
97
|
+
/*
|
98
|
+
* call-seq:
|
99
|
+
* from_document(doc)
|
100
|
+
*
|
101
|
+
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
102
|
+
*/
|
103
|
+
static VALUE from_document(VALUE klass, VALUE document)
|
104
|
+
{
|
105
|
+
xmlDocPtr doc;
|
106
|
+
xmlRelaxNGParserCtxtPtr ctx;
|
107
|
+
xmlRelaxNGPtr schema;
|
108
|
+
VALUE errors;
|
109
|
+
VALUE rb_schema;
|
110
|
+
|
111
|
+
Data_Get_Struct(document, xmlDoc, doc);
|
112
|
+
|
113
|
+
/* In case someone passes us a node. ugh. */
|
114
|
+
doc = doc->doc;
|
115
|
+
|
116
|
+
ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
117
|
+
|
118
|
+
errors = rb_ary_new();
|
119
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
120
|
+
|
121
|
+
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
122
|
+
xmlRelaxNGSetParserStructuredErrors(
|
123
|
+
ctx,
|
124
|
+
Nokogiri_error_array_pusher,
|
125
|
+
(void *)errors
|
126
|
+
);
|
127
|
+
#endif
|
128
|
+
|
129
|
+
schema = xmlRelaxNGParse(ctx);
|
130
|
+
|
131
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
132
|
+
|
133
|
+
if(NULL == schema) {
|
134
|
+
xmlErrorPtr error = xmlGetLastError();
|
135
|
+
if(error)
|
136
|
+
Nokogiri_error_raise(NULL, error);
|
137
|
+
else
|
138
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
139
|
+
|
140
|
+
return Qnil;
|
141
|
+
}
|
142
|
+
|
143
|
+
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
144
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
145
|
+
|
146
|
+
return rb_schema;
|
147
|
+
}
|
148
|
+
|
149
|
+
VALUE cNokogiriXmlRelaxNG;
|
150
|
+
void init_xml_relax_ng()
|
151
|
+
{
|
152
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
153
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
154
|
+
VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
|
155
|
+
|
156
|
+
cNokogiriXmlRelaxNG = klass;
|
157
|
+
|
158
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, 1);
|
159
|
+
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
160
|
+
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
161
|
+
}
|
@@ -1,135 +1,253 @@
|
|
1
|
-
#define _GNU_SOURCE
|
2
|
-
#include <stdio.h>
|
3
1
|
#include <xml_sax_parser.h>
|
4
2
|
|
5
|
-
|
6
|
-
*
|
7
|
-
* parse_memory(data)
|
8
|
-
*
|
9
|
-
* Parse the document stored in +data+
|
10
|
-
*/
|
11
|
-
static VALUE parse_memory(VALUE self, VALUE data)
|
12
|
-
{
|
13
|
-
xmlSAXHandlerPtr handler;
|
14
|
-
Data_Get_Struct(self, xmlSAXHandler, handler);
|
15
|
-
xmlSAXUserParseMemory( handler,
|
16
|
-
(void *)self,
|
17
|
-
StringValuePtr(data),
|
18
|
-
NUM2INT(rb_funcall(data, rb_intern("length"), 0))
|
19
|
-
);
|
20
|
-
return data;
|
21
|
-
}
|
3
|
+
int vasprintf (char **strp, const char *fmt, va_list ap);
|
4
|
+
void vasprintf_free (void *p);
|
22
5
|
|
23
|
-
static
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
(void *)self,
|
29
|
-
StringValuePtr(data)
|
30
|
-
);
|
31
|
-
return data;
|
32
|
-
}
|
6
|
+
static ID id_start_document, id_end_document, id_start_element, id_end_element;
|
7
|
+
static ID id_start_element_namespace, id_end_element_namespace;
|
8
|
+
static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
|
9
|
+
static ID id_cdata_block, id_cAttribute;
|
10
|
+
static ID id_processing_instruction;
|
33
11
|
|
34
12
|
static void start_document(void * ctx)
|
35
13
|
{
|
36
|
-
VALUE self = (
|
37
|
-
VALUE doc =
|
38
|
-
|
14
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
15
|
+
VALUE doc = rb_iv_get(self, "@document");
|
16
|
+
|
17
|
+
xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
|
18
|
+
|
19
|
+
if(NULL != ctxt && ctxt->html != 1) {
|
20
|
+
if(ctxt->standalone != -1) { /* -1 means there was no declaration */
|
21
|
+
VALUE encoding = Qnil ;
|
22
|
+
if (ctxt->encoding) {
|
23
|
+
encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
|
24
|
+
} else if (ctxt->input && ctxt->input->encoding) {
|
25
|
+
encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
|
26
|
+
}
|
27
|
+
|
28
|
+
VALUE version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
|
29
|
+
|
30
|
+
VALUE standalone = Qnil;
|
31
|
+
switch(ctxt->standalone)
|
32
|
+
{
|
33
|
+
case 0:
|
34
|
+
standalone = NOKOGIRI_STR_NEW2("no");
|
35
|
+
break;
|
36
|
+
case 1:
|
37
|
+
standalone = NOKOGIRI_STR_NEW2("yes");
|
38
|
+
break;
|
39
|
+
}
|
40
|
+
|
41
|
+
rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
rb_funcall(doc, id_start_document, 0);
|
39
46
|
}
|
40
47
|
|
41
48
|
static void end_document(void * ctx)
|
42
49
|
{
|
43
|
-
VALUE self = (
|
44
|
-
VALUE doc =
|
45
|
-
rb_funcall(doc,
|
50
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
51
|
+
VALUE doc = rb_iv_get(self, "@document");
|
52
|
+
rb_funcall(doc, id_end_document, 0);
|
46
53
|
}
|
47
54
|
|
48
55
|
static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
|
49
56
|
{
|
50
|
-
VALUE self = (
|
51
|
-
VALUE doc =
|
57
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
58
|
+
VALUE doc = rb_iv_get(self, "@document");
|
52
59
|
VALUE attributes = rb_ary_new();
|
53
60
|
const xmlChar * attr;
|
54
61
|
int i = 0;
|
55
62
|
if(atts) {
|
56
63
|
while((attr = atts[i]) != NULL) {
|
57
|
-
|
58
|
-
|
64
|
+
const xmlChar * val = atts[i+1];
|
65
|
+
VALUE value = val != NULL ? NOKOGIRI_STR_NEW2(val) : Qnil;
|
66
|
+
rb_ary_push(attributes, rb_ary_new3(2, NOKOGIRI_STR_NEW2(attr), value));
|
67
|
+
i+=2;
|
59
68
|
}
|
60
69
|
}
|
61
70
|
|
62
71
|
rb_funcall( doc,
|
63
|
-
|
72
|
+
id_start_element,
|
64
73
|
2,
|
65
|
-
|
74
|
+
NOKOGIRI_STR_NEW2(name),
|
66
75
|
attributes
|
67
76
|
);
|
68
77
|
}
|
69
78
|
|
70
79
|
static void end_element(void * ctx, const xmlChar *name)
|
71
80
|
{
|
72
|
-
VALUE self = (
|
73
|
-
VALUE doc =
|
74
|
-
rb_funcall(doc,
|
81
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
82
|
+
VALUE doc = rb_iv_get(self, "@document");
|
83
|
+
rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
|
84
|
+
}
|
85
|
+
|
86
|
+
static VALUE attributes_as_list(
|
87
|
+
VALUE self,
|
88
|
+
int nb_attributes,
|
89
|
+
const xmlChar ** attributes)
|
90
|
+
{
|
91
|
+
VALUE list = rb_ary_new2((long)nb_attributes);
|
92
|
+
|
93
|
+
VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
|
94
|
+
if (attributes) {
|
95
|
+
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
96
|
+
int i;
|
97
|
+
for (i = 0; i < nb_attributes * 5; i += 5) {
|
98
|
+
VALUE argv[4], attribute;
|
99
|
+
|
100
|
+
argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */
|
101
|
+
argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */
|
102
|
+
argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */
|
103
|
+
|
104
|
+
/* value */
|
105
|
+
argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3],
|
106
|
+
(attributes[i+4] - attributes[i+3]));
|
107
|
+
|
108
|
+
attribute = rb_class_new_instance(4, argv, attr_klass);
|
109
|
+
rb_ary_push(list, attribute);
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
return list;
|
114
|
+
}
|
115
|
+
|
116
|
+
static void
|
117
|
+
start_element_ns (
|
118
|
+
void * ctx,
|
119
|
+
const xmlChar * localname,
|
120
|
+
const xmlChar * prefix,
|
121
|
+
const xmlChar * uri,
|
122
|
+
int nb_namespaces,
|
123
|
+
const xmlChar ** namespaces,
|
124
|
+
int nb_attributes,
|
125
|
+
int nb_defaulted,
|
126
|
+
const xmlChar ** attributes)
|
127
|
+
{
|
128
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
129
|
+
VALUE doc = rb_iv_get(self, "@document");
|
130
|
+
|
131
|
+
VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes);
|
132
|
+
|
133
|
+
VALUE ns_list = rb_ary_new2((long)nb_namespaces);
|
134
|
+
|
135
|
+
if (namespaces) {
|
136
|
+
int i;
|
137
|
+
for (i = 0; i < nb_namespaces * 2; i += 2)
|
138
|
+
{
|
139
|
+
rb_ary_push(ns_list,
|
140
|
+
rb_ary_new3((long)2,
|
141
|
+
RBSTR_OR_QNIL(namespaces[i + 0]),
|
142
|
+
RBSTR_OR_QNIL(namespaces[i + 1])
|
143
|
+
)
|
144
|
+
);
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
rb_funcall( doc,
|
149
|
+
id_start_element_namespace,
|
150
|
+
5,
|
151
|
+
NOKOGIRI_STR_NEW2(localname),
|
152
|
+
attribute_list,
|
153
|
+
RBSTR_OR_QNIL(prefix),
|
154
|
+
RBSTR_OR_QNIL(uri),
|
155
|
+
ns_list
|
156
|
+
);
|
157
|
+
}
|
158
|
+
|
159
|
+
/**
|
160
|
+
* end_element_ns was borrowed heavily from libxml-ruby.
|
161
|
+
*/
|
162
|
+
static void
|
163
|
+
end_element_ns (
|
164
|
+
void * ctx,
|
165
|
+
const xmlChar * localname,
|
166
|
+
const xmlChar * prefix,
|
167
|
+
const xmlChar * uri)
|
168
|
+
{
|
169
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
170
|
+
VALUE doc = rb_iv_get(self, "@document");
|
171
|
+
|
172
|
+
rb_funcall(doc, id_end_element_namespace, 3,
|
173
|
+
NOKOGIRI_STR_NEW2(localname),
|
174
|
+
RBSTR_OR_QNIL(prefix),
|
175
|
+
RBSTR_OR_QNIL(uri)
|
176
|
+
);
|
75
177
|
}
|
76
178
|
|
77
179
|
static void characters_func(void * ctx, const xmlChar * ch, int len)
|
78
180
|
{
|
79
|
-
VALUE self = (
|
80
|
-
VALUE doc =
|
81
|
-
VALUE str =
|
82
|
-
rb_funcall(doc,
|
181
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
182
|
+
VALUE doc = rb_iv_get(self, "@document");
|
183
|
+
VALUE str = NOKOGIRI_STR_NEW(ch, len);
|
184
|
+
rb_funcall(doc, id_characters, 1, str);
|
83
185
|
}
|
84
186
|
|
85
187
|
static void comment_func(void * ctx, const xmlChar * value)
|
86
188
|
{
|
87
|
-
VALUE self = (
|
88
|
-
VALUE doc =
|
89
|
-
VALUE str =
|
90
|
-
rb_funcall(doc,
|
189
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
190
|
+
VALUE doc = rb_iv_get(self, "@document");
|
191
|
+
VALUE str = NOKOGIRI_STR_NEW2(value);
|
192
|
+
rb_funcall(doc, id_comment, 1, str);
|
91
193
|
}
|
92
194
|
|
93
|
-
#ifndef XP_WIN
|
94
195
|
static void warning_func(void * ctx, const char *msg, ...)
|
95
196
|
{
|
96
|
-
VALUE self = (
|
97
|
-
VALUE doc =
|
197
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
198
|
+
VALUE doc = rb_iv_get(self, "@document");
|
98
199
|
char * message;
|
200
|
+
VALUE ruby_message;
|
99
201
|
|
100
202
|
va_list args;
|
101
203
|
va_start(args, msg);
|
102
204
|
vasprintf(&message, msg, args);
|
103
205
|
va_end(args);
|
104
206
|
|
105
|
-
|
106
|
-
|
207
|
+
ruby_message = NOKOGIRI_STR_NEW2(message);
|
208
|
+
vasprintf_free(message);
|
209
|
+
rb_funcall(doc, id_warning, 1, ruby_message);
|
107
210
|
}
|
108
|
-
#endif
|
109
211
|
|
110
|
-
#ifndef XP_WIN
|
111
212
|
static void error_func(void * ctx, const char *msg, ...)
|
112
213
|
{
|
113
|
-
VALUE self = (
|
114
|
-
VALUE doc =
|
214
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
215
|
+
VALUE doc = rb_iv_get(self, "@document");
|
115
216
|
char * message;
|
217
|
+
VALUE ruby_message;
|
116
218
|
|
117
219
|
va_list args;
|
118
220
|
va_start(args, msg);
|
119
221
|
vasprintf(&message, msg, args);
|
120
222
|
va_end(args);
|
121
223
|
|
122
|
-
|
123
|
-
|
224
|
+
ruby_message = NOKOGIRI_STR_NEW2(message);
|
225
|
+
vasprintf_free(message);
|
226
|
+
rb_funcall(doc, id_error, 1, ruby_message);
|
124
227
|
}
|
125
|
-
#endif
|
126
228
|
|
127
229
|
static void cdata_block(void * ctx, const xmlChar * value, int len)
|
128
230
|
{
|
129
|
-
VALUE self = (
|
130
|
-
VALUE doc =
|
131
|
-
VALUE string =
|
132
|
-
rb_funcall(doc,
|
231
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
232
|
+
VALUE doc = rb_iv_get(self, "@document");
|
233
|
+
VALUE string = NOKOGIRI_STR_NEW(value, len);
|
234
|
+
rb_funcall(doc, id_cdata_block, 1, string);
|
235
|
+
}
|
236
|
+
|
237
|
+
static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content)
|
238
|
+
{
|
239
|
+
VALUE rb_content;
|
240
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
241
|
+
VALUE doc = rb_iv_get(self, "@document");
|
242
|
+
|
243
|
+
rb_content = content ? NOKOGIRI_STR_NEW2(content) : Qnil;
|
244
|
+
|
245
|
+
rb_funcall( doc,
|
246
|
+
id_processing_instruction,
|
247
|
+
2,
|
248
|
+
NOKOGIRI_STR_NEW2(name),
|
249
|
+
rb_content
|
250
|
+
);
|
133
251
|
}
|
134
252
|
|
135
253
|
static void deallocate(xmlSAXHandlerPtr handler)
|
@@ -141,24 +259,23 @@ static void deallocate(xmlSAXHandlerPtr handler)
|
|
141
259
|
|
142
260
|
static VALUE allocate(VALUE klass)
|
143
261
|
{
|
144
|
-
xmlSAXHandlerPtr handler = calloc(1, sizeof(xmlSAXHandler));
|
262
|
+
xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
|
263
|
+
|
264
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
145
265
|
|
146
266
|
handler->startDocument = start_document;
|
147
267
|
handler->endDocument = end_document;
|
148
268
|
handler->startElement = start_element;
|
149
269
|
handler->endElement = end_element;
|
270
|
+
handler->startElementNs = start_element_ns;
|
271
|
+
handler->endElementNs = end_element_ns;
|
150
272
|
handler->characters = characters_func;
|
151
273
|
handler->comment = comment_func;
|
152
|
-
#ifndef XP_WIN
|
153
|
-
/*
|
154
|
-
* The va*functions aren't in ming, and I don't want to deal with
|
155
|
-
* it right now.....
|
156
|
-
*
|
157
|
-
*/
|
158
274
|
handler->warning = warning_func;
|
159
275
|
handler->error = error_func;
|
160
|
-
#endif
|
161
276
|
handler->cdataBlock = cdata_block;
|
277
|
+
handler->processingInstruction = processing_instruction;
|
278
|
+
handler->initialized = XML_SAX2_MAGIC;
|
162
279
|
|
163
280
|
return Data_Wrap_Struct(klass, NULL, deallocate, handler);
|
164
281
|
}
|
@@ -166,9 +283,27 @@ static VALUE allocate(VALUE klass)
|
|
166
283
|
VALUE cNokogiriXmlSaxParser ;
|
167
284
|
void init_xml_sax_parser()
|
168
285
|
{
|
169
|
-
VALUE
|
170
|
-
|
286
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
287
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
288
|
+
VALUE sax = rb_define_module_under(xml, "SAX");
|
289
|
+
VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
|
290
|
+
|
291
|
+
cNokogiriXmlSaxParser = klass;
|
292
|
+
|
171
293
|
rb_define_alloc_func(klass, allocate);
|
172
|
-
|
173
|
-
|
294
|
+
|
295
|
+
id_start_document = rb_intern("start_document");
|
296
|
+
id_end_document = rb_intern("end_document");
|
297
|
+
id_start_element = rb_intern("start_element");
|
298
|
+
id_end_element = rb_intern("end_element");
|
299
|
+
id_comment = rb_intern("comment");
|
300
|
+
id_characters = rb_intern("characters");
|
301
|
+
id_xmldecl = rb_intern("xmldecl");
|
302
|
+
id_error = rb_intern("error");
|
303
|
+
id_warning = rb_intern("warning");
|
304
|
+
id_cdata_block = rb_intern("cdata_block");
|
305
|
+
id_cAttribute = rb_intern("Attribute");
|
306
|
+
id_start_element_namespace = rb_intern("start_element_namespace");
|
307
|
+
id_end_element_namespace = rb_intern("end_element_namespace");
|
308
|
+
id_processing_instruction = rb_intern("processing_instruction");
|
174
309
|
}
|
@@ -1,10 +1,39 @@
|
|
1
1
|
#ifndef NOKOGIRI_XML_SAX_PARSER
|
2
2
|
#define NOKOGIRI_XML_SAX_PARSER
|
3
3
|
|
4
|
-
#include <
|
4
|
+
#include <nokogiri.h>
|
5
5
|
|
6
6
|
void init_xml_sax_parser();
|
7
7
|
|
8
8
|
extern VALUE cNokogiriXmlSaxParser ;
|
9
|
+
|
10
|
+
typedef struct _nokogiriSAXTuple {
|
11
|
+
xmlParserCtxtPtr ctxt;
|
12
|
+
VALUE self;
|
13
|
+
} nokogiriSAXTuple;
|
14
|
+
|
15
|
+
typedef nokogiriSAXTuple * nokogiriSAXTuplePtr;
|
16
|
+
|
17
|
+
#define NOKOGIRI_SAX_SELF(_ctxt) \
|
18
|
+
((nokogiriSAXTuplePtr)(_ctxt))->self
|
19
|
+
|
20
|
+
#define NOKOGIRI_SAX_CTXT(_ctxt) \
|
21
|
+
((nokogiriSAXTuplePtr)(_ctxt))->ctxt
|
22
|
+
|
23
|
+
#define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) \
|
24
|
+
nokogiri_sax_tuple_new(_ctxt, _self)
|
25
|
+
|
26
|
+
static inline nokogiriSAXTuplePtr
|
27
|
+
nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
|
28
|
+
{
|
29
|
+
nokogiriSAXTuplePtr tuple = malloc(sizeof(nokogiriSAXTuple));
|
30
|
+
tuple->self = self;
|
31
|
+
tuple->ctxt = ctxt;
|
32
|
+
return tuple;
|
33
|
+
}
|
34
|
+
|
35
|
+
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) \
|
36
|
+
free(_tuple) \
|
37
|
+
|
9
38
|
#endif
|
10
39
|
|