superfeedr-nokogiri 1.4.0.20091116183308
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +330 -0
- data/CHANGELOG.rdoc +314 -0
- data/Manifest.txt +269 -0
- data/README.ja.rdoc +105 -0
- data/README.rdoc +118 -0
- data/Rakefile +244 -0
- data/bin/nokogiri +49 -0
- data/ext/nokogiri/extconf.rb +145 -0
- data/ext/nokogiri/html_document.c +145 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +92 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/nokogiri.c +89 -0
- data/ext/nokogiri/nokogiri.h +145 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +67 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +54 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +52 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +388 -0
- data/ext/nokogiri/xml_document.h +24 -0
- data/ext/nokogiri/xml_document_fragment.c +46 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +192 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_entity_decl.c +97 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +31 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_namespace.c +74 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +1060 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +397 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +593 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +159 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +286 -0
- data/ext/nokogiri/xml_sax_parser.h +43 -0
- data/ext/nokogiri/xml_sax_parser_context.c +155 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +114 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +156 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +261 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +239 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri.rb +116 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +646 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
- data/lib/nokogiri/css/node.rb +99 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +162 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +28 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
- data/lib/nokogiri/ffi/libxml.rb +356 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
- data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +135 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
- data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
- data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
- data/lib/nokogiri/ffi/xml/node.rb +444 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +227 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
- data/lib/nokogiri/ffi/xml/schema.rb +92 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/html.rb +35 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +88 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +48 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +33 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +405 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +131 -0
- data/lib/nokogiri/xml/document_fragment.rb +69 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +15 -0
- data/lib/nokogiri/xml/fragment_handler.rb +71 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +665 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +307 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +85 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +74 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +160 -0
- data/lib/nokogiri/xml/sax/parser.rb +115 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +61 -0
- data/lib/nokogiri/xml/syntax_error.rb +38 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +71 -0
- data/tasks/test.rb +100 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +183 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/helper.rb +136 -0
- data/test/html/sax/test_parser.rb +64 -0
- data/test/html/sax/test_parser_context.rb +48 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +390 -0
- data/test/html/test_document_encoding.rb +77 -0
- data/test/html/test_document_fragment.rb +132 -0
- data/test/html/test_element_description.rb +94 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +228 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +134 -0
- data/test/test_reader.rb +358 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +307 -0
- data/test/xml/sax/test_parser_context.rb +56 -0
- data/test/xml/sax/test_push_parser.rb +131 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_attribute_decl.rb +82 -0
- data/test/xml/test_builder.rb +167 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +607 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +138 -0
- data/test/xml/test_dtd.rb +82 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +83 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +68 -0
- data/test/xml/test_node.rb +889 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_set.rb +531 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +89 -0
- data/test/xml/test_syntax_error.rb +27 -0
- data/test/xml/test_text.rb +30 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +430 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
#include <xml_relax_ng.h>
|
2
|
+
|
3
|
+
/*
 * Free callback handed to Data_Wrap_Struct by read_memory/from_document:
 * releases the wrapped libxml2 RelaxNG schema.
 */
static void dealloc(xmlRelaxNGPtr schema)
{
  NOKOGIRI_DEBUG_START(schema);

  xmlRelaxNGFree(schema);

  NOKOGIRI_DEBUG_END(schema);
}
|
9
|
+
|
10
|
+
/*
|
11
|
+
* call-seq:
|
12
|
+
* validate_document(document)
|
13
|
+
*
|
14
|
+
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
15
|
+
*/
|
16
|
+
static VALUE validate_document(VALUE self, VALUE document)
|
17
|
+
{
|
18
|
+
xmlDocPtr doc;
|
19
|
+
xmlRelaxNGPtr schema;
|
20
|
+
|
21
|
+
Data_Get_Struct(self, xmlRelaxNG, schema);
|
22
|
+
Data_Get_Struct(document, xmlDoc, doc);
|
23
|
+
|
24
|
+
VALUE errors = rb_ary_new();
|
25
|
+
|
26
|
+
xmlRelaxNGValidCtxtPtr valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
|
27
|
+
|
28
|
+
if(NULL == valid_ctxt) {
|
29
|
+
// we have a problem
|
30
|
+
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
31
|
+
}
|
32
|
+
|
33
|
+
#ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
|
34
|
+
xmlRelaxNGSetValidStructuredErrors(
|
35
|
+
valid_ctxt,
|
36
|
+
Nokogiri_error_array_pusher,
|
37
|
+
(void *)errors
|
38
|
+
);
|
39
|
+
#endif
|
40
|
+
|
41
|
+
xmlRelaxNGValidateDoc(valid_ctxt, doc);
|
42
|
+
|
43
|
+
xmlRelaxNGFreeValidCtxt(valid_ctxt);
|
44
|
+
|
45
|
+
return errors;
|
46
|
+
}
|
47
|
+
|
48
|
+
/*
|
49
|
+
* call-seq:
|
50
|
+
* read_memory(string)
|
51
|
+
*
|
52
|
+
* Create a new RelaxNG from the contents of +string+
|
53
|
+
*/
|
54
|
+
static VALUE read_memory(VALUE klass, VALUE content)
|
55
|
+
{
|
56
|
+
xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
|
57
|
+
(const char *)StringValuePtr(content),
|
58
|
+
RSTRING_LEN(content)
|
59
|
+
);
|
60
|
+
|
61
|
+
VALUE errors = rb_ary_new();
|
62
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
63
|
+
|
64
|
+
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
65
|
+
xmlRelaxNGSetParserStructuredErrors(
|
66
|
+
ctx,
|
67
|
+
Nokogiri_error_array_pusher,
|
68
|
+
(void *)errors
|
69
|
+
);
|
70
|
+
#endif
|
71
|
+
|
72
|
+
xmlRelaxNGPtr schema = xmlRelaxNGParse(ctx);
|
73
|
+
|
74
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
75
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
76
|
+
|
77
|
+
if(NULL == schema) {
|
78
|
+
xmlErrorPtr error = xmlGetLastError();
|
79
|
+
if(error)
|
80
|
+
Nokogiri_error_raise(NULL, error);
|
81
|
+
else
|
82
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
83
|
+
|
84
|
+
return Qnil;
|
85
|
+
}
|
86
|
+
|
87
|
+
VALUE rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
88
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
89
|
+
|
90
|
+
return rb_schema;
|
91
|
+
}
|
92
|
+
|
93
|
+
/*
|
94
|
+
* call-seq:
|
95
|
+
* from_document(doc)
|
96
|
+
*
|
97
|
+
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
98
|
+
*/
|
99
|
+
static VALUE from_document(VALUE klass, VALUE document)
|
100
|
+
{
|
101
|
+
xmlDocPtr doc;
|
102
|
+
Data_Get_Struct(document, xmlDoc, doc);
|
103
|
+
|
104
|
+
// In case someone passes us a node. ugh.
|
105
|
+
doc = doc->doc;
|
106
|
+
|
107
|
+
xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
108
|
+
|
109
|
+
VALUE errors = rb_ary_new();
|
110
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
111
|
+
|
112
|
+
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
113
|
+
xmlRelaxNGSetParserStructuredErrors(
|
114
|
+
ctx,
|
115
|
+
Nokogiri_error_array_pusher,
|
116
|
+
(void *)errors
|
117
|
+
);
|
118
|
+
#endif
|
119
|
+
|
120
|
+
xmlRelaxNGPtr schema = xmlRelaxNGParse(ctx);
|
121
|
+
|
122
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
123
|
+
if (! is_2_6_16()) {
|
124
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
125
|
+
}
|
126
|
+
|
127
|
+
if(NULL == schema) {
|
128
|
+
xmlErrorPtr error = xmlGetLastError();
|
129
|
+
if(error)
|
130
|
+
Nokogiri_error_raise(NULL, error);
|
131
|
+
else
|
132
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
133
|
+
|
134
|
+
return Qnil;
|
135
|
+
}
|
136
|
+
|
137
|
+
if (is_2_6_16()) {
|
138
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
139
|
+
}
|
140
|
+
|
141
|
+
VALUE rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
142
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
143
|
+
|
144
|
+
return rb_schema;
|
145
|
+
}
|
146
|
+
|
147
|
+
VALUE cNokogiriXmlRelaxNG;
|
148
|
+
void init_xml_relax_ng()
|
149
|
+
{
|
150
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
151
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
152
|
+
VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
|
153
|
+
|
154
|
+
cNokogiriXmlRelaxNG = klass;
|
155
|
+
|
156
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, 1);
|
157
|
+
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
158
|
+
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
159
|
+
}
|
@@ -0,0 +1,286 @@
|
|
1
|
+
#include <xml_sax_parser.h>
|
2
|
+
|
3
|
+
/* Interned names used by the SAX callbacks; filled in by init_xml_sax_parser. */

/* Document and element events */
static ID id_start_document;
static ID id_end_document;
static ID id_start_element;
static ID id_end_element;
static ID id_start_element_namespace;
static ID id_end_element_namespace;

/* Content, declaration and diagnostic events */
static ID id_comment;
static ID id_characters;
static ID id_xmldecl;
static ID id_error;
static ID id_warning;
static ID id_cdata_block;

/* Name of the Attribute constant looked up under the parser class */
static ID id_cAttribute;
|
7
|
+
|
8
|
+
/*
 * STRING_OR_NULL: C string pointer for a truthy Ruby value, NULL otherwise.
 * The argument is evaluated twice and must be an lvalue (StringValuePtr
 * takes its address).
 */
#define STRING_OR_NULL(str) \
  (RTEST(str) ? StringValuePtr(str) : NULL)

/*
 * RBSTR_OR_QNIL: Ruby string built from a non-NULL C string, Qnil otherwise.
 * The argument is parenthesized so expression arguments group correctly;
 * it is still evaluated twice, so it must be free of side effects.
 */
#define RBSTR_OR_QNIL(_str) \
  ((_str) ? NOKOGIRI_STR_NEW2((_str)) : Qnil)
|
13
|
+
|
14
|
+
static void start_document(void * ctx)
|
15
|
+
{
|
16
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
17
|
+
VALUE doc = rb_iv_get(self, "@document");
|
18
|
+
|
19
|
+
xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
|
20
|
+
|
21
|
+
if(NULL != ctxt && ctxt->html != 1) {
|
22
|
+
if(ctxt->standalone != -1) { // -1 means there was no declaration
|
23
|
+
VALUE encoding = ctxt->encoding ?
|
24
|
+
NOKOGIRI_STR_NEW2(ctxt->encoding) :
|
25
|
+
Qnil;
|
26
|
+
|
27
|
+
VALUE version = ctxt->version ?
|
28
|
+
NOKOGIRI_STR_NEW2(ctxt->version) :
|
29
|
+
Qnil;
|
30
|
+
|
31
|
+
VALUE standalone = Qnil;
|
32
|
+
|
33
|
+
switch(ctxt->standalone)
|
34
|
+
{
|
35
|
+
case 0:
|
36
|
+
standalone = NOKOGIRI_STR_NEW2("no");
|
37
|
+
break;
|
38
|
+
case 1:
|
39
|
+
standalone = NOKOGIRI_STR_NEW2("yes");
|
40
|
+
break;
|
41
|
+
}
|
42
|
+
|
43
|
+
rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
rb_funcall(doc, id_start_document, 0);
|
48
|
+
}
|
49
|
+
|
50
|
+
static void end_document(void * ctx)
|
51
|
+
{
|
52
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
53
|
+
VALUE doc = rb_iv_get(self, "@document");
|
54
|
+
rb_funcall(doc, id_end_document, 0);
|
55
|
+
}
|
56
|
+
|
57
|
+
static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
|
58
|
+
{
|
59
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
60
|
+
VALUE doc = rb_iv_get(self, "@document");
|
61
|
+
VALUE attributes = rb_ary_new();
|
62
|
+
const xmlChar * attr;
|
63
|
+
int i = 0;
|
64
|
+
if(atts) {
|
65
|
+
while((attr = atts[i]) != NULL) {
|
66
|
+
rb_ary_push(attributes, NOKOGIRI_STR_NEW2(attr));
|
67
|
+
i++;
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
rb_funcall( doc,
|
72
|
+
id_start_element,
|
73
|
+
2,
|
74
|
+
NOKOGIRI_STR_NEW2(name),
|
75
|
+
attributes
|
76
|
+
);
|
77
|
+
}
|
78
|
+
|
79
|
+
static void end_element(void * ctx, const xmlChar *name)
|
80
|
+
{
|
81
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
82
|
+
VALUE doc = rb_iv_get(self, "@document");
|
83
|
+
rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
|
84
|
+
}
|
85
|
+
|
86
|
+
static VALUE attributes_as_list(
|
87
|
+
VALUE self,
|
88
|
+
int nb_attributes,
|
89
|
+
const xmlChar ** attributes)
|
90
|
+
{
|
91
|
+
VALUE list = rb_ary_new2((long)nb_attributes);
|
92
|
+
|
93
|
+
VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
|
94
|
+
if (attributes) {
|
95
|
+
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
96
|
+
int i;
|
97
|
+
for (i = 0; i < nb_attributes * 5; i += 5) {
|
98
|
+
VALUE argv[4];
|
99
|
+
argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */
|
100
|
+
argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */
|
101
|
+
argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */
|
102
|
+
|
103
|
+
/* value */
|
104
|
+
argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3],
|
105
|
+
(attributes[i+4] - attributes[i+3]));
|
106
|
+
|
107
|
+
VALUE attribute = rb_class_new_instance(4, argv, attr_klass);
|
108
|
+
rb_ary_push(list, attribute);
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
return list;
|
113
|
+
}
|
114
|
+
|
115
|
+
static void
|
116
|
+
start_element_ns (
|
117
|
+
void * ctx,
|
118
|
+
const xmlChar * localname,
|
119
|
+
const xmlChar * prefix,
|
120
|
+
const xmlChar * uri,
|
121
|
+
int nb_namespaces,
|
122
|
+
const xmlChar ** namespaces,
|
123
|
+
int nb_attributes,
|
124
|
+
int nb_defaulted,
|
125
|
+
const xmlChar ** attributes)
|
126
|
+
{
|
127
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
128
|
+
VALUE doc = rb_iv_get(self, "@document");
|
129
|
+
|
130
|
+
VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes);
|
131
|
+
|
132
|
+
VALUE ns_list = rb_ary_new2((long)nb_namespaces);
|
133
|
+
|
134
|
+
if (namespaces) {
|
135
|
+
int i;
|
136
|
+
for (i = 0; i < nb_namespaces * 2; i += 2)
|
137
|
+
{
|
138
|
+
rb_ary_push(ns_list,
|
139
|
+
rb_ary_new3((long)2,
|
140
|
+
RBSTR_OR_QNIL(namespaces[i + 0]),
|
141
|
+
RBSTR_OR_QNIL(namespaces[i + 1])
|
142
|
+
)
|
143
|
+
);
|
144
|
+
}
|
145
|
+
}
|
146
|
+
|
147
|
+
rb_funcall( doc,
|
148
|
+
id_start_element_namespace,
|
149
|
+
5,
|
150
|
+
NOKOGIRI_STR_NEW2(localname),
|
151
|
+
attribute_list,
|
152
|
+
RBSTR_OR_QNIL(prefix),
|
153
|
+
RBSTR_OR_QNIL(uri),
|
154
|
+
ns_list
|
155
|
+
);
|
156
|
+
}
|
157
|
+
|
158
|
+
/**
|
159
|
+
* end_element_ns was borrowed heavily from libxml-ruby.
|
160
|
+
*/
|
161
|
+
static void
|
162
|
+
end_element_ns (
|
163
|
+
void * ctx,
|
164
|
+
const xmlChar * localname,
|
165
|
+
const xmlChar * prefix,
|
166
|
+
const xmlChar * uri)
|
167
|
+
{
|
168
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
169
|
+
VALUE doc = rb_iv_get(self, "@document");
|
170
|
+
|
171
|
+
rb_funcall(doc, id_end_element_namespace, 3,
|
172
|
+
NOKOGIRI_STR_NEW2(localname),
|
173
|
+
RBSTR_OR_QNIL(prefix),
|
174
|
+
RBSTR_OR_QNIL(uri)
|
175
|
+
);
|
176
|
+
}
|
177
|
+
|
178
|
+
static void characters_func(void * ctx, const xmlChar * ch, int len)
|
179
|
+
{
|
180
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
181
|
+
VALUE doc = rb_iv_get(self, "@document");
|
182
|
+
VALUE str = NOKOGIRI_STR_NEW(ch, len);
|
183
|
+
rb_funcall(doc, id_characters, 1, str);
|
184
|
+
}
|
185
|
+
|
186
|
+
static void comment_func(void * ctx, const xmlChar * value)
|
187
|
+
{
|
188
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
189
|
+
VALUE doc = rb_iv_get(self, "@document");
|
190
|
+
VALUE str = NOKOGIRI_STR_NEW2(value);
|
191
|
+
rb_funcall(doc, id_comment, 1, str);
|
192
|
+
}
|
193
|
+
|
194
|
+
static void warning_func(void * ctx, const char *msg, ...)
|
195
|
+
{
|
196
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
197
|
+
VALUE doc = rb_iv_get(self, "@document");
|
198
|
+
char * message;
|
199
|
+
|
200
|
+
va_list args;
|
201
|
+
va_start(args, msg);
|
202
|
+
vasprintf(&message, msg, args);
|
203
|
+
va_end(args);
|
204
|
+
|
205
|
+
rb_funcall(doc, id_warning, 1, NOKOGIRI_STR_NEW2(message));
|
206
|
+
free(message);
|
207
|
+
}
|
208
|
+
|
209
|
+
static void error_func(void * ctx, const char *msg, ...)
|
210
|
+
{
|
211
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
212
|
+
VALUE doc = rb_iv_get(self, "@document");
|
213
|
+
char * message;
|
214
|
+
|
215
|
+
va_list args;
|
216
|
+
va_start(args, msg);
|
217
|
+
vasprintf(&message, msg, args);
|
218
|
+
va_end(args);
|
219
|
+
|
220
|
+
rb_funcall(doc, id_error, 1, NOKOGIRI_STR_NEW2(message));
|
221
|
+
free(message);
|
222
|
+
}
|
223
|
+
|
224
|
+
static void cdata_block(void * ctx, const xmlChar * value, int len)
|
225
|
+
{
|
226
|
+
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
227
|
+
VALUE doc = rb_iv_get(self, "@document");
|
228
|
+
VALUE string = NOKOGIRI_STR_NEW(value, len);
|
229
|
+
rb_funcall(doc, id_cdata_block, 1, string);
|
230
|
+
}
|
231
|
+
|
232
|
+
/*
 * Free callback handed to Data_Wrap_Struct by allocate: releases the
 * xmlSAXHandler obtained there with calloc.
 */
static void deallocate(xmlSAXHandlerPtr handler)
{
  NOKOGIRI_DEBUG_START(handler);

  free(handler);

  NOKOGIRI_DEBUG_END(handler);
}
|
238
|
+
|
239
|
+
/*
 * Allocator for Nokogiri::XML::SAX::Parser.
 *
 * Creates a zeroed xmlSAXHandler, points its callbacks at the functions in
 * this file, marks it as a SAX2 handler and wraps it in a Ruby object whose
 * free function is deallocate. Also clears the global libxml2
 * structured-error handler. Raises NoMemoryError (via rb_memerror) when the
 * handler cannot be allocated.
 */
static VALUE allocate(VALUE klass)
{
  xmlSAXHandlerPtr handler = calloc(1, sizeof(xmlSAXHandler));

  /* calloc may fail; never write callbacks through a NULL handler. */
  if(NULL == handler) {
    rb_memerror();
  }

  xmlSetStructuredErrorFunc(NULL, NULL);

  handler->startDocument = start_document;
  handler->endDocument = end_document;
  handler->startElement = start_element;
  handler->endElement = end_element;
  handler->startElementNs = start_element_ns;
  handler->endElementNs = end_element_ns;
  handler->characters = characters_func;
  handler->comment = comment_func;
  handler->warning = warning_func;
  handler->error = error_func;
  handler->cdataBlock = cdata_block;
  handler->initialized = XML_SAX2_MAGIC;

  return Data_Wrap_Struct(klass, NULL, deallocate, handler);
}
|
260
|
+
|
261
|
+
VALUE cNokogiriXmlSaxParser ;
|
262
|
+
void init_xml_sax_parser()
|
263
|
+
{
|
264
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
265
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
266
|
+
VALUE sax = rb_define_module_under(xml, "SAX");
|
267
|
+
VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
|
268
|
+
|
269
|
+
cNokogiriXmlSaxParser = klass;
|
270
|
+
|
271
|
+
rb_define_alloc_func(klass, allocate);
|
272
|
+
|
273
|
+
id_start_document = rb_intern("start_document");
|
274
|
+
id_end_document = rb_intern("end_document");
|
275
|
+
id_start_element = rb_intern("start_element");
|
276
|
+
id_end_element = rb_intern("end_element");
|
277
|
+
id_comment = rb_intern("comment");
|
278
|
+
id_characters = rb_intern("characters");
|
279
|
+
id_xmldecl = rb_intern("xmldecl");
|
280
|
+
id_error = rb_intern("error");
|
281
|
+
id_warning = rb_intern("warning");
|
282
|
+
id_cdata_block = rb_intern("cdata_block");
|
283
|
+
id_cAttribute = rb_intern("Attribute");
|
284
|
+
id_start_element_namespace = rb_intern("start_element_namespace");
|
285
|
+
id_end_element_namespace = rb_intern("end_element_namespace");
|
286
|
+
}
|