nokogiri 1.3.0-x86-mswin32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +233 -0
- data/CHANGELOG.rdoc +222 -0
- data/Manifest.txt +247 -0
- data/README.ja.rdoc +103 -0
- data/README.rdoc +117 -0
- data/Rakefile +205 -0
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +89 -0
- data/ext/nokogiri/html_document.c +183 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +30 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser.c +57 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/nokogiri.c +81 -0
- data/ext/nokogiri/nokogiri.h +149 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +53 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +51 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +308 -0
- data/ext/nokogiri/xml_document.h +21 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +102 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +24 -0
- data/ext/nokogiri/xml_io.h +10 -0
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +928 -0
- data/ext/nokogiri/xml_node.h +14 -0
- data/ext/nokogiri/xml_node_set.c +386 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +572 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +336 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +86 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +203 -0
- data/ext/nokogiri/xml_syntax_error.h +12 -0
- data/ext/nokogiri/xml_text.c +47 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +252 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +36 -0
- data/lib/nokogiri.rb +110 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +748 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
- data/lib/nokogiri/css/node.rb +107 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +11 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +172 -0
- data/lib/nokogiri/decorators.rb +2 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +62 -0
- data/lib/nokogiri/html.rb +34 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +71 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +47 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +29 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +62 -0
- data/lib/nokogiri/xml/attr.rb +9 -0
- data/lib/nokogiri/xml/builder.rb +254 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/document.rb +100 -0
- data/lib/nokogiri/xml/document_fragment.rb +49 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/entity_declaration.rb +11 -0
- data/lib/nokogiri/xml/fragment_handler.rb +55 -0
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +745 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +238 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +66 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +3 -0
- data/lib/nokogiri/xml/sax/document.rb +143 -0
- data/lib/nokogiri/xml/sax/parser.rb +101 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +34 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +64 -0
- data/tasks/test.rb +161 -0
- data/test/css/test_nthiness.rb +160 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +176 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +123 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +11 -0
- data/test/hpricot/test_alter.rb +68 -0
- data/test/hpricot/test_builder.rb +20 -0
- data/test/hpricot/test_parser.rb +426 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +77 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +52 -0
- data/test/html/test_builder.rb +156 -0
- data/test/html/test_document.rb +361 -0
- data/test/html/test_document_encoding.rb +46 -0
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +165 -0
- data/test/test_convert_xpath.rb +186 -0
- data/test/test_css_cache.rb +56 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +127 -0
- data/test/test_reader.rb +316 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +169 -0
- data/test/xml/sax/test_push_parser.rb +92 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_builder.rb +73 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +23 -0
- data/test/xml/test_document.rb +397 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +76 -0
- data/test/xml/test_dtd.rb +42 -0
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +808 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +84 -0
- data/test/xml/test_node_set.rb +368 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +18 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +409 -0
@@ -0,0 +1,106 @@
|
|
1
|
+
#include <xml_relax_ng.h>
|
2
|
+
|
3
|
+
/*
 * Free function handed to Data_Wrap_Struct by read_memory. Releases the
 * wrapped RelaxNG schema with xmlRelaxNGFree, bracketed by the Nokogiri
 * debug macros.
 */
static void dealloc(xmlRelaxNGPtr schema)
{
  NOKOGIRI_DEBUG_START(schema);

  xmlRelaxNGFree(schema);

  NOKOGIRI_DEBUG_END(schema);
}
|
9
|
+
|
10
|
+
/*
|
11
|
+
* call-seq:
|
12
|
+
* validate_document(document)
|
13
|
+
*
|
14
|
+
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
15
|
+
*/
|
16
|
+
static VALUE validate_document(VALUE self, VALUE document)
|
17
|
+
{
|
18
|
+
xmlDocPtr doc;
|
19
|
+
xmlRelaxNGPtr schema;
|
20
|
+
|
21
|
+
Data_Get_Struct(self, xmlRelaxNG, schema);
|
22
|
+
Data_Get_Struct(document, xmlDoc, doc);
|
23
|
+
|
24
|
+
VALUE errors = rb_ary_new();
|
25
|
+
|
26
|
+
xmlRelaxNGValidCtxtPtr valid_ctxt = xmlRelaxNGNewValidCtxt(schema);
|
27
|
+
|
28
|
+
if(NULL == valid_ctxt) {
|
29
|
+
// we have a problem
|
30
|
+
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
31
|
+
}
|
32
|
+
|
33
|
+
if (! is_2_6_16()) {
|
34
|
+
xmlRelaxNGSetValidStructuredErrors(
|
35
|
+
valid_ctxt,
|
36
|
+
Nokogiri_error_array_pusher,
|
37
|
+
(void *)errors
|
38
|
+
);
|
39
|
+
}
|
40
|
+
|
41
|
+
xmlRelaxNGValidateDoc(valid_ctxt, doc);
|
42
|
+
|
43
|
+
xmlRelaxNGFreeValidCtxt(valid_ctxt);
|
44
|
+
|
45
|
+
return errors;
|
46
|
+
}
|
47
|
+
|
48
|
+
/*
|
49
|
+
* call-seq:
|
50
|
+
* read_memory(string)
|
51
|
+
*
|
52
|
+
* Create a new RelaxNG from the contents of +string+
|
53
|
+
*/
|
54
|
+
static VALUE read_memory(VALUE klass, VALUE content)
|
55
|
+
{
|
56
|
+
xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
|
57
|
+
(const char *)StringValuePtr(content),
|
58
|
+
RSTRING_LEN(content)
|
59
|
+
);
|
60
|
+
|
61
|
+
VALUE errors = rb_ary_new();
|
62
|
+
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
63
|
+
|
64
|
+
if (! is_2_6_16()) {
|
65
|
+
xmlRelaxNGSetParserStructuredErrors(
|
66
|
+
ctx,
|
67
|
+
Nokogiri_error_array_pusher,
|
68
|
+
(void *)errors
|
69
|
+
);
|
70
|
+
}
|
71
|
+
|
72
|
+
xmlRelaxNGPtr schema = xmlRelaxNGParse(ctx);
|
73
|
+
|
74
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
75
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
76
|
+
|
77
|
+
if(NULL == schema) {
|
78
|
+
xmlErrorPtr error = xmlGetLastError();
|
79
|
+
if(error)
|
80
|
+
rb_funcall(rb_mKernel, rb_intern("raise"), 1,
|
81
|
+
Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
|
82
|
+
);
|
83
|
+
else
|
84
|
+
rb_raise(rb_eRuntimeError, "Could not parse document");
|
85
|
+
|
86
|
+
return Qnil;
|
87
|
+
}
|
88
|
+
|
89
|
+
VALUE rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
90
|
+
rb_iv_set(rb_schema, "@errors", errors);
|
91
|
+
|
92
|
+
return rb_schema;
|
93
|
+
}
|
94
|
+
|
95
|
+
VALUE cNokogiriXmlRelaxNG;
|
96
|
+
void init_xml_relax_ng()
|
97
|
+
{
|
98
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
99
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
100
|
+
VALUE klass = rb_define_class_under(xml, "RelaxNG", cNokogiriXmlSchema);
|
101
|
+
|
102
|
+
cNokogiriXmlRelaxNG = klass;
|
103
|
+
|
104
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, 1);
|
105
|
+
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
106
|
+
}
|
@@ -0,0 +1,336 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* parse_memory(data)
|
6
|
+
*
|
7
|
+
* Parse the document stored in +data+
|
8
|
+
*/
|
9
|
+
static VALUE parse_memory(VALUE self, VALUE data)
|
10
|
+
{
|
11
|
+
xmlSAXHandlerPtr handler;
|
12
|
+
Data_Get_Struct(self, xmlSAXHandler, handler);
|
13
|
+
|
14
|
+
if(Qnil == data) rb_raise(rb_eArgError, "data cannot be nil");
|
15
|
+
|
16
|
+
xmlSAXUserParseMemory( handler,
|
17
|
+
(void *)self,
|
18
|
+
StringValuePtr(data),
|
19
|
+
RSTRING_LEN(data)
|
20
|
+
);
|
21
|
+
return data;
|
22
|
+
}
|
23
|
+
|
24
|
+
/*
|
25
|
+
* call-seq:
|
26
|
+
* native_parse_io(data, encoding)
|
27
|
+
*
|
28
|
+
* Parse the document accessable via +io+
|
29
|
+
*/
|
30
|
+
static VALUE native_parse_io(VALUE self, VALUE io, VALUE encoding)
|
31
|
+
{
|
32
|
+
xmlSAXHandlerPtr handler;
|
33
|
+
Data_Get_Struct(self, xmlSAXHandler, handler);
|
34
|
+
|
35
|
+
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
36
|
+
|
37
|
+
xmlParserCtxtPtr sax_ctx = xmlCreateIOParserCtxt(
|
38
|
+
handler,
|
39
|
+
(void *)self,
|
40
|
+
(xmlInputReadCallback)io_read_callback,
|
41
|
+
(xmlInputCloseCallback)io_close_callback,
|
42
|
+
(void *)io,
|
43
|
+
enc
|
44
|
+
);
|
45
|
+
xmlParseDocument(sax_ctx);
|
46
|
+
xmlFreeParserCtxt(sax_ctx);
|
47
|
+
return io;
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
|
51
|
+
* call-seq:
|
52
|
+
* native_parse_file(data)
|
53
|
+
*
|
54
|
+
* Parse the document stored in +data+
|
55
|
+
*/
|
56
|
+
static VALUE native_parse_file(VALUE self, VALUE data)
|
57
|
+
{
|
58
|
+
xmlSAXHandlerPtr handler;
|
59
|
+
Data_Get_Struct(self, xmlSAXHandler, handler);
|
60
|
+
xmlSAXUserParseFile( handler,
|
61
|
+
(void *)self,
|
62
|
+
StringValuePtr(data)
|
63
|
+
);
|
64
|
+
return data;
|
65
|
+
}
|
66
|
+
|
67
|
+
static void start_document(void * ctx)
|
68
|
+
{
|
69
|
+
VALUE self = (VALUE)ctx;
|
70
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
71
|
+
rb_funcall(doc, rb_intern("start_document"), 0);
|
72
|
+
}
|
73
|
+
|
74
|
+
static void end_document(void * ctx)
|
75
|
+
{
|
76
|
+
VALUE self = (VALUE)ctx;
|
77
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
78
|
+
rb_funcall(doc, rb_intern("end_document"), 0);
|
79
|
+
}
|
80
|
+
|
81
|
+
static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
|
82
|
+
{
|
83
|
+
VALUE self = (VALUE)ctx;
|
84
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
85
|
+
VALUE attributes = rb_ary_new();
|
86
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
87
|
+
const xmlChar * attr;
|
88
|
+
int i = 0;
|
89
|
+
if(atts) {
|
90
|
+
while((attr = atts[i]) != NULL) {
|
91
|
+
rb_funcall(attributes, rb_intern("<<"), 1,
|
92
|
+
NOKOGIRI_STR_NEW2(attr, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
93
|
+
);
|
94
|
+
i++;
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
rb_funcall( doc,
|
99
|
+
rb_intern("start_element"),
|
100
|
+
2,
|
101
|
+
NOKOGIRI_STR_NEW2(name, RTEST(enc) ? StringValuePtr(enc) : NULL),
|
102
|
+
attributes
|
103
|
+
);
|
104
|
+
}
|
105
|
+
|
106
|
+
static void end_element(void * ctx, const xmlChar *name)
|
107
|
+
{
|
108
|
+
VALUE self = (VALUE)ctx;
|
109
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
110
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
111
|
+
rb_funcall(doc, rb_intern("end_element"), 1,
|
112
|
+
NOKOGIRI_STR_NEW2(name, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
113
|
+
);
|
114
|
+
}
|
115
|
+
|
116
|
+
/**
|
117
|
+
* start_element_ns was borrowed heavily from libxml-ruby.
|
118
|
+
*/
|
119
|
+
static void
|
120
|
+
start_element_ns (
|
121
|
+
void * ctx,
|
122
|
+
const xmlChar * localname,
|
123
|
+
const xmlChar * prefix,
|
124
|
+
const xmlChar * URI,
|
125
|
+
int nb_namespaces,
|
126
|
+
const xmlChar ** namespaces,
|
127
|
+
int nb_attributes,
|
128
|
+
int nb_defaulted,
|
129
|
+
const xmlChar ** attributes)
|
130
|
+
{
|
131
|
+
VALUE self = (VALUE)ctx;
|
132
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
133
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
134
|
+
|
135
|
+
VALUE attrHash = rb_hash_new();
|
136
|
+
VALUE nsHash = rb_hash_new();
|
137
|
+
|
138
|
+
if (attributes)
|
139
|
+
{
|
140
|
+
/* Each attribute is an array of [localname, prefix, URI, value, end] */
|
141
|
+
int i;
|
142
|
+
for (i = 0; i < nb_attributes * 5; i += 5)
|
143
|
+
{
|
144
|
+
rb_hash_aset( attrHash,
|
145
|
+
NOKOGIRI_STR_NEW2((const char*)attributes[i+0], RTEST(enc) ? StringValuePtr(enc) : NULL),
|
146
|
+
NOKOGIRI_STR_NEW((const char*)attributes[i+3], (attributes[i+4] - attributes[i+3]), RTEST(enc) ? StringValuePtr(enc) : NULL));
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
if (namespaces)
|
151
|
+
{
|
152
|
+
int i;
|
153
|
+
for (i = 0; i < nb_namespaces * 2; i += 2)
|
154
|
+
{
|
155
|
+
rb_hash_aset( nsHash,
|
156
|
+
namespaces[i+0] ? NOKOGIRI_STR_NEW2((const char*)namespaces[i+0], RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil,
|
157
|
+
namespaces[i+1] ? NOKOGIRI_STR_NEW2((const char*)namespaces[i+1], RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil);
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
rb_funcall( doc,
|
162
|
+
rb_intern("start_element_ns"),
|
163
|
+
5,
|
164
|
+
NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL),
|
165
|
+
attrHash,
|
166
|
+
prefix ? NOKOGIRI_STR_NEW2(prefix, RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil,
|
167
|
+
URI ? NOKOGIRI_STR_NEW2(URI, RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil,
|
168
|
+
nsHash
|
169
|
+
);
|
170
|
+
|
171
|
+
/* Call start element if it's there' */
|
172
|
+
if (rb_respond_to(doc, rb_intern("start_element")))
|
173
|
+
{
|
174
|
+
VALUE name;
|
175
|
+
if (prefix)
|
176
|
+
{
|
177
|
+
name = NOKOGIRI_STR_NEW2(prefix, RTEST(enc) ? StringValuePtr(enc) : NULL);
|
178
|
+
rb_funcall(name, rb_intern("<<"), 1, NOKOGIRI_STR_NEW2(":", RTEST(enc) ? StringValuePtr(enc) : NULL));
|
179
|
+
rb_funcall(name, rb_intern("<<"), 1, NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL));
|
180
|
+
}
|
181
|
+
else
|
182
|
+
{
|
183
|
+
name = NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL);
|
184
|
+
}
|
185
|
+
VALUE attrArray = rb_funcall(attrHash, rb_intern("to_a"), 0);
|
186
|
+
attrArray = rb_funcall(attrArray, rb_intern("flatten"), 0);
|
187
|
+
rb_funcall(doc, rb_intern("start_element"), 2, name, attrArray);
|
188
|
+
}
|
189
|
+
|
190
|
+
}
|
191
|
+
|
192
|
+
/**
|
193
|
+
* end_element_ns was borrowed heavily from libxml-ruby.
|
194
|
+
*/
|
195
|
+
static void
|
196
|
+
end_element_ns (
|
197
|
+
void * ctx,
|
198
|
+
const xmlChar * localname,
|
199
|
+
const xmlChar * prefix,
|
200
|
+
const xmlChar * URI)
|
201
|
+
{
|
202
|
+
VALUE self = (VALUE)ctx;
|
203
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
204
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
205
|
+
|
206
|
+
rb_funcall(doc, rb_intern("end_element_ns"), 3,
|
207
|
+
NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL),
|
208
|
+
prefix ? NOKOGIRI_STR_NEW2(prefix, RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil,
|
209
|
+
URI ? NOKOGIRI_STR_NEW2(URI, RTEST(enc) ? StringValuePtr(enc) : NULL) : Qnil
|
210
|
+
);
|
211
|
+
|
212
|
+
/* Call end element for old-times sake */
|
213
|
+
if (rb_respond_to(doc, rb_intern("end_element")))
|
214
|
+
{
|
215
|
+
VALUE name;
|
216
|
+
if (prefix)
|
217
|
+
{
|
218
|
+
name = NOKOGIRI_STR_NEW2(prefix, RTEST(enc) ? StringValuePtr(enc) : NULL);
|
219
|
+
rb_funcall(name, rb_intern("<<"), 1, NOKOGIRI_STR_NEW2(":", RTEST(enc) ? StringValuePtr(enc) : NULL));
|
220
|
+
rb_funcall(name, rb_intern("<<"), 1, NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL));
|
221
|
+
}
|
222
|
+
else
|
223
|
+
{
|
224
|
+
name = NOKOGIRI_STR_NEW2(localname, RTEST(enc) ? StringValuePtr(enc) : NULL);
|
225
|
+
}
|
226
|
+
rb_funcall(doc, rb_intern("end_element"), 1, name);
|
227
|
+
}
|
228
|
+
|
229
|
+
}
|
230
|
+
|
231
|
+
static void characters_func(void * ctx, const xmlChar * ch, int len)
|
232
|
+
{
|
233
|
+
VALUE self = (VALUE)ctx;
|
234
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
235
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
236
|
+
VALUE str = NOKOGIRI_STR_NEW(ch, len, RTEST(enc) ? StringValuePtr(enc):NULL);
|
237
|
+
rb_funcall(doc, rb_intern("characters"), 1, str);
|
238
|
+
}
|
239
|
+
|
240
|
+
static void comment_func(void * ctx, const xmlChar * value)
|
241
|
+
{
|
242
|
+
VALUE self = (VALUE)ctx;
|
243
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
244
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
245
|
+
VALUE str = NOKOGIRI_STR_NEW2(value, RTEST(enc) ? StringValuePtr(enc):NULL);
|
246
|
+
rb_funcall(doc, rb_intern("comment"), 1, str);
|
247
|
+
}
|
248
|
+
|
249
|
+
static void warning_func(void * ctx, const char *msg, ...)
|
250
|
+
{
|
251
|
+
VALUE self = (VALUE)ctx;
|
252
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
253
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
254
|
+
char * message;
|
255
|
+
|
256
|
+
va_list args;
|
257
|
+
va_start(args, msg);
|
258
|
+
vasprintf(&message, msg, args);
|
259
|
+
va_end(args);
|
260
|
+
|
261
|
+
rb_funcall(doc, rb_intern("warning"), 1,
|
262
|
+
NOKOGIRI_STR_NEW2(message, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
263
|
+
);
|
264
|
+
free(message);
|
265
|
+
}
|
266
|
+
|
267
|
+
static void error_func(void * ctx, const char *msg, ...)
|
268
|
+
{
|
269
|
+
VALUE self = (VALUE)ctx;
|
270
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
271
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
272
|
+
char * message;
|
273
|
+
|
274
|
+
va_list args;
|
275
|
+
va_start(args, msg);
|
276
|
+
vasprintf(&message, msg, args);
|
277
|
+
va_end(args);
|
278
|
+
|
279
|
+
rb_funcall(doc, rb_intern("error"), 1,
|
280
|
+
NOKOGIRI_STR_NEW2(message, RTEST(enc) ? StringValuePtr(enc) : NULL)
|
281
|
+
);
|
282
|
+
free(message);
|
283
|
+
}
|
284
|
+
|
285
|
+
static void cdata_block(void * ctx, const xmlChar * value, int len)
|
286
|
+
{
|
287
|
+
VALUE self = (VALUE)ctx;
|
288
|
+
VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
|
289
|
+
VALUE doc = rb_funcall(self, rb_intern("document"), 0);
|
290
|
+
VALUE string =
|
291
|
+
NOKOGIRI_STR_NEW(value, len, RTEST(enc) ? StringValuePtr(enc) : NULL);
|
292
|
+
rb_funcall(doc, rb_intern("cdata_block"), 1, string);
|
293
|
+
}
|
294
|
+
|
295
|
+
/*
 * Free function handed to Data_Wrap_Struct by allocate. Releases the
 * calloc'd xmlSAXHandler with free, bracketed by the Nokogiri debug macros.
 */
static void deallocate(xmlSAXHandlerPtr handler)
{
  NOKOGIRI_DEBUG_START(handler);

  free(handler);

  NOKOGIRI_DEBUG_END(handler);
}
|
301
|
+
|
302
|
+
/*
 * Allocation function for Nokogiri::XML::SAX::Parser. Creates a zeroed
 * xmlSAXHandler, points its callbacks at the static functions in this file
 * and wraps it in a Ruby object of class +klass+; deallocate frees it.
 *
 * Raises NoMemoryError when the handler cannot be allocated.
 */
static VALUE allocate(VALUE klass)
{
  xmlSAXHandlerPtr handler = calloc(1, sizeof(*handler));

  /* Writing the callbacks through a NULL handler would crash the process. */
  if(NULL == handler)
    rb_raise(rb_eNoMemError, "failed to allocate xmlSAXHandler");

  handler->startDocument = start_document;
  handler->endDocument = end_document;
  handler->startElement = start_element;
  handler->endElement = end_element;
  handler->startElementNs = start_element_ns;
  handler->endElementNs = end_element_ns;
  handler->characters = characters_func;
  handler->comment = comment_func;
  handler->warning = warning_func;
  handler->error = error_func;
  handler->cdataBlock = cdata_block;
  /* XML_SAX2_MAGIC marks the handler as SAX2, enabling the *Ns callbacks. */
  handler->initialized = XML_SAX2_MAGIC;

  return Data_Wrap_Struct(klass, NULL, deallocate, handler);
}
|
321
|
+
|
322
|
+
VALUE cNokogiriXmlSaxParser ;
|
323
|
+
void init_xml_sax_parser()
|
324
|
+
{
|
325
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
326
|
+
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
327
|
+
VALUE sax = rb_define_module_under(xml, "SAX");
|
328
|
+
VALUE klass = rb_define_class_under(sax, "Parser", rb_cObject);
|
329
|
+
|
330
|
+
cNokogiriXmlSaxParser = klass;
|
331
|
+
|
332
|
+
rb_define_alloc_func(klass, allocate);
|
333
|
+
rb_define_method(klass, "parse_memory", parse_memory, 1);
|
334
|
+
rb_define_private_method(klass, "native_parse_file", native_parse_file, 1);
|
335
|
+
rb_define_private_method(klass, "native_parse_io", native_parse_io, 2);
|
336
|
+
}
|