nokogiri 1.6.0 → 1.13.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +3 -19
- data/LICENSE-DEPENDENCIES.md +1903 -0
- data/LICENSE.md +9 -0
- data/README.md +280 -0
- data/bin/nokogiri +84 -31
- data/dependencies.yml +23 -4
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +952 -132
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +231 -96
- data/ext/nokogiri/nokogiri.h +188 -129
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +49 -40
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +24 -23
- data/ext/nokogiri/xml_comment.c +29 -21
- data/ext/nokogiri/xml_document.c +327 -223
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +45 -20
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +74 -32
- data/ext/nokogiri/xml_node.c +1290 -680
- data/ext/nokogiri/xml_node_set.c +239 -208
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +227 -189
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +123 -125
- data/ext/nokogiri/xml_sax_parser_context.c +138 -79
- data/ext/nokogiri/xml_sax_push_parser.c +88 -35
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +50 -23
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +227 -140
- data/ext/nokogiri/xslt_stylesheet.c +269 -177
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -58
- data/lib/nokogiri/css/parser.rb +407 -357
- data/lib/nokogiri/css/parser.y +265 -246
- data/lib/nokogiri/css/parser_extras.rb +52 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +8 -7
- data/lib/nokogiri/css/xpath_visitor.rb +266 -80
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +17 -8
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +331 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +221 -0
- data/lib/nokogiri/version.rb +3 -105
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +96 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +234 -95
- data/lib/nokogiri/xml/document_fragment.rb +86 -36
- data/lib/nokogiri/xml/dtd.rb +16 -4
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +947 -502
- data/lib/nokogiri/xml/node_set.rb +168 -159
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +40 -5
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +43 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -36
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +69 -69
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +278 -362
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/.travis.yml +0 -27
- data/CHANGELOG.ja.rdoc +0 -819
- data/CHANGELOG.rdoc +0 -819
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -315
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -246
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -56
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -13
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -14
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -254
- data/lib/nokogiri/html/document_fragment.rb +0 -41
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/lib/nokogiri/html/sax/push_parser.rb +0 -16
- data/ports/archives/libxml2-2.8.0.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.26.tar.gz +0 -0
- data/tasks/cross_compile.rb +0 -132
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
@@ -1,16 +1,17 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
-
static void
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctxt)
|
6
7
|
{
|
7
|
-
NOKOGIRI_DEBUG_START(
|
8
|
+
NOKOGIRI_DEBUG_START(ctxt);
|
8
9
|
|
9
10
|
ctxt->sax = NULL;
|
10
11
|
|
11
12
|
xmlFreeParserCtxt(ctxt);
|
12
13
|
|
13
|
-
NOKOGIRI_DEBUG_END(
|
14
|
+
NOKOGIRI_DEBUG_END(ctxt);
|
14
15
|
}
|
15
16
|
|
16
17
|
/*
|
@@ -22,19 +23,19 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
22
23
|
static VALUE
|
23
24
|
parse_io(VALUE klass, VALUE io, VALUE encoding)
|
24
25
|
{
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
26
|
+
xmlParserCtxtPtr ctxt;
|
27
|
+
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
28
|
+
|
29
|
+
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
30
|
+
(xmlInputReadCallback)noko_io_read,
|
31
|
+
(xmlInputCloseCallback)noko_io_close,
|
32
|
+
(void *)io, enc);
|
33
|
+
if (ctxt->sax) {
|
34
|
+
xmlFree(ctxt->sax);
|
35
|
+
ctxt->sax = NULL;
|
36
|
+
}
|
37
|
+
|
38
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
38
39
|
}
|
39
40
|
|
40
41
|
/*
|
@@ -43,9 +44,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
43
44
|
*
|
44
45
|
* Parse file given +filename+
|
45
46
|
*/
|
46
|
-
static VALUE
|
47
|
+
static VALUE
|
48
|
+
parse_file(VALUE klass, VALUE filename)
|
47
49
|
{
|
48
|
-
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(
|
50
|
+
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
49
51
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
50
52
|
}
|
51
53
|
|
@@ -58,41 +60,44 @@ static VALUE parse_file(VALUE klass, VALUE filename)
|
|
58
60
|
static VALUE
|
59
61
|
parse_memory(VALUE klass, VALUE data)
|
60
62
|
{
|
61
|
-
|
63
|
+
xmlParserCtxtPtr ctxt;
|
62
64
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
65
|
+
if (NIL_P(data)) {
|
66
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
67
|
+
}
|
68
|
+
if (!(int)RSTRING_LEN(data)) {
|
69
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
70
|
+
}
|
67
71
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
|
73
|
+
(int)RSTRING_LEN(data));
|
74
|
+
if (ctxt->sax) {
|
75
|
+
xmlFree(ctxt->sax);
|
76
|
+
ctxt->sax = NULL;
|
77
|
+
}
|
74
78
|
|
75
|
-
|
79
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
76
80
|
}
|
77
81
|
|
78
82
|
static VALUE
|
79
83
|
parse_doc(VALUE ctxt_val)
|
80
84
|
{
|
81
|
-
|
82
|
-
|
83
|
-
|
85
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
86
|
+
xmlParseDocument(ctxt);
|
87
|
+
return Qnil;
|
84
88
|
}
|
85
89
|
|
86
90
|
static VALUE
|
87
91
|
parse_doc_finalize(VALUE ctxt_val)
|
88
92
|
{
|
89
|
-
|
93
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
90
94
|
|
91
|
-
|
92
|
-
|
95
|
+
if (NULL != ctxt->myDoc) {
|
96
|
+
xmlFreeDoc(ctxt->myDoc);
|
97
|
+
}
|
93
98
|
|
94
|
-
|
95
|
-
|
99
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
100
|
+
return Qnil;
|
96
101
|
}
|
97
102
|
|
98
103
|
/*
|
@@ -104,25 +109,29 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
104
109
|
static VALUE
|
105
110
|
parse_with(VALUE self, VALUE sax_handler)
|
106
111
|
{
|
107
|
-
|
108
|
-
|
112
|
+
xmlParserCtxtPtr ctxt;
|
113
|
+
xmlSAXHandlerPtr sax;
|
114
|
+
|
115
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
116
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
117
|
+
}
|
109
118
|
|
110
|
-
|
111
|
-
|
119
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
120
|
+
Data_Get_Struct(sax_handler, xmlSAXHandler, sax);
|
112
121
|
|
113
|
-
|
114
|
-
|
122
|
+
/* Free the sax handler since we'll assign our own */
|
123
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
124
|
+
xmlFree(ctxt->sax);
|
125
|
+
}
|
115
126
|
|
116
|
-
|
117
|
-
|
118
|
-
xmlFree(ctxt->sax);
|
127
|
+
ctxt->sax = sax;
|
128
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
119
129
|
|
120
|
-
|
121
|
-
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
130
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
122
131
|
|
123
|
-
|
132
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
124
133
|
|
125
|
-
|
134
|
+
return Qnil;
|
126
135
|
}
|
127
136
|
|
128
137
|
/*
|
@@ -132,15 +141,17 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
132
141
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
133
142
|
* set to true
|
134
143
|
*/
|
135
|
-
static VALUE
|
144
|
+
static VALUE
|
145
|
+
set_replace_entities(VALUE self, VALUE value)
|
136
146
|
{
|
137
147
|
xmlParserCtxtPtr ctxt;
|
138
148
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
139
149
|
|
140
|
-
if(Qfalse == value)
|
150
|
+
if (Qfalse == value) {
|
141
151
|
ctxt->replaceEntities = 0;
|
142
|
-
else
|
152
|
+
} else {
|
143
153
|
ctxt->replaceEntities = 1;
|
154
|
+
}
|
144
155
|
|
145
156
|
return value;
|
146
157
|
}
|
@@ -152,15 +163,17 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
152
163
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
153
164
|
* set to true
|
154
165
|
*/
|
155
|
-
static VALUE
|
166
|
+
static VALUE
|
167
|
+
get_replace_entities(VALUE self)
|
156
168
|
{
|
157
169
|
xmlParserCtxtPtr ctxt;
|
158
170
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
159
171
|
|
160
|
-
if(0 == ctxt->replaceEntities)
|
172
|
+
if (0 == ctxt->replaceEntities) {
|
161
173
|
return Qfalse;
|
162
|
-
else
|
174
|
+
} else {
|
163
175
|
return Qtrue;
|
176
|
+
}
|
164
177
|
}
|
165
178
|
|
166
179
|
/*
|
@@ -168,7 +181,8 @@ static VALUE get_replace_entities(VALUE self)
|
|
168
181
|
*
|
169
182
|
* Get the current line the parser context is processing.
|
170
183
|
*/
|
171
|
-
static VALUE
|
184
|
+
static VALUE
|
185
|
+
line(VALUE self)
|
172
186
|
{
|
173
187
|
xmlParserCtxtPtr ctxt;
|
174
188
|
xmlParserInputPtr io;
|
@@ -176,8 +190,9 @@ static VALUE line(VALUE self)
|
|
176
190
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
177
191
|
|
178
192
|
io = ctxt->input;
|
179
|
-
if(io)
|
193
|
+
if (io) {
|
180
194
|
return INT2NUM(io->line);
|
195
|
+
}
|
181
196
|
|
182
197
|
return Qnil;
|
183
198
|
}
|
@@ -187,7 +202,8 @@ static VALUE line(VALUE self)
|
|
187
202
|
*
|
188
203
|
* Get the current column the parser context is processing.
|
189
204
|
*/
|
190
|
-
static VALUE
|
205
|
+
static VALUE
|
206
|
+
column(VALUE self)
|
191
207
|
{
|
192
208
|
xmlParserCtxtPtr ctxt;
|
193
209
|
xmlParserInputPtr io;
|
@@ -195,28 +211,71 @@ static VALUE column(VALUE self)
|
|
195
211
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
196
212
|
|
197
213
|
io = ctxt->input;
|
198
|
-
if(io)
|
214
|
+
if (io) {
|
199
215
|
return INT2NUM(io->col);
|
216
|
+
}
|
200
217
|
|
201
218
|
return Qnil;
|
202
219
|
}
|
203
220
|
|
204
|
-
|
221
|
+
/*
|
222
|
+
* call-seq:
|
223
|
+
* recovery=(boolean)
|
224
|
+
*
|
225
|
+
* Should this parser recover from structural errors? It will not stop processing
|
226
|
+
* the file on structural errors if set to true
|
227
|
+
*/
|
228
|
+
static VALUE
|
229
|
+
set_recovery(VALUE self, VALUE value)
|
230
|
+
{
|
231
|
+
xmlParserCtxtPtr ctxt;
|
232
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
233
|
+
|
234
|
+
if (value == Qfalse) {
|
235
|
+
ctxt->recovery = 0;
|
236
|
+
} else {
|
237
|
+
ctxt->recovery = 1;
|
238
|
+
}
|
239
|
+
|
240
|
+
return value;
|
241
|
+
}
|
242
|
+
|
243
|
+
/*
|
244
|
+
* call-seq:
|
245
|
+
* recovery
|
246
|
+
*
|
247
|
+
* Should this parser recover from structural errors? It will not stop processing
|
248
|
+
* the file on structural errors if set to true
|
249
|
+
*/
|
250
|
+
static VALUE
|
251
|
+
get_recovery(VALUE self)
|
205
252
|
{
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
253
|
+
xmlParserCtxtPtr ctxt;
|
254
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
255
|
+
|
256
|
+
if (ctxt->recovery == 0) {
|
257
|
+
return Qfalse;
|
258
|
+
} else {
|
259
|
+
return Qtrue;
|
260
|
+
}
|
261
|
+
}
|
262
|
+
|
263
|
+
void
|
264
|
+
noko_init_xml_sax_parser_context()
|
265
|
+
{
|
266
|
+
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
267
|
+
|
268
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
269
|
+
|
270
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
|
271
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
|
272
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
|
273
|
+
|
274
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
|
275
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
|
276
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
|
277
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
|
278
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
279
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
280
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
222
281
|
}
|
@@ -1,16 +1,20 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlSaxPushParser ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctx)
|
4
7
|
{
|
5
8
|
NOKOGIRI_DEBUG_START(ctx);
|
6
|
-
if(ctx != NULL) {
|
9
|
+
if (ctx != NULL) {
|
7
10
|
NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
|
8
11
|
xmlFreeParserCtxt(ctx);
|
9
12
|
}
|
10
13
|
NOKOGIRI_DEBUG_END(ctx);
|
11
14
|
}
|
12
15
|
|
13
|
-
static VALUE
|
16
|
+
static VALUE
|
17
|
+
allocate(VALUE klass)
|
14
18
|
{
|
15
19
|
return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
|
16
20
|
}
|
@@ -21,21 +25,24 @@ static VALUE allocate(VALUE klass)
|
|
21
25
|
*
|
22
26
|
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
|
23
27
|
*/
|
24
|
-
static VALUE
|
28
|
+
static VALUE
|
29
|
+
native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
25
30
|
{
|
26
31
|
xmlParserCtxtPtr ctx;
|
27
|
-
const char *
|
32
|
+
const char *chunk = NULL;
|
28
33
|
int size = 0;
|
29
34
|
|
30
35
|
|
31
36
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
32
37
|
|
33
|
-
if(Qnil != _chunk) {
|
38
|
+
if (Qnil != _chunk) {
|
34
39
|
chunk = StringValuePtr(_chunk);
|
35
40
|
size = (int)RSTRING_LEN(_chunk);
|
36
41
|
}
|
37
42
|
|
38
|
-
|
43
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
44
|
+
|
45
|
+
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
39
46
|
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
40
47
|
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
41
48
|
Nokogiri_error_raise(NULL, e);
|
@@ -51,25 +58,27 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
51
58
|
*
|
52
59
|
* Initialize the push parser with +xml_sax+ using +filename+
|
53
60
|
*/
|
54
|
-
static VALUE
|
61
|
+
static VALUE
|
62
|
+
initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
55
63
|
{
|
56
64
|
xmlSAXHandlerPtr sax;
|
57
|
-
const char *
|
65
|
+
const char *filename = NULL;
|
58
66
|
xmlParserCtxtPtr ctx;
|
59
67
|
|
60
68
|
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
61
69
|
|
62
|
-
if(_filename != Qnil) filename =
|
70
|
+
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
63
71
|
|
64
72
|
ctx = xmlCreatePushParserCtxt(
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
if(ctx == NULL)
|
73
|
+
sax,
|
74
|
+
NULL,
|
75
|
+
NULL,
|
76
|
+
0,
|
77
|
+
filename
|
78
|
+
);
|
79
|
+
if (ctx == NULL) {
|
72
80
|
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
81
|
+
}
|
73
82
|
|
74
83
|
ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
|
75
84
|
|
@@ -78,7 +87,8 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
|
78
87
|
return self;
|
79
88
|
}
|
80
89
|
|
81
|
-
static VALUE
|
90
|
+
static VALUE
|
91
|
+
get_options(VALUE self)
|
82
92
|
{
|
83
93
|
xmlParserCtxtPtr ctx;
|
84
94
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -86,30 +96,73 @@ static VALUE get_options(VALUE self)
|
|
86
96
|
return INT2NUM(ctx->options);
|
87
97
|
}
|
88
98
|
|
89
|
-
static VALUE
|
99
|
+
static VALUE
|
100
|
+
set_options(VALUE self, VALUE options)
|
90
101
|
{
|
91
102
|
xmlParserCtxtPtr ctx;
|
92
103
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
93
104
|
|
94
|
-
if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0)
|
105
|
+
if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
|
95
106
|
rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
|
107
|
+
}
|
96
108
|
|
97
109
|
return Qnil;
|
98
110
|
}
|
99
111
|
|
100
|
-
|
101
|
-
|
112
|
+
/*
|
113
|
+
* call-seq:
|
114
|
+
* replace_entities
|
115
|
+
*
|
116
|
+
* Should this parser replace entities? &amp; will get converted to '&' if
|
117
|
+
* set to true
|
118
|
+
*/
|
119
|
+
static VALUE
|
120
|
+
get_replace_entities(VALUE self)
|
121
|
+
{
|
122
|
+
xmlParserCtxtPtr ctx;
|
123
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
124
|
+
|
125
|
+
if (0 == ctx->replaceEntities) {
|
126
|
+
return Qfalse;
|
127
|
+
} else {
|
128
|
+
return Qtrue;
|
129
|
+
}
|
130
|
+
}
|
131
|
+
|
132
|
+
/*
|
133
|
+
* call-seq:
|
134
|
+
* replace_entities=(boolean)
|
135
|
+
*
|
136
|
+
* Should this parser replace entities? &amp; will get converted to '&' if
|
137
|
+
* set to true
|
138
|
+
*/
|
139
|
+
static VALUE
|
140
|
+
set_replace_entities(VALUE self, VALUE value)
|
141
|
+
{
|
142
|
+
xmlParserCtxtPtr ctx;
|
143
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
144
|
+
|
145
|
+
if (Qfalse == value) {
|
146
|
+
ctx->replaceEntities = 0;
|
147
|
+
} else {
|
148
|
+
ctx->replaceEntities = 1;
|
149
|
+
}
|
150
|
+
|
151
|
+
return value;
|
152
|
+
}
|
153
|
+
|
154
|
+
void
|
155
|
+
noko_init_xml_sax_push_parser()
|
102
156
|
{
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
cNokogiriXmlSaxPushParser =
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
rb_define_private_method(
|
113
|
-
|
114
|
-
rb_define_method(klass, "options=", set_options, 1);
|
157
|
+
cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
|
158
|
+
|
159
|
+
rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);
|
160
|
+
|
161
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
|
162
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
|
163
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0);
|
164
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=", set_replace_entities, 1);
|
165
|
+
|
166
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
|
167
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);
|
115
168
|
}
|