nokogiri 1.8.5 → 1.15.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -18
- data/LICENSE-DEPENDENCIES.md +1636 -1024
- data/LICENSE.md +5 -28
- data/README.md +203 -90
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -61
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +867 -417
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +215 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +42 -37
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +40 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +401 -237
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +136 -62
- data/ext/nokogiri/xml_node.c +1387 -678
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +347 -212
- data/ext/nokogiri/xml_relax_ng.c +86 -77
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +145 -103
- data/ext/nokogiri/xml_sax_push_parser.c +64 -36
- data/ext/nokogiri/xml_schema.c +138 -81
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +36 -26
- data/ext/nokogiri/xml_xpath_context.c +366 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +224 -95
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +392 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +98 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -126
- data/lib/nokogiri/xml/document_fragment.rb +104 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +45 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1093 -411
- data/lib/nokogiri/xml/node_set.rb +173 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
- metadata +126 -399
- data/.autotest +0 -22
- data/.cross_rubies +0 -8
- data/.editorconfig +0 -17
- data/.gemtest +0 -0
- data/.travis.yml +0 -63
- data/CHANGELOG.md +0 -1368
- data/CONTRIBUTING.md +0 -42
- data/C_CODING_STYLE.rdoc +0 -33
- data/Gemfile-libxml-ruby +0 -3
- data/Manifest.txt +0 -370
- data/ROADMAP.md +0 -111
- data/Rakefile +0 -348
- data/SECURITY.md +0 -19
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/appveyor.yml +0 -29
- data/build_all +0 -44
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -15
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
- data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
- data/patches/sort-patches-by-date +0 -25
- data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
- data/suppressions/README.txt +0 -1
- data/suppressions/nokogiri_ruby-2.supp +0 -10
- data/tasks/test.rb +0 -100
- data/test/css/test_nthiness.rb +0 -226
- data/test/css/test_parser.rb +0 -386
- data/test/css/test_tokenizer.rb +0 -215
- data/test/css/test_xpath_visitor.rb +0 -96
- data/test/decorators/test_slop.rb +0 -23
- data/test/files/2ch.html +0 -108
- data/test/files/GH_1042.html +0 -18
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/atom.xml +0 -344
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/namespace_pressure_test.xml +0 -1684
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/shift_jis_no_charset.html +0 -9
- data/test/files/slow-xpath.xml +0 -25509
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -851
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -271
- data/test/html/sax/test_parser.rb +0 -168
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/sax/test_parser_text.rb +0 -163
- data/test/html/sax/test_push_parser.rb +0 -87
- data/test/html/test_attributes.rb +0 -85
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -712
- data/test/html/test_document_encoding.rb +0 -143
- data/test/html/test_document_fragment.rb +0 -310
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -212
- data/test/html/test_node_encoding.rb +0 -91
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
- data/test/namespaces/test_namespaces_preservation.rb +0 -31
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -47
- data/test/test_encoding_handler.rb +0 -48
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -138
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -314
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -402
- data/test/xml/sax/test_parser_context.rb +0 -115
- data/test/xml/sax/test_parser_text.rb +0 -202
- data/test/xml/sax/test_push_parser.rb +0 -265
- data/test/xml/test_attr.rb +0 -74
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -341
- data/test/xml/test_c14n.rb +0 -180
- data/test/xml/test_cdata.rb +0 -54
- data/test/xml/test_comment.rb +0 -40
- data/test/xml/test_document.rb +0 -982
- data/test/xml/test_document_encoding.rb +0 -31
- data/test/xml/test_document_fragment.rb +0 -298
- data/test/xml/test_dtd.rb +0 -187
- data/test/xml/test_dtd_encoding.rb +0 -31
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -262
- data/test/xml/test_namespace.rb +0 -96
- data/test/xml/test_node.rb +0 -1325
- data/test/xml/test_node_attributes.rb +0 -115
- data/test/xml/test_node_encoding.rb +0 -75
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -592
- data/test/xml/test_node_set.rb +0 -809
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader.rb +0 -620
- data/test/xml/test_reader_encoding.rb +0 -134
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -142
- data/test/xml/test_syntax_error.rb +0 -36
- data/test/xml/test_text.rb +0 -60
- data/test/xml/test_unparented_node.rb +0 -483
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -470
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,18 +1,44 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
-
static
|
6
|
-
{
|
7
|
-
NOKOGIRI_DEBUG_START(handler);
|
5
|
+
static ID id_read;
|
8
6
|
|
7
|
+
static void
|
8
|
+
xml_sax_parser_context_free(void *data)
|
9
|
+
{
|
10
|
+
xmlParserCtxtPtr ctxt = data;
|
9
11
|
ctxt->sax = NULL;
|
10
|
-
|
11
12
|
xmlFreeParserCtxt(ctxt);
|
13
|
+
}
|
14
|
+
|
15
|
+
/*
|
16
|
+
* note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
|
17
|
+
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
18
|
+
*/
|
19
|
+
static const rb_data_type_t xml_sax_parser_context_type = {
|
20
|
+
.wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
|
21
|
+
.function = {
|
22
|
+
.dfree = xml_sax_parser_context_free,
|
23
|
+
},
|
24
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
25
|
+
};
|
26
|
+
|
27
|
+
xmlParserCtxtPtr
|
28
|
+
noko_xml_sax_parser_context_unwrap(VALUE rb_context)
|
29
|
+
{
|
30
|
+
xmlParserCtxtPtr c_context;
|
31
|
+
TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
|
32
|
+
return c_context;
|
33
|
+
}
|
12
34
|
|
13
|
-
|
35
|
+
VALUE
|
36
|
+
noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
37
|
+
{
|
38
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
14
39
|
}
|
15
40
|
|
41
|
+
|
16
42
|
/*
|
17
43
|
* call-seq:
|
18
44
|
* parse_io(io, encoding)
|
@@ -22,19 +48,23 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
22
48
|
static VALUE
|
23
49
|
parse_io(VALUE klass, VALUE io, VALUE encoding)
|
24
50
|
{
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
51
|
+
xmlParserCtxtPtr ctxt;
|
52
|
+
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
53
|
+
|
54
|
+
if (!rb_respond_to(io, id_read)) {
|
55
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
56
|
+
}
|
57
|
+
|
58
|
+
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
59
|
+
(xmlInputReadCallback)noko_io_read,
|
60
|
+
(xmlInputCloseCallback)noko_io_close,
|
61
|
+
(void *)io, enc);
|
62
|
+
if (ctxt->sax) {
|
63
|
+
xmlFree(ctxt->sax);
|
64
|
+
ctxt->sax = NULL;
|
65
|
+
}
|
66
|
+
|
67
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
38
68
|
}
|
39
69
|
|
40
70
|
/*
|
@@ -43,10 +73,17 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
43
73
|
*
|
44
74
|
* Parse file given +filename+
|
45
75
|
*/
|
46
|
-
static VALUE
|
76
|
+
static VALUE
|
77
|
+
parse_file(VALUE klass, VALUE filename)
|
47
78
|
{
|
48
79
|
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
49
|
-
|
80
|
+
|
81
|
+
if (ctxt->sax) {
|
82
|
+
xmlFree(ctxt->sax);
|
83
|
+
ctxt->sax = NULL;
|
84
|
+
}
|
85
|
+
|
86
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
50
87
|
}
|
51
88
|
|
52
89
|
/*
|
@@ -58,41 +95,43 @@ static VALUE parse_file(VALUE klass, VALUE filename)
|
|
58
95
|
static VALUE
|
59
96
|
parse_memory(VALUE klass, VALUE data)
|
60
97
|
{
|
61
|
-
|
98
|
+
xmlParserCtxtPtr ctxt;
|
99
|
+
|
100
|
+
Check_Type(data, T_STRING);
|
62
101
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
102
|
+
if (!(int)RSTRING_LEN(data)) {
|
103
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
104
|
+
}
|
67
105
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
106
|
+
ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
|
107
|
+
(int)RSTRING_LEN(data));
|
108
|
+
if (ctxt->sax) {
|
109
|
+
xmlFree(ctxt->sax);
|
110
|
+
ctxt->sax = NULL;
|
111
|
+
}
|
74
112
|
|
75
|
-
|
113
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
76
114
|
}
|
77
115
|
|
78
116
|
static VALUE
|
79
117
|
parse_doc(VALUE ctxt_val)
|
80
118
|
{
|
81
|
-
|
82
|
-
|
83
|
-
|
119
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
120
|
+
xmlParseDocument(ctxt);
|
121
|
+
return Qnil;
|
84
122
|
}
|
85
123
|
|
86
124
|
static VALUE
|
87
125
|
parse_doc_finalize(VALUE ctxt_val)
|
88
126
|
{
|
89
|
-
|
127
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
90
128
|
|
91
|
-
|
92
|
-
|
129
|
+
if (NULL != ctxt->myDoc) {
|
130
|
+
xmlFreeDoc(ctxt->myDoc);
|
131
|
+
}
|
93
132
|
|
94
|
-
|
95
|
-
|
133
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
134
|
+
return Qnil;
|
96
135
|
}
|
97
136
|
|
98
137
|
/*
|
@@ -104,25 +143,24 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
104
143
|
static VALUE
|
105
144
|
parse_with(VALUE self, VALUE sax_handler)
|
106
145
|
{
|
107
|
-
|
108
|
-
|
146
|
+
xmlParserCtxtPtr ctxt;
|
147
|
+
xmlSAXHandlerPtr sax;
|
109
148
|
|
110
|
-
|
111
|
-
|
149
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
150
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
151
|
+
}
|
112
152
|
|
113
|
-
|
114
|
-
|
153
|
+
ctxt = noko_xml_sax_parser_context_unwrap(self);
|
154
|
+
sax = noko_sax_handler_unwrap(sax_handler);
|
115
155
|
|
116
|
-
|
117
|
-
|
118
|
-
xmlFree(ctxt->sax);
|
156
|
+
ctxt->sax = sax;
|
157
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
119
158
|
|
120
|
-
|
121
|
-
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
159
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
122
160
|
|
123
|
-
|
161
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
124
162
|
|
125
|
-
|
163
|
+
return Qnil;
|
126
164
|
}
|
127
165
|
|
128
166
|
/*
|
@@ -132,15 +170,16 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
132
170
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
133
171
|
* set to true
|
134
172
|
*/
|
135
|
-
static VALUE
|
173
|
+
static VALUE
|
174
|
+
set_replace_entities(VALUE self, VALUE value)
|
136
175
|
{
|
137
|
-
xmlParserCtxtPtr ctxt;
|
138
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
176
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
139
177
|
|
140
|
-
if(Qfalse == value)
|
178
|
+
if (Qfalse == value) {
|
141
179
|
ctxt->replaceEntities = 0;
|
142
|
-
else
|
180
|
+
} else {
|
143
181
|
ctxt->replaceEntities = 1;
|
182
|
+
}
|
144
183
|
|
145
184
|
return value;
|
146
185
|
}
|
@@ -152,15 +191,16 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
152
191
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
153
192
|
* set to true
|
154
193
|
*/
|
155
|
-
static VALUE
|
194
|
+
static VALUE
|
195
|
+
get_replace_entities(VALUE self)
|
156
196
|
{
|
157
|
-
xmlParserCtxtPtr ctxt;
|
158
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
197
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
159
198
|
|
160
|
-
if(0 == ctxt->replaceEntities)
|
199
|
+
if (0 == ctxt->replaceEntities) {
|
161
200
|
return Qfalse;
|
162
|
-
else
|
201
|
+
} else {
|
163
202
|
return Qtrue;
|
203
|
+
}
|
164
204
|
}
|
165
205
|
|
166
206
|
/*
|
@@ -168,16 +208,16 @@ static VALUE get_replace_entities(VALUE self)
|
|
168
208
|
*
|
169
209
|
* Get the current line the parser context is processing.
|
170
210
|
*/
|
171
|
-
static VALUE
|
211
|
+
static VALUE
|
212
|
+
line(VALUE self)
|
172
213
|
{
|
173
|
-
xmlParserCtxtPtr ctxt;
|
174
214
|
xmlParserInputPtr io;
|
175
|
-
|
176
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
215
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
177
216
|
|
178
217
|
io = ctxt->input;
|
179
|
-
if(io)
|
218
|
+
if (io) {
|
180
219
|
return INT2NUM(io->line);
|
220
|
+
}
|
181
221
|
|
182
222
|
return Qnil;
|
183
223
|
}
|
@@ -187,16 +227,16 @@ static VALUE line(VALUE self)
|
|
187
227
|
*
|
188
228
|
* Get the current column the parser context is processing.
|
189
229
|
*/
|
190
|
-
static VALUE
|
230
|
+
static VALUE
|
231
|
+
column(VALUE self)
|
191
232
|
{
|
192
|
-
xmlParserCtxtPtr ctxt;
|
233
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
193
234
|
xmlParserInputPtr io;
|
194
235
|
|
195
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
196
|
-
|
197
236
|
io = ctxt->input;
|
198
|
-
if(io)
|
237
|
+
if (io) {
|
199
238
|
return INT2NUM(io->col);
|
239
|
+
}
|
200
240
|
|
201
241
|
return Qnil;
|
202
242
|
}
|
@@ -208,15 +248,16 @@ static VALUE column(VALUE self)
|
|
208
248
|
* Should this parser recover from structural errors? It will not stop processing
|
209
249
|
* file on structural errors if set to true
|
210
250
|
*/
|
211
|
-
static VALUE
|
251
|
+
static VALUE
|
252
|
+
set_recovery(VALUE self, VALUE value)
|
212
253
|
{
|
213
|
-
xmlParserCtxtPtr ctxt;
|
214
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
254
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
215
255
|
|
216
|
-
if(value == Qfalse)
|
256
|
+
if (value == Qfalse) {
|
217
257
|
ctxt->recovery = 0;
|
218
|
-
else
|
258
|
+
} else {
|
219
259
|
ctxt->recovery = 1;
|
260
|
+
}
|
220
261
|
|
221
262
|
return value;
|
222
263
|
}
|
@@ -228,35 +269,36 @@ static VALUE set_recovery(VALUE self, VALUE value)
|
|
228
269
|
* Should this parser recover from structural errors? It will not stop processing
|
229
270
|
* file on structural errors if set to true
|
230
271
|
*/
|
231
|
-
static VALUE
|
272
|
+
static VALUE
|
273
|
+
get_recovery(VALUE self)
|
232
274
|
{
|
233
|
-
xmlParserCtxtPtr ctxt;
|
234
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
275
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
235
276
|
|
236
|
-
if(ctxt->recovery == 0)
|
277
|
+
if (ctxt->recovery == 0) {
|
237
278
|
return Qfalse;
|
238
|
-
else
|
279
|
+
} else {
|
239
280
|
return Qtrue;
|
281
|
+
}
|
240
282
|
}
|
241
283
|
|
242
|
-
void
|
284
|
+
void
|
285
|
+
noko_init_xml_sax_parser_context(void)
|
243
286
|
{
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
cNokogiriXmlSaxParserContext
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
rb_define_method(
|
256
|
-
rb_define_method(
|
257
|
-
rb_define_method(
|
258
|
-
rb_define_method(
|
259
|
-
|
260
|
-
|
261
|
-
rb_define_method(klass, "column", column, 0);
|
287
|
+
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
288
|
+
|
289
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
290
|
+
|
291
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
|
292
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
|
293
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
|
294
|
+
|
295
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
|
296
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
|
297
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
|
298
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
|
299
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
300
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
301
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
302
|
+
|
303
|
+
id_read = rb_intern("read");
|
262
304
|
}
|
@@ -1,18 +1,37 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlSaxPushParser ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
xml_sax_push_parser_free(void *data)
|
4
7
|
{
|
5
|
-
|
8
|
+
xmlParserCtxtPtr ctx = data;
|
6
9
|
if (ctx != NULL) {
|
7
10
|
NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
|
8
11
|
xmlFreeParserCtxt(ctx);
|
9
12
|
}
|
10
|
-
NOKOGIRI_DEBUG_END(ctx);
|
11
13
|
}
|
12
14
|
|
13
|
-
static
|
15
|
+
static const rb_data_type_t xml_sax_push_parser_type = {
|
16
|
+
.wrap_struct_name = "Nokogiri::XML::SAX::PushParser",
|
17
|
+
.function = {
|
18
|
+
.dfree = xml_sax_push_parser_free,
|
19
|
+
},
|
20
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
21
|
+
};
|
22
|
+
|
23
|
+
static VALUE
|
24
|
+
allocate(VALUE klass)
|
25
|
+
{
|
26
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
|
27
|
+
}
|
28
|
+
|
29
|
+
xmlParserCtxtPtr
|
30
|
+
noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
|
14
31
|
{
|
15
|
-
|
32
|
+
xmlParserCtxtPtr c_parser;
|
33
|
+
TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
|
34
|
+
return c_parser;
|
16
35
|
}
|
17
36
|
|
18
37
|
/*
|
@@ -21,20 +40,23 @@ static VALUE allocate(VALUE klass)
|
|
21
40
|
*
|
22
41
|
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
|
23
42
|
*/
|
24
|
-
static VALUE
|
43
|
+
static VALUE
|
44
|
+
native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
25
45
|
{
|
26
46
|
xmlParserCtxtPtr ctx;
|
27
|
-
const char *
|
47
|
+
const char *chunk = NULL;
|
28
48
|
int size = 0;
|
29
49
|
|
30
50
|
|
31
|
-
|
51
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
32
52
|
|
33
53
|
if (Qnil != _chunk) {
|
34
54
|
chunk = StringValuePtr(_chunk);
|
35
55
|
size = (int)RSTRING_LEN(_chunk);
|
36
56
|
}
|
37
57
|
|
58
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
59
|
+
|
38
60
|
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
39
61
|
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
40
62
|
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
@@ -51,13 +73,14 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
51
73
|
*
|
52
74
|
* Initialize the push parser with +xml_sax+ using +filename+
|
53
75
|
*/
|
54
|
-
static VALUE
|
76
|
+
static VALUE
|
77
|
+
initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
55
78
|
{
|
56
79
|
xmlSAXHandlerPtr sax;
|
57
|
-
const char *
|
80
|
+
const char *filename = NULL;
|
58
81
|
xmlParserCtxtPtr ctx;
|
59
82
|
|
60
|
-
|
83
|
+
sax = noko_sax_handler_unwrap(_xml_sax);
|
61
84
|
|
62
85
|
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
63
86
|
|
@@ -79,18 +102,22 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
|
79
102
|
return self;
|
80
103
|
}
|
81
104
|
|
82
|
-
static VALUE
|
105
|
+
static VALUE
|
106
|
+
get_options(VALUE self)
|
83
107
|
{
|
84
108
|
xmlParserCtxtPtr ctx;
|
85
|
-
|
109
|
+
|
110
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
86
111
|
|
87
112
|
return INT2NUM(ctx->options);
|
88
113
|
}
|
89
114
|
|
90
|
-
static VALUE
|
115
|
+
static VALUE
|
116
|
+
set_options(VALUE self, VALUE options)
|
91
117
|
{
|
92
118
|
xmlParserCtxtPtr ctx;
|
93
|
-
|
119
|
+
|
120
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
94
121
|
|
95
122
|
if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
|
96
123
|
rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
|
@@ -106,10 +133,12 @@ static VALUE set_options(VALUE self, VALUE options)
|
|
106
133
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
107
134
|
* set to true
|
108
135
|
*/
|
109
|
-
static VALUE
|
136
|
+
static VALUE
|
137
|
+
get_replace_entities(VALUE self)
|
110
138
|
{
|
111
139
|
xmlParserCtxtPtr ctx;
|
112
|
-
|
140
|
+
|
141
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
113
142
|
|
114
143
|
if (0 == ctx->replaceEntities) {
|
115
144
|
return Qfalse;
|
@@ -125,10 +154,12 @@ static VALUE get_replace_entities(VALUE self)
|
|
125
154
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
126
155
|
* set to true
|
127
156
|
*/
|
128
|
-
static VALUE
|
157
|
+
static VALUE
|
158
|
+
set_replace_entities(VALUE self, VALUE value)
|
129
159
|
{
|
130
160
|
xmlParserCtxtPtr ctx;
|
131
|
-
|
161
|
+
|
162
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
132
163
|
|
133
164
|
if (Qfalse == value) {
|
134
165
|
ctx->replaceEntities = 0;
|
@@ -139,21 +170,18 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
139
170
|
return value;
|
140
171
|
}
|
141
172
|
|
142
|
-
|
143
|
-
void
|
173
|
+
void
|
174
|
+
noko_init_xml_sax_push_parser(void)
|
144
175
|
{
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
cNokogiriXmlSaxPushParser =
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
rb_define_private_method(
|
155
|
-
|
156
|
-
rb_define_method(klass, "options=", set_options, 1);
|
157
|
-
rb_define_method(klass, "replace_entities", get_replace_entities, 0);
|
158
|
-
rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
|
176
|
+
cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
|
177
|
+
|
178
|
+
rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);
|
179
|
+
|
180
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
|
181
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
|
182
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0);
|
183
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=", set_replace_entities, 1);
|
184
|
+
|
185
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
|
186
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);
|
159
187
|
}
|