nokogiri 1.10.10 → 1.13.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +178 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -64
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +761 -424
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +199 -88
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +21 -21
- data/ext/nokogiri/xml_cdata.c +14 -19
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +296 -220
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +25 -25
- data/ext/nokogiri/xml_encoding_handler.c +43 -18
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +98 -53
- data/ext/nokogiri/xml_node.c +1065 -653
- data/ext/nokogiri/xml_node_set.c +178 -166
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +277 -175
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +112 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +98 -48
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +226 -115
- data/ext/nokogiri/xslt_stylesheet.c +265 -173
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +218 -91
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/{html → html4}/document.rb +103 -105
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +91 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +100 -0
- data/lib/nokogiri/html5.rb +478 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +222 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +74 -33
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +224 -86
- data/lib/nokogiri/xml/document_fragment.rb +46 -44
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +10 -5
- data/lib/nokogiri/xml/node.rb +884 -378
- data/lib/nokogiri/xml/node_set.rb +51 -54
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +22 -8
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +38 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +112 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +49 -65
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +189 -142
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,16 +1,19 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlSaxParserContext ;
|
4
4
|
|
5
|
-
static
|
5
|
+
static ID id_read;
|
6
|
+
|
7
|
+
static void
|
8
|
+
deallocate(xmlParserCtxtPtr ctxt)
|
6
9
|
{
|
7
|
-
NOKOGIRI_DEBUG_START(
|
10
|
+
NOKOGIRI_DEBUG_START(ctxt);
|
8
11
|
|
9
12
|
ctxt->sax = NULL;
|
10
13
|
|
11
14
|
xmlFreeParserCtxt(ctxt);
|
12
15
|
|
13
|
-
NOKOGIRI_DEBUG_END(
|
16
|
+
NOKOGIRI_DEBUG_END(ctxt);
|
14
17
|
}
|
15
18
|
|
16
19
|
/*
|
@@ -22,19 +25,23 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
22
25
|
static VALUE
|
23
26
|
parse_io(VALUE klass, VALUE io, VALUE encoding)
|
24
27
|
{
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
28
|
+
xmlParserCtxtPtr ctxt;
|
29
|
+
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
|
30
|
+
|
31
|
+
if (!rb_respond_to(io, id_read)) {
|
32
|
+
rb_raise(rb_eTypeError, "argument expected to respond to :read");
|
33
|
+
}
|
34
|
+
|
35
|
+
ctxt = xmlCreateIOParserCtxt(NULL, NULL,
|
36
|
+
(xmlInputReadCallback)noko_io_read,
|
37
|
+
(xmlInputCloseCallback)noko_io_close,
|
38
|
+
(void *)io, enc);
|
39
|
+
if (ctxt->sax) {
|
40
|
+
xmlFree(ctxt->sax);
|
41
|
+
ctxt->sax = NULL;
|
42
|
+
}
|
43
|
+
|
44
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
38
45
|
}
|
39
46
|
|
40
47
|
/*
|
@@ -43,7 +50,8 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
43
50
|
*
|
44
51
|
* Parse file given +filename+
|
45
52
|
*/
|
46
|
-
static VALUE
|
53
|
+
static VALUE
|
54
|
+
parse_file(VALUE klass, VALUE filename)
|
47
55
|
{
|
48
56
|
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
49
57
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
@@ -58,41 +66,43 @@ static VALUE parse_file(VALUE klass, VALUE filename)
|
|
58
66
|
static VALUE
|
59
67
|
parse_memory(VALUE klass, VALUE data)
|
60
68
|
{
|
61
|
-
|
69
|
+
xmlParserCtxtPtr ctxt;
|
70
|
+
|
71
|
+
Check_Type(data, T_STRING);
|
62
72
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
73
|
+
if (!(int)RSTRING_LEN(data)) {
|
74
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
75
|
+
}
|
67
76
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
77
|
+
ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(data),
|
78
|
+
(int)RSTRING_LEN(data));
|
79
|
+
if (ctxt->sax) {
|
80
|
+
xmlFree(ctxt->sax);
|
81
|
+
ctxt->sax = NULL;
|
82
|
+
}
|
74
83
|
|
75
|
-
|
84
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
76
85
|
}
|
77
86
|
|
78
87
|
static VALUE
|
79
88
|
parse_doc(VALUE ctxt_val)
|
80
89
|
{
|
81
|
-
|
82
|
-
|
83
|
-
|
90
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
91
|
+
xmlParseDocument(ctxt);
|
92
|
+
return Qnil;
|
84
93
|
}
|
85
94
|
|
86
95
|
static VALUE
|
87
96
|
parse_doc_finalize(VALUE ctxt_val)
|
88
97
|
{
|
89
|
-
|
98
|
+
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)ctxt_val;
|
90
99
|
|
91
|
-
|
92
|
-
|
100
|
+
if (NULL != ctxt->myDoc) {
|
101
|
+
xmlFreeDoc(ctxt->myDoc);
|
102
|
+
}
|
93
103
|
|
94
|
-
|
95
|
-
|
104
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
105
|
+
return Qnil;
|
96
106
|
}
|
97
107
|
|
98
108
|
/*
|
@@ -104,25 +114,29 @@ parse_doc_finalize(VALUE ctxt_val)
|
|
104
114
|
static VALUE
|
105
115
|
parse_with(VALUE self, VALUE sax_handler)
|
106
116
|
{
|
107
|
-
|
108
|
-
|
117
|
+
xmlParserCtxtPtr ctxt;
|
118
|
+
xmlSAXHandlerPtr sax;
|
109
119
|
|
110
|
-
|
111
|
-
|
120
|
+
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
|
121
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
122
|
+
}
|
112
123
|
|
113
|
-
|
114
|
-
|
124
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
125
|
+
Data_Get_Struct(sax_handler, xmlSAXHandler, sax);
|
126
|
+
|
127
|
+
/* Free the sax handler since we'll assign our own */
|
128
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
129
|
+
xmlFree(ctxt->sax);
|
130
|
+
}
|
115
131
|
|
116
|
-
|
117
|
-
|
118
|
-
xmlFree(ctxt->sax);
|
132
|
+
ctxt->sax = sax;
|
133
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
119
134
|
|
120
|
-
|
121
|
-
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
135
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
122
136
|
|
123
|
-
|
137
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
124
138
|
|
125
|
-
|
139
|
+
return Qnil;
|
126
140
|
}
|
127
141
|
|
128
142
|
/*
|
@@ -132,15 +146,17 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
132
146
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
133
147
|
* set to true
|
134
148
|
*/
|
135
|
-
static VALUE
|
149
|
+
static VALUE
|
150
|
+
set_replace_entities(VALUE self, VALUE value)
|
136
151
|
{
|
137
152
|
xmlParserCtxtPtr ctxt;
|
138
153
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
139
154
|
|
140
|
-
if(Qfalse == value)
|
155
|
+
if (Qfalse == value) {
|
141
156
|
ctxt->replaceEntities = 0;
|
142
|
-
else
|
157
|
+
} else {
|
143
158
|
ctxt->replaceEntities = 1;
|
159
|
+
}
|
144
160
|
|
145
161
|
return value;
|
146
162
|
}
|
@@ -152,15 +168,17 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
152
168
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
153
169
|
* set to true
|
154
170
|
*/
|
155
|
-
static VALUE
|
171
|
+
static VALUE
|
172
|
+
get_replace_entities(VALUE self)
|
156
173
|
{
|
157
174
|
xmlParserCtxtPtr ctxt;
|
158
175
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
159
176
|
|
160
|
-
if(0 == ctxt->replaceEntities)
|
177
|
+
if (0 == ctxt->replaceEntities) {
|
161
178
|
return Qfalse;
|
162
|
-
else
|
179
|
+
} else {
|
163
180
|
return Qtrue;
|
181
|
+
}
|
164
182
|
}
|
165
183
|
|
166
184
|
/*
|
@@ -168,7 +186,8 @@ static VALUE get_replace_entities(VALUE self)
|
|
168
186
|
*
|
169
187
|
* Get the current line the parser context is processing.
|
170
188
|
*/
|
171
|
-
static VALUE
|
189
|
+
static VALUE
|
190
|
+
line(VALUE self)
|
172
191
|
{
|
173
192
|
xmlParserCtxtPtr ctxt;
|
174
193
|
xmlParserInputPtr io;
|
@@ -176,8 +195,9 @@ static VALUE line(VALUE self)
|
|
176
195
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
177
196
|
|
178
197
|
io = ctxt->input;
|
179
|
-
if(io)
|
198
|
+
if (io) {
|
180
199
|
return INT2NUM(io->line);
|
200
|
+
}
|
181
201
|
|
182
202
|
return Qnil;
|
183
203
|
}
|
@@ -187,7 +207,8 @@ static VALUE line(VALUE self)
|
|
187
207
|
*
|
188
208
|
* Get the current column the parser context is processing.
|
189
209
|
*/
|
190
|
-
static VALUE
|
210
|
+
static VALUE
|
211
|
+
column(VALUE self)
|
191
212
|
{
|
192
213
|
xmlParserCtxtPtr ctxt;
|
193
214
|
xmlParserInputPtr io;
|
@@ -195,8 +216,9 @@ static VALUE column(VALUE self)
|
|
195
216
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
196
217
|
|
197
218
|
io = ctxt->input;
|
198
|
-
if(io)
|
219
|
+
if (io) {
|
199
220
|
return INT2NUM(io->col);
|
221
|
+
}
|
200
222
|
|
201
223
|
return Qnil;
|
202
224
|
}
|
@@ -208,15 +230,17 @@ static VALUE column(VALUE self)
|
|
208
230
|
* Should this parser recover from structural errors? It will not stop processing
|
209
231
|
* file on structural errors if set to true
|
210
232
|
*/
|
211
|
-
static VALUE
|
233
|
+
static VALUE
|
234
|
+
set_recovery(VALUE self, VALUE value)
|
212
235
|
{
|
213
236
|
xmlParserCtxtPtr ctxt;
|
214
237
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
215
238
|
|
216
|
-
if(value == Qfalse)
|
239
|
+
if (value == Qfalse) {
|
217
240
|
ctxt->recovery = 0;
|
218
|
-
else
|
241
|
+
} else {
|
219
242
|
ctxt->recovery = 1;
|
243
|
+
}
|
220
244
|
|
221
245
|
return value;
|
222
246
|
}
|
@@ -228,35 +252,37 @@ static VALUE set_recovery(VALUE self, VALUE value)
|
|
228
252
|
* Should this parser recover from structural errors? It will not stop processing
|
229
253
|
* file on structural errors if set to true
|
230
254
|
*/
|
231
|
-
static VALUE
|
255
|
+
static VALUE
|
256
|
+
get_recovery(VALUE self)
|
232
257
|
{
|
233
258
|
xmlParserCtxtPtr ctxt;
|
234
259
|
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
235
260
|
|
236
|
-
if(ctxt->recovery == 0)
|
261
|
+
if (ctxt->recovery == 0) {
|
237
262
|
return Qfalse;
|
238
|
-
else
|
263
|
+
} else {
|
239
264
|
return Qtrue;
|
265
|
+
}
|
240
266
|
}
|
241
267
|
|
242
|
-
void
|
268
|
+
void
|
269
|
+
noko_init_xml_sax_parser_context()
|
243
270
|
{
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
cNokogiriXmlSaxParserContext
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
rb_define_method(
|
256
|
-
rb_define_method(
|
257
|
-
rb_define_method(
|
258
|
-
rb_define_method(
|
259
|
-
|
260
|
-
|
261
|
-
rb_define_method(klass, "column", column, 0);
|
271
|
+
cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
|
272
|
+
|
273
|
+
rb_undef_alloc_func(cNokogiriXmlSaxParserContext);
|
274
|
+
|
275
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2);
|
276
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1);
|
277
|
+
rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1);
|
278
|
+
|
279
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "parse_with", parse_with, 1);
|
280
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities=", set_replace_entities, 1);
|
281
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "replace_entities", get_replace_entities, 0);
|
282
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery=", set_recovery, 1);
|
283
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
|
284
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
|
285
|
+
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);
|
286
|
+
|
287
|
+
id_read = rb_intern("read");
|
262
288
|
}
|
@@ -1,6 +1,9 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlSaxPushParser ;
|
4
|
+
|
5
|
+
static void
|
6
|
+
deallocate(xmlParserCtxtPtr ctx)
|
4
7
|
{
|
5
8
|
NOKOGIRI_DEBUG_START(ctx);
|
6
9
|
if (ctx != NULL) {
|
@@ -10,7 +13,8 @@ static void deallocate(xmlParserCtxtPtr ctx)
|
|
10
13
|
NOKOGIRI_DEBUG_END(ctx);
|
11
14
|
}
|
12
15
|
|
13
|
-
static VALUE
|
16
|
+
static VALUE
|
17
|
+
allocate(VALUE klass)
|
14
18
|
{
|
15
19
|
return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
|
16
20
|
}
|
@@ -21,10 +25,11 @@ static VALUE allocate(VALUE klass)
|
|
21
25
|
*
|
22
26
|
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
|
23
27
|
*/
|
24
|
-
static VALUE
|
28
|
+
static VALUE
|
29
|
+
native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
25
30
|
{
|
26
31
|
xmlParserCtxtPtr ctx;
|
27
|
-
const char *
|
32
|
+
const char *chunk = NULL;
|
28
33
|
int size = 0;
|
29
34
|
|
30
35
|
|
@@ -35,6 +40,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
35
40
|
size = (int)RSTRING_LEN(_chunk);
|
36
41
|
}
|
37
42
|
|
43
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
44
|
+
|
38
45
|
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
39
46
|
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
40
47
|
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
@@ -51,10 +58,11 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
51
58
|
*
|
52
59
|
* Initialize the push parser with +xml_sax+ using +filename+
|
53
60
|
*/
|
54
|
-
static VALUE
|
61
|
+
static VALUE
|
62
|
+
initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
55
63
|
{
|
56
64
|
xmlSAXHandlerPtr sax;
|
57
|
-
const char *
|
65
|
+
const char *filename = NULL;
|
58
66
|
xmlParserCtxtPtr ctx;
|
59
67
|
|
60
68
|
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
@@ -79,7 +87,8 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
|
79
87
|
return self;
|
80
88
|
}
|
81
89
|
|
82
|
-
static VALUE
|
90
|
+
static VALUE
|
91
|
+
get_options(VALUE self)
|
83
92
|
{
|
84
93
|
xmlParserCtxtPtr ctx;
|
85
94
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -87,7 +96,8 @@ static VALUE get_options(VALUE self)
|
|
87
96
|
return INT2NUM(ctx->options);
|
88
97
|
}
|
89
98
|
|
90
|
-
static VALUE
|
99
|
+
static VALUE
|
100
|
+
set_options(VALUE self, VALUE options)
|
91
101
|
{
|
92
102
|
xmlParserCtxtPtr ctx;
|
93
103
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -106,7 +116,8 @@ static VALUE set_options(VALUE self, VALUE options)
|
|
106
116
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
107
117
|
* set to true
|
108
118
|
*/
|
109
|
-
static VALUE
|
119
|
+
static VALUE
|
120
|
+
get_replace_entities(VALUE self)
|
110
121
|
{
|
111
122
|
xmlParserCtxtPtr ctx;
|
112
123
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -125,7 +136,8 @@ static VALUE get_replace_entities(VALUE self)
|
|
125
136
|
* Should this parser replace entities? &amp; will get converted to '&' if
|
126
137
|
* set to true
|
127
138
|
*/
|
128
|
-
static VALUE
|
139
|
+
static VALUE
|
140
|
+
set_replace_entities(VALUE self, VALUE value)
|
129
141
|
{
|
130
142
|
xmlParserCtxtPtr ctx;
|
131
143
|
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
@@ -139,21 +151,18 @@ static VALUE set_replace_entities(VALUE self, VALUE value)
|
|
139
151
|
return value;
|
140
152
|
}
|
141
153
|
|
142
|
-
|
143
|
-
|
154
|
+
void
|
155
|
+
noko_init_xml_sax_push_parser()
|
144
156
|
{
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
cNokogiriXmlSaxPushParser =
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
rb_define_private_method(
|
155
|
-
|
156
|
-
rb_define_method(klass, "options=", set_options, 1);
|
157
|
-
rb_define_method(klass, "replace_entities", get_replace_entities, 0);
|
158
|
-
rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
|
157
|
+
cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
|
158
|
+
|
159
|
+
rb_define_alloc_func(cNokogiriXmlSaxPushParser, allocate);
|
160
|
+
|
161
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options", get_options, 0);
|
162
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "options=", set_options, 1);
|
163
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities", get_replace_entities, 0);
|
164
|
+
rb_define_method(cNokogiriXmlSaxPushParser, "replace_entities=", set_replace_entities, 1);
|
165
|
+
|
166
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "initialize_native", initialize_native, 2);
|
167
|
+
rb_define_private_method(cNokogiriXmlSaxPushParser, "native_write", native_write, 2);
|
159
168
|
}
|