nokogiri 1.6.1-java → 1.6.2-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.editorconfig +17 -0
- data/.travis.yml +6 -6
- data/CHANGELOG.ja.rdoc +61 -8
- data/CHANGELOG.rdoc +58 -3
- data/Gemfile +3 -3
- data/Manifest.txt +57 -1
- data/README.ja.rdoc +22 -17
- data/README.rdoc +23 -18
- data/ROADMAP.md +1 -2
- data/Rakefile +162 -58
- data/build_all +56 -31
- data/dependencies.yml +3 -3
- data/ext/java/nokogiri/NokogiriService.java +9 -5
- data/ext/java/nokogiri/XmlDocument.java +95 -54
- data/ext/java/nokogiri/XmlNode.java +93 -42
- data/ext/java/nokogiri/XmlReader.java +1 -1
- data/ext/java/nokogiri/XmlSaxParserContext.java +33 -0
- data/ext/java/nokogiri/XmlSchema.java +4 -2
- data/ext/java/nokogiri/XmlXpathContext.java +118 -76
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +3 -10
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +40 -23
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +59 -54
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +1 -1
- data/ext/java/nokogiri/internals/ParserContext.java +1 -4
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +6 -2
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +368 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
- data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +165 -0
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
- data/ext/nokogiri/extconf.rb +429 -128
- data/ext/nokogiri/html_document.c +2 -2
- data/ext/nokogiri/nokogiri.c +6 -1
- data/ext/nokogiri/xml_document.c +5 -4
- data/ext/nokogiri/xml_node.c +76 -7
- data/ext/nokogiri/xml_reader.c +1 -1
- data/ext/nokogiri/xml_sax_parser_context.c +40 -0
- data/ext/nokogiri/xml_syntax_error.c +10 -5
- data/ext/nokogiri/xml_syntax_error.h +1 -1
- data/ext/nokogiri/xml_xpath_context.c +2 -14
- data/ext/nokogiri/xslt_stylesheet.c +1 -1
- data/lib/nokogiri.rb +31 -22
- data/lib/nokogiri/css/node.rb +0 -50
- data/lib/nokogiri/css/parser.rb +213 -218
- data/lib/nokogiri/css/parser.y +21 -30
- data/lib/nokogiri/css/xpath_visitor.rb +62 -14
- data/lib/nokogiri/html/document.rb +97 -18
- data/lib/nokogiri/html/sax/parser.rb +2 -2
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +2 -2
- data/lib/nokogiri/xml/dtd.rb +10 -0
- data/lib/nokogiri/xml/node.rb +26 -1
- data/lib/nokogiri/xml/sax/parser.rb +1 -1
- data/ports/patches/libxml2/0001-Fix-parser-local-buffers-size-problems.patch +265 -0
- data/ports/patches/libxml2/0002-Fix-entities-local-buffers-size-problems.patch +102 -0
- data/ports/patches/libxml2/0003-Fix-an-error-in-previous-commit.patch +26 -0
- data/ports/patches/libxml2/0004-Fix-potential-out-of-bound-access.patch +26 -0
- data/ports/patches/libxml2/0005-Detect-excessive-entities-expansion-upon-replacement.patch +158 -0
- data/ports/patches/libxml2/0006-Do-not-fetch-external-parsed-entities.patch +78 -0
- data/ports/patches/libxml2/0007-Enforce-XML_PARSER_EOF-state-handling-through-the-pa.patch +480 -0
- data/ports/patches/libxml2/0008-Improve-handling-of-xmlStopParser.patch +315 -0
- data/ports/patches/libxml2/0009-Fix-a-couple-of-return-without-value.patch +37 -0
- data/ports/patches/libxslt/0001-Adding-doc-update-related-to-1.1.28.patch +222 -0
- data/ports/patches/libxslt/0002-Fix-a-couple-of-places-where-f-printf-parameters-wer.patch +53 -0
- data/ports/patches/libxslt/0003-Initialize-pseudo-random-number-generator-with-curre.patch +60 -0
- data/ports/patches/libxslt/0004-EXSLT-function-str-replace-is-broken-as-is.patch +42 -0
- data/ports/patches/libxslt/0006-Fix-str-padding-to-work-with-UTF-8-strings.patch +164 -0
- data/ports/patches/libxslt/0007-Separate-function-for-predicate-matching-in-patterns.patch +587 -0
- data/ports/patches/libxslt/0008-Fix-direct-pattern-matching.patch +80 -0
- data/ports/patches/libxslt/0009-Fix-certain-patterns-with-predicates.patch +185 -0
- data/ports/patches/libxslt/0010-Fix-handling-of-UTF-8-strings-in-EXSLT-crypto-module.patch +126 -0
- data/ports/patches/libxslt/0013-Memory-leak-in-xsltCompileIdKeyPattern-error-path.patch +25 -0
- data/ports/patches/libxslt/0014-Fix-for-bug-436589.patch +43 -0
- data/ports/patches/libxslt/0015-Fix-mkdir-for-mingw.patch +41 -0
- data/suppressions/README.txt +1 -0
- data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
- data/test/css/test_nthiness.rb +65 -2
- data/test/css/test_parser.rb +27 -10
- data/test/css/test_tokenizer.rb +1 -1
- data/test/css/test_xpath_visitor.rb +6 -1
- data/test/files/atom.xml +344 -0
- data/test/files/shift_jis_no_charset.html +9 -0
- data/test/helper.rb +10 -0
- data/test/html/test_document.rb +74 -7
- data/test/html/test_document_encoding.rb +10 -0
- data/test/html/test_document_fragment.rb +9 -3
- data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
- data/test/namespaces/test_namespaces_preservation.rb +31 -0
- data/test/test_nokogiri.rb +6 -0
- data/test/test_reader.rb +7 -4
- data/test/test_xslt_transforms.rb +25 -0
- data/test/xml/sax/test_parser.rb +16 -0
- data/test/xml/sax/test_parser_context.rb +9 -0
- data/test/xml/test_builder.rb +9 -0
- data/test/xml/test_c14n.rb +12 -2
- data/test/xml/test_document.rb +66 -0
- data/test/xml/test_document_fragment.rb +5 -0
- data/test/xml/test_dtd.rb +84 -0
- data/test/xml/test_entity_reference.rb +3 -3
- data/test/xml/test_node.rb +21 -3
- data/test/xml/test_node_attributes.rb +17 -0
- data/test/xml/test_schema.rb +26 -0
- data/test/xml/test_text.rb +15 -0
- data/test/xml/test_xpath.rb +87 -0
- data/test_all +3 -3
- metadata +119 -68
- data/tasks/cross_compile.rb +0 -134
@@ -77,7 +77,7 @@ static VALUE read_io( VALUE klass,
|
|
77
77
|
|
78
78
|
error = xmlGetLastError();
|
79
79
|
if(error)
|
80
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
80
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
81
81
|
else
|
82
82
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
83
83
|
|
@@ -123,7 +123,7 @@ static VALUE read_memory( VALUE klass,
|
|
123
123
|
|
124
124
|
error = xmlGetLastError();
|
125
125
|
if(error)
|
126
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
126
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
127
127
|
else
|
128
128
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
129
129
|
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -14,7 +14,12 @@ VALUE mNokogiriHtmlSax ;
|
|
14
14
|
*/
|
15
15
|
int vasprintf (char **strp, const char *fmt, va_list ap)
|
16
16
|
{
|
17
|
-
|
17
|
+
/* Mingw32/64 have a broken vsnprintf implementation that fails when
|
18
|
+
* using a zero-byte limit in order to retrieve the required size for malloc.
|
19
|
+
* So we use a one byte buffer instead.
|
20
|
+
*/
|
21
|
+
char tmp[1];
|
22
|
+
int len = vsnprintf (tmp, 1, fmt, ap) + 1;
|
18
23
|
char *res = (char *)malloc((unsigned int)len);
|
19
24
|
if (res == NULL)
|
20
25
|
return -1;
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -231,7 +231,7 @@ static VALUE read_io( VALUE klass,
|
|
231
231
|
|
232
232
|
error = xmlGetLastError();
|
233
233
|
if(error)
|
234
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
234
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
235
235
|
else
|
236
236
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
237
237
|
|
@@ -275,7 +275,7 @@ static VALUE read_memory( VALUE klass,
|
|
275
275
|
|
276
276
|
error = xmlGetLastError();
|
277
277
|
if(error)
|
278
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
278
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
279
279
|
else
|
280
280
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
281
281
|
|
@@ -417,7 +417,7 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
|
|
417
417
|
if(NULL == ptr) {
|
418
418
|
xmlErrorPtr error = xmlGetLastError();
|
419
419
|
if(error)
|
420
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
420
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
421
421
|
else
|
422
422
|
rb_raise(rb_eRuntimeError, "Could not create entity");
|
423
423
|
|
@@ -497,6 +497,7 @@ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
|
|
497
497
|
ns = NULL;
|
498
498
|
}
|
499
499
|
else{
|
500
|
+
Check_Type(incl_ns, T_ARRAY);
|
500
501
|
ns_len = RARRAY_LEN(incl_ns);
|
501
502
|
ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
|
502
503
|
for (i = 0 ; i < ns_len ; i++) {
|
@@ -510,7 +511,7 @@ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
|
|
510
511
|
xmlC14NExecute(doc, cb, ctx,
|
511
512
|
(int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
|
512
513
|
ns,
|
513
|
-
(int) (
|
514
|
+
(int) RTEST(with_comments),
|
514
515
|
buf);
|
515
516
|
|
516
517
|
xmlOutputBufferClose(buf);
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -14,7 +14,14 @@ static void debug_node_dealloc(xmlNodePtr x)
|
|
14
14
|
|
15
15
|
static void mark(xmlNodePtr node)
|
16
16
|
{
|
17
|
-
|
17
|
+
xmlNodePtr doc = node->doc;
|
18
|
+
if(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
|
19
|
+
if(DOC_RUBY_OBJECT_TEST(doc)) {
|
20
|
+
rb_gc_mark(DOC_RUBY_OBJECT(doc));
|
21
|
+
}
|
22
|
+
} else if(node->doc->_private) {
|
23
|
+
rb_gc_mark((VALUE)doc->_private);
|
24
|
+
}
|
18
25
|
}
|
19
26
|
|
20
27
|
/* :nodoc: */
|
@@ -132,7 +139,7 @@ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
|
|
132
139
|
static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
|
133
140
|
{
|
134
141
|
VALUE reparented_obj ;
|
135
|
-
xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text ;
|
142
|
+
xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text, parent ;
|
136
143
|
|
137
144
|
if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
|
138
145
|
rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
|
@@ -142,9 +149,71 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
|
|
142
149
|
Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
|
143
150
|
Data_Get_Struct(pivot_obj, xmlNode, pivot);
|
144
151
|
|
145
|
-
|
146
|
-
|
152
|
+
/*
|
153
|
+
* Check if nodes given are appropriate to have a parent-child
|
154
|
+
* relationship, based on the DOM specification.
|
155
|
+
*
|
156
|
+
* cf. http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-1590626202
|
157
|
+
*/
|
158
|
+
if (prf == xmlAddChild) {
|
159
|
+
parent = pivot;
|
160
|
+
} else {
|
161
|
+
parent = pivot->parent;
|
162
|
+
}
|
163
|
+
|
164
|
+
if (parent) {
|
165
|
+
switch (parent->type) {
|
166
|
+
case XML_DOCUMENT_NODE:
|
167
|
+
case XML_HTML_DOCUMENT_NODE:
|
168
|
+
switch (reparentee->type) {
|
169
|
+
case XML_ELEMENT_NODE:
|
170
|
+
case XML_PI_NODE:
|
171
|
+
case XML_COMMENT_NODE:
|
172
|
+
case XML_DOCUMENT_TYPE_NODE:
|
173
|
+
/*
|
174
|
+
* The DOM specification says no to adding text-like nodes
|
175
|
+
* directly to a document, but we allow it for compatibility.
|
176
|
+
*/
|
177
|
+
case XML_TEXT_NODE:
|
178
|
+
case XML_CDATA_SECTION_NODE:
|
179
|
+
case XML_ENTITY_REF_NODE:
|
180
|
+
goto ok;
|
181
|
+
}
|
182
|
+
break;
|
183
|
+
case XML_DOCUMENT_FRAG_NODE:
|
184
|
+
case XML_ENTITY_REF_NODE:
|
185
|
+
case XML_ELEMENT_NODE:
|
186
|
+
switch (reparentee->type) {
|
187
|
+
case XML_ELEMENT_NODE:
|
188
|
+
case XML_PI_NODE:
|
189
|
+
case XML_COMMENT_NODE:
|
190
|
+
case XML_TEXT_NODE:
|
191
|
+
case XML_CDATA_SECTION_NODE:
|
192
|
+
case XML_ENTITY_REF_NODE:
|
193
|
+
goto ok;
|
194
|
+
}
|
195
|
+
break;
|
196
|
+
case XML_ATTRIBUTE_NODE:
|
197
|
+
switch (reparentee->type) {
|
198
|
+
case XML_TEXT_NODE:
|
199
|
+
case XML_ENTITY_REF_NODE:
|
200
|
+
goto ok;
|
201
|
+
}
|
202
|
+
break;
|
203
|
+
case XML_TEXT_NODE:
|
204
|
+
/*
|
205
|
+
* xmlAddChild() breaks the DOM specification in that it allows
|
206
|
+
* adding a text node to another, in which case text nodes are
|
207
|
+
* coalesced, but since our JRuby version does not support such
|
208
|
+
* operation, we should inhibit it.
|
209
|
+
*/
|
210
|
+
break;
|
211
|
+
}
|
212
|
+
|
213
|
+
rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
|
214
|
+
}
|
147
215
|
|
216
|
+
ok:
|
148
217
|
xmlUnlinkNode(reparentee);
|
149
218
|
|
150
219
|
if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
|
@@ -850,8 +919,8 @@ static VALUE attribute_nodes(VALUE self)
|
|
850
919
|
* call-seq:
|
851
920
|
* namespace()
|
852
921
|
*
|
853
|
-
* returns the
|
854
|
-
*
|
922
|
+
* returns the namespace of the element or attribute node as a Namespace
|
923
|
+
* object, or nil if there is no namespace for the element or attribute.
|
855
924
|
*/
|
856
925
|
static VALUE namespace(VALUE self)
|
857
926
|
{
|
@@ -1258,7 +1327,7 @@ static VALUE process_xincludes(VALUE self, VALUE options)
|
|
1258
1327
|
|
1259
1328
|
error = xmlGetLastError();
|
1260
1329
|
if(error)
|
1261
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
1330
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
1262
1331
|
else
|
1263
1332
|
rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
|
1264
1333
|
}
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -471,7 +471,7 @@ static VALUE read_more(VALUE self)
|
|
471
471
|
|
472
472
|
error = xmlGetLastError();
|
473
473
|
if(error)
|
474
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
474
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
475
475
|
else
|
476
476
|
rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
|
477
477
|
|
@@ -201,6 +201,44 @@ static VALUE column(VALUE self)
|
|
201
201
|
return Qnil;
|
202
202
|
}
|
203
203
|
|
204
|
+
/*
|
205
|
+
* call-seq:
|
206
|
+
* recovery=(boolean)
|
207
|
+
*
|
208
|
+
* Should this parser recover from structural errors? It will not stop processing
|
209
|
+
* file on structural errors if set to true
|
210
|
+
*/
|
211
|
+
static VALUE set_recovery(VALUE self, VALUE value)
|
212
|
+
{
|
213
|
+
xmlParserCtxtPtr ctxt;
|
214
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
215
|
+
|
216
|
+
if(value == Qfalse)
|
217
|
+
ctxt->recovery = 0;
|
218
|
+
else
|
219
|
+
ctxt->recovery = 1;
|
220
|
+
|
221
|
+
return value;
|
222
|
+
}
|
223
|
+
|
224
|
+
/*
|
225
|
+
* call-seq:
|
226
|
+
* recovery
|
227
|
+
*
|
228
|
+
* Should this parser recover from structural errors? It will not stop processing
|
229
|
+
* file on structural errors if set to true
|
230
|
+
*/
|
231
|
+
static VALUE get_recovery(VALUE self)
|
232
|
+
{
|
233
|
+
xmlParserCtxtPtr ctxt;
|
234
|
+
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
235
|
+
|
236
|
+
if(ctxt->recovery == 0)
|
237
|
+
return Qfalse;
|
238
|
+
else
|
239
|
+
return Qtrue;
|
240
|
+
}
|
241
|
+
|
204
242
|
void init_xml_sax_parser_context()
|
205
243
|
{
|
206
244
|
VALUE nokogiri = rb_define_module("Nokogiri");
|
@@ -217,6 +255,8 @@ void init_xml_sax_parser_context()
|
|
217
255
|
rb_define_method(klass, "parse_with", parse_with, 1);
|
218
256
|
rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
|
219
257
|
rb_define_method(klass, "replace_entities", get_replace_entities, 0);
|
258
|
+
rb_define_method(klass, "recovery=", set_recovery, 1);
|
259
|
+
rb_define_method(klass, "recovery", get_recovery, 0);
|
220
260
|
rb_define_method(klass, "line", line, 0);
|
221
261
|
rb_define_method(klass, "column", column, 0);
|
222
262
|
}
|
@@ -3,19 +3,24 @@
|
|
3
3
|
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error)
|
4
4
|
{
|
5
5
|
VALUE list = (VALUE)ctx;
|
6
|
-
rb_ary_push(list, Nokogiri_wrap_xml_syntax_error(
|
6
|
+
rb_ary_push(list, Nokogiri_wrap_xml_syntax_error(error));
|
7
7
|
}
|
8
8
|
|
9
9
|
void Nokogiri_error_raise(void * ctx, xmlErrorPtr error)
|
10
10
|
{
|
11
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
11
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
12
12
|
}
|
13
13
|
|
14
|
-
VALUE Nokogiri_wrap_xml_syntax_error(
|
14
|
+
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
|
15
15
|
{
|
16
|
-
VALUE msg, e;
|
16
|
+
VALUE msg, e, klass;
|
17
17
|
|
18
|
-
|
18
|
+
klass = cNokogiriXmlSyntaxError;
|
19
|
+
|
20
|
+
if (error->domain == XML_FROM_XPATH) {
|
21
|
+
VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
|
22
|
+
klass = rb_const_get(xpath, rb_intern("SyntaxError"));
|
23
|
+
}
|
19
24
|
|
20
25
|
msg = (error && error->message) ? NOKOGIRI_STR_NEW2(error->message) : Qnil;
|
21
26
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include <nokogiri.h>
|
5
5
|
|
6
6
|
void init_xml_syntax_error();
|
7
|
-
VALUE Nokogiri_wrap_xml_syntax_error(
|
7
|
+
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
|
8
8
|
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error);
|
9
9
|
NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error));
|
10
10
|
|
@@ -171,15 +171,6 @@ static xmlXPathFunction lookup( void *ctx,
|
|
171
171
|
return NULL;
|
172
172
|
}
|
173
173
|
|
174
|
-
NORETURN(static void xpath_exception_handler(void * ctx, xmlErrorPtr error));
|
175
|
-
static void xpath_exception_handler(void * ctx, xmlErrorPtr error)
|
176
|
-
{
|
177
|
-
VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
|
178
|
-
VALUE klass = rb_const_get(xpath, rb_intern("SyntaxError"));
|
179
|
-
|
180
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(klass, error));
|
181
|
-
}
|
182
|
-
|
183
174
|
NORETURN(static void xpath_generic_exception_handler(void * ctx, const char *msg, ...));
|
184
175
|
static void xpath_generic_exception_handler(void * ctx, const char *msg, ...)
|
185
176
|
{
|
@@ -221,7 +212,7 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self)
|
|
221
212
|
}
|
222
213
|
|
223
214
|
xmlResetLastError();
|
224
|
-
xmlSetStructuredErrorFunc(NULL,
|
215
|
+
xmlSetStructuredErrorFunc(NULL, Nokogiri_error_raise);
|
225
216
|
|
226
217
|
/* For some reason, xmlXPathEvalExpression will blow up with a generic error */
|
227
218
|
/* when there is a non existent function. */
|
@@ -232,11 +223,8 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self)
|
|
232
223
|
xmlSetGenericErrorFunc(NULL, NULL);
|
233
224
|
|
234
225
|
if(xpath == NULL) {
|
235
|
-
VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
|
236
|
-
VALUE klass = rb_const_get(xpath, rb_intern("SyntaxError"));
|
237
|
-
|
238
226
|
xmlErrorPtr error = xmlGetLastError();
|
239
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(
|
227
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
240
228
|
}
|
241
229
|
|
242
230
|
assert(ctx->doc);
|
@@ -237,7 +237,7 @@ static void shutdownFunc(xsltTransformContextPtr ctxt,
|
|
237
237
|
* call-seq:
|
238
238
|
* register(uri, custom_handler_class)
|
239
239
|
*
|
240
|
-
* Register a class that implements custom
|
240
|
+
* Register a class that implements custom XSLT transformation functions.
|
241
241
|
*/
|
242
242
|
static VALUE registr(VALUE self, VALUE uri, VALUE obj)
|
243
243
|
{
|
data/lib/nokogiri.rb
CHANGED
@@ -2,9 +2,6 @@
|
|
2
2
|
# Modify the PATH on windows so that the external DLLs will get loaded.
|
3
3
|
|
4
4
|
require 'rbconfig'
|
5
|
-
ENV['PATH'] = [File.expand_path(
|
6
|
-
File.join(File.dirname(__FILE__), "..", "ext", "nokogiri")
|
7
|
-
), ENV['PATH']].compact.join(';') if RbConfig::CONFIG['host_os'] =~ /(mswin|mingw)/i
|
8
5
|
|
9
6
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
10
7
|
# The line below caused a problem on non-GAE rack environment.
|
@@ -13,7 +10,7 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
|
13
10
|
# However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
|
14
11
|
# an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
|
15
12
|
# of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
|
16
|
-
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
13
|
+
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
17
14
|
# already set in the classpath.
|
18
15
|
unless $LOAD_PATH.to_s.include?("appengine-rack")
|
19
16
|
require 'stringio'
|
@@ -25,7 +22,12 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
|
25
22
|
end
|
26
23
|
end
|
27
24
|
|
28
|
-
|
25
|
+
begin
|
26
|
+
RUBY_VERSION =~ /(\d+.\d+)/
|
27
|
+
require "nokogiri/#{$1}/nokogiri"
|
28
|
+
rescue LoadError
|
29
|
+
require 'nokogiri/nokogiri'
|
30
|
+
end
|
29
31
|
require 'nokogiri/version'
|
30
32
|
require 'nokogiri/syntax_error'
|
31
33
|
require 'nokogiri/xml'
|
@@ -36,7 +38,8 @@ require 'nokogiri/css'
|
|
36
38
|
require 'nokogiri/html/builder'
|
37
39
|
|
38
40
|
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
39
|
-
# correctly implemented CSS3 selector support as well as XPath
|
41
|
+
# correctly implemented CSS3 selector support as well as XPath 1.0
|
42
|
+
# support.
|
40
43
|
#
|
41
44
|
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
42
45
|
# Nokogiri::HTML::Document depending on the kind of document you parse.
|
@@ -65,20 +68,19 @@ module Nokogiri
|
|
65
68
|
###
|
66
69
|
# Parse an HTML or XML document. +string+ contains the document.
|
67
70
|
def parse string, url = nil, encoding = nil, options = nil
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
doc
|
71
|
+
if string.respond_to?(:read) ||
|
72
|
+
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
|
73
|
+
# Expect an HTML indicator to appear within the first 512
|
74
|
+
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
75
|
+
# shouldn't be that long)
|
76
|
+
Nokogiri.HTML(string, url, encoding,
|
77
|
+
options || XML::ParseOptions::DEFAULT_HTML)
|
78
|
+
else
|
79
|
+
Nokogiri.XML(string, url, encoding,
|
80
|
+
options || XML::ParseOptions::DEFAULT_XML)
|
81
|
+
end.tap { |doc|
|
82
|
+
yield doc if block_given?
|
83
|
+
}
|
82
84
|
end
|
83
85
|
|
84
86
|
###
|
@@ -110,6 +112,14 @@ module Nokogiri
|
|
110
112
|
Nokogiri(*args, &block).slop!
|
111
113
|
end
|
112
114
|
end
|
115
|
+
|
116
|
+
# Make sure to support some popular encoding aliases not known by
|
117
|
+
# all iconv implementations.
|
118
|
+
{
|
119
|
+
'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
|
120
|
+
}.each { |alias_name, name|
|
121
|
+
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
122
|
+
}
|
113
123
|
end
|
114
124
|
|
115
125
|
###
|
@@ -120,8 +130,7 @@ end
|
|
120
130
|
# To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
|
121
131
|
def Nokogiri(*args, &block)
|
122
132
|
if block_given?
|
123
|
-
|
124
|
-
return builder.doc.root
|
133
|
+
Nokogiri::HTML::Builder.new(&block).doc.root
|
125
134
|
else
|
126
135
|
Nokogiri.parse(*args)
|
127
136
|
end
|
data/lib/nokogiri/css/node.rb
CHANGED
@@ -22,60 +22,10 @@ module Nokogiri
|
|
22
22
|
###
|
23
23
|
# Convert this CSS node to xpath with +prefix+ using +visitor+
|
24
24
|
def to_xpath prefix = '//', visitor = XPathVisitor.new
|
25
|
-
self.preprocess!
|
26
25
|
prefix = '.' if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
|
27
26
|
prefix + visitor.accept(self)
|
28
27
|
end
|
29
28
|
|
30
|
-
# Preprocess this node tree
|
31
|
-
def preprocess!
|
32
|
-
### Deal with nth-child
|
33
|
-
matches = find_by_type(
|
34
|
-
[:CONDITIONAL_SELECTOR,
|
35
|
-
[:ELEMENT_NAME],
|
36
|
-
[:PSEUDO_CLASS,
|
37
|
-
[:FUNCTION]
|
38
|
-
]
|
39
|
-
]
|
40
|
-
)
|
41
|
-
matches.each do |match|
|
42
|
-
if match.value[1].value[0].value[0] =~ /^nth-(last-)?child/
|
43
|
-
tag_name = match.value[0].value.first
|
44
|
-
match.value[0].value = ['*']
|
45
|
-
match.value[1] = Node.new(:COMBINATOR, [
|
46
|
-
match.value[1].value[0],
|
47
|
-
Node.new(:FUNCTION, ['self(', tag_name])
|
48
|
-
])
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
### Deal with first-child, last-child
|
53
|
-
matches = find_by_type(
|
54
|
-
[:CONDITIONAL_SELECTOR,
|
55
|
-
[:ELEMENT_NAME], [:PSEUDO_CLASS]
|
56
|
-
])
|
57
|
-
matches.each do |match|
|
58
|
-
if ['first-child', 'last-child'].include?(match.value[1].value.first)
|
59
|
-
which = match.value[1].value.first.gsub(/-\w*$/, '')
|
60
|
-
tag_name = match.value[0].value.first
|
61
|
-
match.value[0].value = ['*']
|
62
|
-
match.value[1] = Node.new(:COMBINATOR, [
|
63
|
-
Node.new(:FUNCTION, ["#{which}("]),
|
64
|
-
Node.new(:FUNCTION, ['self(', tag_name])
|
65
|
-
])
|
66
|
-
elsif 'only-child' == match.value[1].value.first
|
67
|
-
tag_name = match.value[0].value.first
|
68
|
-
match.value[0].value = ['*']
|
69
|
-
match.value[1] = Node.new(:COMBINATOR, [
|
70
|
-
Node.new(:FUNCTION, ["#{match.value[1].value.first}("]),
|
71
|
-
Node.new(:FUNCTION, ['self(', tag_name])
|
72
|
-
])
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
self
|
77
|
-
end
|
78
|
-
|
79
29
|
# Find a node by type using +types+
|
80
30
|
def find_by_type types
|
81
31
|
matches = []
|