nokogiri 1.6.1-java → 1.6.2-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (137) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +17 -0
  3. data/.travis.yml +6 -6
  4. data/CHANGELOG.ja.rdoc +61 -8
  5. data/CHANGELOG.rdoc +58 -3
  6. data/Gemfile +3 -3
  7. data/Manifest.txt +57 -1
  8. data/README.ja.rdoc +22 -17
  9. data/README.rdoc +23 -18
  10. data/ROADMAP.md +1 -2
  11. data/Rakefile +162 -58
  12. data/build_all +56 -31
  13. data/dependencies.yml +3 -3
  14. data/ext/java/nokogiri/NokogiriService.java +9 -5
  15. data/ext/java/nokogiri/XmlDocument.java +95 -54
  16. data/ext/java/nokogiri/XmlNode.java +93 -42
  17. data/ext/java/nokogiri/XmlReader.java +1 -1
  18. data/ext/java/nokogiri/XmlSaxParserContext.java +33 -0
  19. data/ext/java/nokogiri/XmlSchema.java +4 -2
  20. data/ext/java/nokogiri/XmlXpathContext.java +118 -76
  21. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
  22. data/ext/java/nokogiri/internals/NokogiriHandler.java +3 -10
  23. data/ext/java/nokogiri/internals/NokogiriHelpers.java +40 -23
  24. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +59 -54
  25. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +1 -1
  26. data/ext/java/nokogiri/internals/ParserContext.java +1 -4
  27. data/ext/java/nokogiri/internals/SaveContextVisitor.java +6 -2
  28. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
  29. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
  30. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
  31. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
  32. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
  33. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
  34. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
  35. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
  36. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +368 -0
  37. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
  38. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
  39. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
  40. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
  41. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
  42. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
  43. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
  44. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
  45. data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
  46. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
  47. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
  48. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
  49. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +165 -0
  50. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
  51. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
  52. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
  53. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
  54. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
  55. data/ext/nokogiri/extconf.rb +429 -128
  56. data/ext/nokogiri/html_document.c +2 -2
  57. data/ext/nokogiri/nokogiri.c +6 -1
  58. data/ext/nokogiri/xml_document.c +5 -4
  59. data/ext/nokogiri/xml_node.c +76 -7
  60. data/ext/nokogiri/xml_reader.c +1 -1
  61. data/ext/nokogiri/xml_sax_parser_context.c +40 -0
  62. data/ext/nokogiri/xml_syntax_error.c +10 -5
  63. data/ext/nokogiri/xml_syntax_error.h +1 -1
  64. data/ext/nokogiri/xml_xpath_context.c +2 -14
  65. data/ext/nokogiri/xslt_stylesheet.c +1 -1
  66. data/lib/nokogiri.rb +31 -22
  67. data/lib/nokogiri/css/node.rb +0 -50
  68. data/lib/nokogiri/css/parser.rb +213 -218
  69. data/lib/nokogiri/css/parser.y +21 -30
  70. data/lib/nokogiri/css/xpath_visitor.rb +62 -14
  71. data/lib/nokogiri/html/document.rb +97 -18
  72. data/lib/nokogiri/html/sax/parser.rb +2 -2
  73. data/lib/nokogiri/nokogiri.jar +0 -0
  74. data/lib/nokogiri/version.rb +1 -1
  75. data/lib/nokogiri/xml/builder.rb +1 -1
  76. data/lib/nokogiri/xml/document.rb +2 -2
  77. data/lib/nokogiri/xml/dtd.rb +10 -0
  78. data/lib/nokogiri/xml/node.rb +26 -1
  79. data/lib/nokogiri/xml/sax/parser.rb +1 -1
  80. data/ports/patches/libxml2/0001-Fix-parser-local-buffers-size-problems.patch +265 -0
  81. data/ports/patches/libxml2/0002-Fix-entities-local-buffers-size-problems.patch +102 -0
  82. data/ports/patches/libxml2/0003-Fix-an-error-in-previous-commit.patch +26 -0
  83. data/ports/patches/libxml2/0004-Fix-potential-out-of-bound-access.patch +26 -0
  84. data/ports/patches/libxml2/0005-Detect-excessive-entities-expansion-upon-replacement.patch +158 -0
  85. data/ports/patches/libxml2/0006-Do-not-fetch-external-parsed-entities.patch +78 -0
  86. data/ports/patches/libxml2/0007-Enforce-XML_PARSER_EOF-state-handling-through-the-pa.patch +480 -0
  87. data/ports/patches/libxml2/0008-Improve-handling-of-xmlStopParser.patch +315 -0
  88. data/ports/patches/libxml2/0009-Fix-a-couple-of-return-without-value.patch +37 -0
  89. data/ports/patches/libxslt/0001-Adding-doc-update-related-to-1.1.28.patch +222 -0
  90. data/ports/patches/libxslt/0002-Fix-a-couple-of-places-where-f-printf-parameters-wer.patch +53 -0
  91. data/ports/patches/libxslt/0003-Initialize-pseudo-random-number-generator-with-curre.patch +60 -0
  92. data/ports/patches/libxslt/0004-EXSLT-function-str-replace-is-broken-as-is.patch +42 -0
  93. data/ports/patches/libxslt/0006-Fix-str-padding-to-work-with-UTF-8-strings.patch +164 -0
  94. data/ports/patches/libxslt/0007-Separate-function-for-predicate-matching-in-patterns.patch +587 -0
  95. data/ports/patches/libxslt/0008-Fix-direct-pattern-matching.patch +80 -0
  96. data/ports/patches/libxslt/0009-Fix-certain-patterns-with-predicates.patch +185 -0
  97. data/ports/patches/libxslt/0010-Fix-handling-of-UTF-8-strings-in-EXSLT-crypto-module.patch +126 -0
  98. data/ports/patches/libxslt/0013-Memory-leak-in-xsltCompileIdKeyPattern-error-path.patch +25 -0
  99. data/ports/patches/libxslt/0014-Fix-for-bug-436589.patch +43 -0
  100. data/ports/patches/libxslt/0015-Fix-mkdir-for-mingw.patch +41 -0
  101. data/suppressions/README.txt +1 -0
  102. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  103. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  104. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  105. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  106. data/test/css/test_nthiness.rb +65 -2
  107. data/test/css/test_parser.rb +27 -10
  108. data/test/css/test_tokenizer.rb +1 -1
  109. data/test/css/test_xpath_visitor.rb +6 -1
  110. data/test/files/atom.xml +344 -0
  111. data/test/files/shift_jis_no_charset.html +9 -0
  112. data/test/helper.rb +10 -0
  113. data/test/html/test_document.rb +74 -7
  114. data/test/html/test_document_encoding.rb +10 -0
  115. data/test/html/test_document_fragment.rb +9 -3
  116. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  117. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  118. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  119. data/test/test_nokogiri.rb +6 -0
  120. data/test/test_reader.rb +7 -4
  121. data/test/test_xslt_transforms.rb +25 -0
  122. data/test/xml/sax/test_parser.rb +16 -0
  123. data/test/xml/sax/test_parser_context.rb +9 -0
  124. data/test/xml/test_builder.rb +9 -0
  125. data/test/xml/test_c14n.rb +12 -2
  126. data/test/xml/test_document.rb +66 -0
  127. data/test/xml/test_document_fragment.rb +5 -0
  128. data/test/xml/test_dtd.rb +84 -0
  129. data/test/xml/test_entity_reference.rb +3 -3
  130. data/test/xml/test_node.rb +21 -3
  131. data/test/xml/test_node_attributes.rb +17 -0
  132. data/test/xml/test_schema.rb +26 -0
  133. data/test/xml/test_text.rb +15 -0
  134. data/test/xml/test_xpath.rb +87 -0
  135. data/test_all +3 -3
  136. metadata +119 -68
  137. data/tasks/cross_compile.rb +0 -134
@@ -77,7 +77,7 @@ static VALUE read_io( VALUE klass,
77
77
 
78
78
  error = xmlGetLastError();
79
79
  if(error)
80
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
80
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
81
81
  else
82
82
  rb_raise(rb_eRuntimeError, "Could not parse document");
83
83
 
@@ -123,7 +123,7 @@ static VALUE read_memory( VALUE klass,
123
123
 
124
124
  error = xmlGetLastError();
125
125
  if(error)
126
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
126
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
127
127
  else
128
128
  rb_raise(rb_eRuntimeError, "Could not parse document");
129
129
 
@@ -14,7 +14,12 @@ VALUE mNokogiriHtmlSax ;
14
14
  */
15
15
  int vasprintf (char **strp, const char *fmt, va_list ap)
16
16
  {
17
- int len = vsnprintf (NULL, 0, fmt, ap) + 1;
17
+ /* Mingw32/64 have a broken vsnprintf implementation that fails when
18
+ * using a zero-byte limit in order to retrieve the required size for malloc.
19
+ * So we use a one byte buffer instead.
20
+ */
21
+ char tmp[1];
22
+ int len = vsnprintf (tmp, 1, fmt, ap) + 1;
18
23
  char *res = (char *)malloc((unsigned int)len);
19
24
  if (res == NULL)
20
25
  return -1;
@@ -231,7 +231,7 @@ static VALUE read_io( VALUE klass,
231
231
 
232
232
  error = xmlGetLastError();
233
233
  if(error)
234
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
234
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
235
235
  else
236
236
  rb_raise(rb_eRuntimeError, "Could not parse document");
237
237
 
@@ -275,7 +275,7 @@ static VALUE read_memory( VALUE klass,
275
275
 
276
276
  error = xmlGetLastError();
277
277
  if(error)
278
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
278
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
279
279
  else
280
280
  rb_raise(rb_eRuntimeError, "Could not parse document");
281
281
 
@@ -417,7 +417,7 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
417
417
  if(NULL == ptr) {
418
418
  xmlErrorPtr error = xmlGetLastError();
419
419
  if(error)
420
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
420
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
421
421
  else
422
422
  rb_raise(rb_eRuntimeError, "Could not create entity");
423
423
 
@@ -497,6 +497,7 @@ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
497
497
  ns = NULL;
498
498
  }
499
499
  else{
500
+ Check_Type(incl_ns, T_ARRAY);
500
501
  ns_len = RARRAY_LEN(incl_ns);
501
502
  ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
502
503
  for (i = 0 ; i < ns_len ; i++) {
@@ -510,7 +511,7 @@ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
510
511
  xmlC14NExecute(doc, cb, ctx,
511
512
  (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
512
513
  ns,
513
- (int) (NIL_P(with_comments) ? 0 : 1),
514
+ (int) RTEST(with_comments),
514
515
  buf);
515
516
 
516
517
  xmlOutputBufferClose(buf);
@@ -14,7 +14,14 @@ static void debug_node_dealloc(xmlNodePtr x)
14
14
 
15
15
  static void mark(xmlNodePtr node)
16
16
  {
17
- rb_gc_mark(DOC_RUBY_OBJECT(node->doc));
17
+ xmlNodePtr doc = node->doc;
18
+ if(doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) {
19
+ if(DOC_RUBY_OBJECT_TEST(doc)) {
20
+ rb_gc_mark(DOC_RUBY_OBJECT(doc));
21
+ }
22
+ } else if(node->doc->_private) {
23
+ rb_gc_mark((VALUE)doc->_private);
24
+ }
18
25
  }
19
26
 
20
27
  /* :nodoc: */
@@ -132,7 +139,7 @@ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
132
139
  static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
133
140
  {
134
141
  VALUE reparented_obj ;
135
- xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text ;
142
+ xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text, parent ;
136
143
 
137
144
  if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
138
145
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
@@ -142,9 +149,71 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
142
149
  Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
143
150
  Data_Get_Struct(pivot_obj, xmlNode, pivot);
144
151
 
145
- if(XML_DOCUMENT_NODE == reparentee->type || XML_HTML_DOCUMENT_NODE == reparentee->type)
146
- rb_raise(rb_eArgError, "cannot reparent a document node");
152
+ /*
153
+ * Check if nodes given are appropriate to have a parent-child
154
+ * relationship, based on the DOM specification.
155
+ *
156
+ * cf. http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-1590626202
157
+ */
158
+ if (prf == xmlAddChild) {
159
+ parent = pivot;
160
+ } else {
161
+ parent = pivot->parent;
162
+ }
163
+
164
+ if (parent) {
165
+ switch (parent->type) {
166
+ case XML_DOCUMENT_NODE:
167
+ case XML_HTML_DOCUMENT_NODE:
168
+ switch (reparentee->type) {
169
+ case XML_ELEMENT_NODE:
170
+ case XML_PI_NODE:
171
+ case XML_COMMENT_NODE:
172
+ case XML_DOCUMENT_TYPE_NODE:
173
+ /*
174
+ * The DOM specification says no to adding text-like nodes
175
+ * directly to a document, but we allow it for compatibility.
176
+ */
177
+ case XML_TEXT_NODE:
178
+ case XML_CDATA_SECTION_NODE:
179
+ case XML_ENTITY_REF_NODE:
180
+ goto ok;
181
+ }
182
+ break;
183
+ case XML_DOCUMENT_FRAG_NODE:
184
+ case XML_ENTITY_REF_NODE:
185
+ case XML_ELEMENT_NODE:
186
+ switch (reparentee->type) {
187
+ case XML_ELEMENT_NODE:
188
+ case XML_PI_NODE:
189
+ case XML_COMMENT_NODE:
190
+ case XML_TEXT_NODE:
191
+ case XML_CDATA_SECTION_NODE:
192
+ case XML_ENTITY_REF_NODE:
193
+ goto ok;
194
+ }
195
+ break;
196
+ case XML_ATTRIBUTE_NODE:
197
+ switch (reparentee->type) {
198
+ case XML_TEXT_NODE:
199
+ case XML_ENTITY_REF_NODE:
200
+ goto ok;
201
+ }
202
+ break;
203
+ case XML_TEXT_NODE:
204
+ /*
205
+ * xmlAddChild() breaks the DOM specification in that it allows
206
+ * adding a text node to another, in which case text nodes are
207
+ * coalesced, but since our JRuby version does not support such
208
+ * operation, we should inhibit it.
209
+ */
210
+ break;
211
+ }
212
+
213
+ rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
214
+ }
147
215
 
216
+ ok:
148
217
  xmlUnlinkNode(reparentee);
149
218
 
150
219
  if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
@@ -850,8 +919,8 @@ static VALUE attribute_nodes(VALUE self)
850
919
  * call-seq:
851
920
  * namespace()
852
921
  *
853
- * returns the default namespace set on this node (as with an "xmlns="
854
- * attribute), as a Namespace object.
922
+ * returns the namespace of the element or attribute node as a Namespace
923
+ * object, or nil if there is no namespace for the element or attribute.
855
924
  */
856
925
  static VALUE namespace(VALUE self)
857
926
  {
@@ -1258,7 +1327,7 @@ static VALUE process_xincludes(VALUE self, VALUE options)
1258
1327
 
1259
1328
  error = xmlGetLastError();
1260
1329
  if(error)
1261
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
1330
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
1262
1331
  else
1263
1332
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
1264
1333
  }
@@ -471,7 +471,7 @@ static VALUE read_more(VALUE self)
471
471
 
472
472
  error = xmlGetLastError();
473
473
  if(error)
474
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
474
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
475
475
  else
476
476
  rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
477
477
 
@@ -201,6 +201,44 @@ static VALUE column(VALUE self)
201
201
  return Qnil;
202
202
  }
203
203
 
204
+ /*
205
+ * call-seq:
206
+ * recovery=(boolean)
207
+ *
208
+ * Should this parser recover from structural errors? It will not stop processing
209
+ * file on structural errors if set to true
210
+ */
211
+ static VALUE set_recovery(VALUE self, VALUE value)
212
+ {
213
+ xmlParserCtxtPtr ctxt;
214
+ Data_Get_Struct(self, xmlParserCtxt, ctxt);
215
+
216
+ if(value == Qfalse)
217
+ ctxt->recovery = 0;
218
+ else
219
+ ctxt->recovery = 1;
220
+
221
+ return value;
222
+ }
223
+
224
+ /*
225
+ * call-seq:
226
+ * recovery
227
+ *
228
+ * Should this parser recover from structural errors? It will not stop processing
229
+ * file on structural errors if set to true
230
+ */
231
+ static VALUE get_recovery(VALUE self)
232
+ {
233
+ xmlParserCtxtPtr ctxt;
234
+ Data_Get_Struct(self, xmlParserCtxt, ctxt);
235
+
236
+ if(ctxt->recovery == 0)
237
+ return Qfalse;
238
+ else
239
+ return Qtrue;
240
+ }
241
+
204
242
  void init_xml_sax_parser_context()
205
243
  {
206
244
  VALUE nokogiri = rb_define_module("Nokogiri");
@@ -217,6 +255,8 @@ void init_xml_sax_parser_context()
217
255
  rb_define_method(klass, "parse_with", parse_with, 1);
218
256
  rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
219
257
  rb_define_method(klass, "replace_entities", get_replace_entities, 0);
258
+ rb_define_method(klass, "recovery=", set_recovery, 1);
259
+ rb_define_method(klass, "recovery", get_recovery, 0);
220
260
  rb_define_method(klass, "line", line, 0);
221
261
  rb_define_method(klass, "column", column, 0);
222
262
  }
@@ -3,19 +3,24 @@
3
3
  void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error)
4
4
  {
5
5
  VALUE list = (VALUE)ctx;
6
- rb_ary_push(list, Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
6
+ rb_ary_push(list, Nokogiri_wrap_xml_syntax_error(error));
7
7
  }
8
8
 
9
9
  void Nokogiri_error_raise(void * ctx, xmlErrorPtr error)
10
10
  {
11
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
11
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
12
12
  }
13
13
 
14
- VALUE Nokogiri_wrap_xml_syntax_error(VALUE klass, xmlErrorPtr error)
14
+ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
15
15
  {
16
- VALUE msg, e;
16
+ VALUE msg, e, klass;
17
17
 
18
- if(!klass) klass = cNokogiriXmlSyntaxError;
18
+ klass = cNokogiriXmlSyntaxError;
19
+
20
+ if (error->domain == XML_FROM_XPATH) {
21
+ VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
22
+ klass = rb_const_get(xpath, rb_intern("SyntaxError"));
23
+ }
19
24
 
20
25
  msg = (error && error->message) ? NOKOGIRI_STR_NEW2(error->message) : Qnil;
21
26
 
@@ -4,7 +4,7 @@
4
4
  #include <nokogiri.h>
5
5
 
6
6
  void init_xml_syntax_error();
7
- VALUE Nokogiri_wrap_xml_syntax_error(VALUE klass, xmlErrorPtr error);
7
+ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
8
8
  void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error);
9
9
  NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error));
10
10
 
@@ -171,15 +171,6 @@ static xmlXPathFunction lookup( void *ctx,
171
171
  return NULL;
172
172
  }
173
173
 
174
- NORETURN(static void xpath_exception_handler(void * ctx, xmlErrorPtr error));
175
- static void xpath_exception_handler(void * ctx, xmlErrorPtr error)
176
- {
177
- VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
178
- VALUE klass = rb_const_get(xpath, rb_intern("SyntaxError"));
179
-
180
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(klass, error));
181
- }
182
-
183
174
  NORETURN(static void xpath_generic_exception_handler(void * ctx, const char *msg, ...));
184
175
  static void xpath_generic_exception_handler(void * ctx, const char *msg, ...)
185
176
  {
@@ -221,7 +212,7 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self)
221
212
  }
222
213
 
223
214
  xmlResetLastError();
224
- xmlSetStructuredErrorFunc(NULL, xpath_exception_handler);
215
+ xmlSetStructuredErrorFunc(NULL, Nokogiri_error_raise);
225
216
 
226
217
  /* For some reason, xmlXPathEvalExpression will blow up with a generic error */
227
218
  /* when there is a non existent function. */
@@ -232,11 +223,8 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self)
232
223
  xmlSetGenericErrorFunc(NULL, NULL);
233
224
 
234
225
  if(xpath == NULL) {
235
- VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
236
- VALUE klass = rb_const_get(xpath, rb_intern("SyntaxError"));
237
-
238
226
  xmlErrorPtr error = xmlGetLastError();
239
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(klass, error));
227
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
240
228
  }
241
229
 
242
230
  assert(ctx->doc);
@@ -237,7 +237,7 @@ static void shutdownFunc(xsltTransformContextPtr ctxt,
237
237
  * call-seq:
238
238
  * register(uri, custom_handler_class)
239
239
  *
240
- * Register a class that implements custom XLST transformation functions.
240
+ * Register a class that implements custom XSLT transformation functions.
241
241
  */
242
242
  static VALUE registr(VALUE self, VALUE uri, VALUE obj)
243
243
  {
@@ -2,9 +2,6 @@
2
2
  # Modify the PATH on windows so that the external DLLs will get loaded.
3
3
 
4
4
  require 'rbconfig'
5
- ENV['PATH'] = [File.expand_path(
6
- File.join(File.dirname(__FILE__), "..", "ext", "nokogiri")
7
- ), ENV['PATH']].compact.join(';') if RbConfig::CONFIG['host_os'] =~ /(mswin|mingw)/i
8
5
 
9
6
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
10
7
  # The line below caused a problem on non-GAE rack environment.
@@ -13,7 +10,7 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
13
10
  # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
14
11
  # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
15
12
  # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
16
- # should skip loading xml jars. This is because those are in WEB-INF/lib and
13
+ # should skip loading xml jars. This is because those are in WEB-INF/lib and
17
14
  # already set in the classpath.
18
15
  unless $LOAD_PATH.to_s.include?("appengine-rack")
19
16
  require 'stringio'
@@ -25,7 +22,12 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
25
22
  end
26
23
  end
27
24
 
28
- require 'nokogiri/nokogiri'
25
+ begin
26
+ RUBY_VERSION =~ /(\d+.\d+)/
27
+ require "nokogiri/#{$1}/nokogiri"
28
+ rescue LoadError
29
+ require 'nokogiri/nokogiri'
30
+ end
29
31
  require 'nokogiri/version'
30
32
  require 'nokogiri/syntax_error'
31
33
  require 'nokogiri/xml'
@@ -36,7 +38,8 @@ require 'nokogiri/css'
36
38
  require 'nokogiri/html/builder'
37
39
 
38
40
  # Nokogiri parses and searches XML/HTML very quickly, and also has
39
- # correctly implemented CSS3 selector support as well as XPath support.
41
+ # correctly implemented CSS3 selector support as well as XPath 1.0
42
+ # support.
40
43
  #
41
44
  # Parsing a document returns either a Nokogiri::XML::Document, or a
42
45
  # Nokogiri::HTML::Document depending on the kind of document you parse.
@@ -65,20 +68,19 @@ module Nokogiri
65
68
  ###
66
69
  # Parse an HTML or XML document. +string+ contains the document.
67
70
  def parse string, url = nil, encoding = nil, options = nil
68
- doc =
69
- if string.respond_to?(:read) ||
70
- string =~ /^\s*<[^Hh>]*html/i # Probably html
71
- Nokogiri.HTML(
72
- string,
73
- url,
74
- encoding, options || XML::ParseOptions::DEFAULT_HTML
75
- )
76
- else
77
- Nokogiri.XML(string, url, encoding,
78
- options || XML::ParseOptions::DEFAULT_XML)
79
- end
80
- yield doc if block_given?
81
- doc
71
+ if string.respond_to?(:read) ||
72
+ /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
73
+ # Expect an HTML indicator to appear within the first 512
74
+ # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
75
+ # shouldn't be that long)
76
+ Nokogiri.HTML(string, url, encoding,
77
+ options || XML::ParseOptions::DEFAULT_HTML)
78
+ else
79
+ Nokogiri.XML(string, url, encoding,
80
+ options || XML::ParseOptions::DEFAULT_XML)
81
+ end.tap { |doc|
82
+ yield doc if block_given?
83
+ }
82
84
  end
83
85
 
84
86
  ###
@@ -110,6 +112,14 @@ module Nokogiri
110
112
  Nokogiri(*args, &block).slop!
111
113
  end
112
114
  end
115
+
116
+ # Make sure to support some popular encoding aliases not known by
117
+ # all iconv implementations.
118
+ {
119
+ 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
120
+ }.each { |alias_name, name|
121
+ EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
122
+ }
113
123
  end
114
124
 
115
125
  ###
@@ -120,8 +130,7 @@ end
120
130
  # To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
121
131
  def Nokogiri(*args, &block)
122
132
  if block_given?
123
- builder = Nokogiri::HTML::Builder.new(&block)
124
- return builder.doc.root
133
+ Nokogiri::HTML::Builder.new(&block).doc.root
125
134
  else
126
135
  Nokogiri.parse(*args)
127
136
  end
@@ -22,60 +22,10 @@ module Nokogiri
22
22
  ###
23
23
  # Convert this CSS node to xpath with +prefix+ using +visitor+
24
24
  def to_xpath prefix = '//', visitor = XPathVisitor.new
25
- self.preprocess!
26
25
  prefix = '.' if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
27
26
  prefix + visitor.accept(self)
28
27
  end
29
28
 
30
- # Preprocess this node tree
31
- def preprocess!
32
- ### Deal with nth-child
33
- matches = find_by_type(
34
- [:CONDITIONAL_SELECTOR,
35
- [:ELEMENT_NAME],
36
- [:PSEUDO_CLASS,
37
- [:FUNCTION]
38
- ]
39
- ]
40
- )
41
- matches.each do |match|
42
- if match.value[1].value[0].value[0] =~ /^nth-(last-)?child/
43
- tag_name = match.value[0].value.first
44
- match.value[0].value = ['*']
45
- match.value[1] = Node.new(:COMBINATOR, [
46
- match.value[1].value[0],
47
- Node.new(:FUNCTION, ['self(', tag_name])
48
- ])
49
- end
50
- end
51
-
52
- ### Deal with first-child, last-child
53
- matches = find_by_type(
54
- [:CONDITIONAL_SELECTOR,
55
- [:ELEMENT_NAME], [:PSEUDO_CLASS]
56
- ])
57
- matches.each do |match|
58
- if ['first-child', 'last-child'].include?(match.value[1].value.first)
59
- which = match.value[1].value.first.gsub(/-\w*$/, '')
60
- tag_name = match.value[0].value.first
61
- match.value[0].value = ['*']
62
- match.value[1] = Node.new(:COMBINATOR, [
63
- Node.new(:FUNCTION, ["#{which}("]),
64
- Node.new(:FUNCTION, ['self(', tag_name])
65
- ])
66
- elsif 'only-child' == match.value[1].value.first
67
- tag_name = match.value[0].value.first
68
- match.value[0].value = ['*']
69
- match.value[1] = Node.new(:COMBINATOR, [
70
- Node.new(:FUNCTION, ["#{match.value[1].value.first}("]),
71
- Node.new(:FUNCTION, ['self(', tag_name])
72
- ])
73
- end
74
- end
75
-
76
- self
77
- end
78
-
79
29
  # Find a node by type using +types+
80
30
  def find_by_type types
81
31
  matches = []