nokogiri 1.6.8.rc3-java → 1.6.8.1-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +5 -3
  3. data/CHANGELOG.rdoc +41 -5
  4. data/CONTRIBUTING.md +1 -1
  5. data/Gemfile +7 -8
  6. data/Manifest.txt +1 -1
  7. data/README.md +0 -3
  8. data/Rakefile +10 -12
  9. data/build_all +1 -1
  10. data/dependencies.yml +29 -4
  11. data/ext/java/nokogiri/XmlAttr.java +3 -1
  12. data/ext/java/nokogiri/XmlDocumentFragment.java +0 -14
  13. data/ext/java/nokogiri/XmlNode.java +74 -58
  14. data/ext/java/nokogiri/internals/NokogiriHelpers.java +7 -7
  15. data/ext/java/nokogiri/internals/SaveContextVisitor.java +5 -1
  16. data/ext/nokogiri/extconf.rb +89 -33
  17. data/ext/nokogiri/xml_node.c +22 -9
  18. data/ext/nokogiri/xml_reader.c +0 -13
  19. data/ext/nokogiri/xml_sax_parser.c +7 -7
  20. data/lib/nokogiri.rb +11 -7
  21. data/lib/nokogiri/html/document.rb +4 -2
  22. data/lib/nokogiri/nokogiri.jar +0 -0
  23. data/lib/nokogiri/version.rb +1 -1
  24. data/lib/nokogiri/xml/document.rb +1 -1
  25. data/lib/nokogiri/xml/parse_options.rb +22 -0
  26. data/tasks/test.rb +5 -0
  27. data/test/html/test_document.rb +26 -0
  28. data/test/html/test_document_encoding.rb +5 -0
  29. data/test/html/test_document_fragment.rb +5 -0
  30. data/test/test_encoding_handler.rb +2 -0
  31. data/test/test_xslt_transforms.rb +33 -0
  32. data/test/xml/sax/test_parser.rb +15 -7
  33. data/test/xml/test_document_encoding.rb +5 -0
  34. data/test/xml/test_document_fragment.rb +12 -0
  35. data/test/xml/test_node_attributes.rb +6 -0
  36. data/test/xml/test_node_reparenting.rb +193 -18
  37. data/test/xml/test_reader.rb +589 -0
  38. data/test/xml/test_unparented_node.rb +13 -0
  39. data/test_all +33 -42
  40. metadata +63 -66
  41. data/test/test_reader.rb +0 -577
@@ -119,12 +119,12 @@ public class NokogiriHelpers {
119
119
  if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) {
120
120
  XmlDocument xmlDocument = (XmlDocument)node.getOwnerDocument().getUserData(CACHED_NODE);
121
121
  if (!(xmlDocument instanceof HtmlDocument)) {
122
- String prefix = getLocalNameForNamespace(((Attr)node).getName());
123
- prefix = prefix != null ? prefix : "";
124
- String href = ((Attr)node).getValue();
125
- XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
126
- if (xmlNamespace != null) return xmlNamespace;
127
- else return XmlNamespace.createFromAttr(ruby, (Attr)node);
122
+ String prefix = getLocalNameForNamespace(((Attr)node).getName());
123
+ prefix = prefix != null ? prefix : "";
124
+ String href = ((Attr)node).getValue();
125
+ XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href);
126
+ if (xmlNamespace != null) return xmlNamespace;
127
+ else return XmlNamespace.createFromAttr(ruby, (Attr)node);
128
128
  }
129
129
  }
130
130
  XmlNode xmlNode = getCachedNode(node);
@@ -134,7 +134,7 @@ public class NokogiriHelpers {
134
134
  }
135
135
  return xmlNode;
136
136
  }
137
-
137
+
138
138
  /**
139
139
  * Construct a new XmlNode wrapping <code>node</code>. The proper
140
140
  * subclass of XmlNode is chosen based on the type of
@@ -741,6 +741,10 @@ public class SaveContextVisitor {
741
741
  return htmlDoc && text.getParentNode().getNodeName().equals("script");
742
742
  }
743
743
 
744
+ private boolean isHtmlStyle(Text text) {
745
+ return htmlDoc && text.getParentNode().getNodeName().equals("style");
746
+ }
747
+
744
748
  private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
745
749
  public boolean enter(Text text) {
746
750
  String textContent = text.getNodeValue();
@@ -752,7 +756,7 @@ public class SaveContextVisitor {
752
756
  }
753
757
  }
754
758
 
755
- if (NokogiriHelpers.shouldEncode(text) && !isHtmlScript(text)) {
759
+ if (NokogiriHelpers.shouldEncode(text) && !isHtmlScript(text) && !isHtmlStyle(text)) {
756
760
  textContent = encodeJavaString(textContent);
757
761
  }
758
762
 
@@ -8,6 +8,42 @@ ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
8
8
  #
9
9
  # functions
10
10
  #
11
+ def windows?
12
+ RbConfig::CONFIG['target_os'] =~ /mingw32|mswin/
13
+ end
14
+
15
+ def solaris?
16
+ RbConfig::CONFIG['target_os'] =~ /solaris/
17
+ end
18
+
19
+ def darwin?
20
+ RbConfig::CONFIG['target_os'] =~ /darwin/
21
+ end
22
+
23
+ def nix?
24
+ ! (windows? || solaris? || darwin?)
25
+ end
26
+
27
+ def sh_export_path path
28
+ # because libxslt 1.1.29 configure.in uses AC_PATH_TOOL which treats ":"
29
+ # as a $PATH separator, we need to convert windows paths from
30
+ #
31
+ # C:/path/to/foo
32
+ #
33
+ # to
34
+ #
35
+ # /C/path/to/foo
36
+ #
37
+ # which is sh-compatible, in order to find things properly during
38
+ # configuration
39
+ if windows?
40
+ match = Regexp.new("^([A-Z]):(/.*)").match(path)
41
+ if match && match.length == 3
42
+ return File.join("/", match[1], match[2])
43
+ end
44
+ end
45
+ path
46
+ end
11
47
 
12
48
  def do_help
13
49
  print <<HELP
@@ -70,27 +106,28 @@ def do_clean
70
106
  exit! 0
71
107
  end
72
108
 
73
- # The gem version constraint in the Rakefile is not respected at install time.
74
- # Keep this version in sync with the one in the Rakefile !
75
- require 'rubygems'
76
- gem 'pkg-config', '~> 1.1.7'
77
- require 'pkg-config'
78
- message "Using pkg-config version #{PKGConfig::VERSION}\n"
79
-
80
109
  def package_config pkg, options={}
81
110
  package = pkg_config(pkg)
82
111
  return package if package
83
112
 
84
- return nil unless PKGConfig.have_package(pkg)
113
+ begin
114
+ require 'rubygems'
115
+ gem 'pkg-config', (gem_ver='~> 1.1.7')
116
+ require 'pkg-config' and message("Using pkg-config gem version #{PKGConfig::VERSION}\n")
117
+ rescue LoadError
118
+ message "pkg-config could not be used to find #{pkg}\nPlease install either `pkg-config` or the pkg-config gem per\n\n gem install pkg-config -v #{gem_ver.inspect}\n\n"
119
+ else
120
+ return nil unless PKGConfig.have_package(pkg)
85
121
 
86
- cflags = PKGConfig.cflags(pkg)
87
- ldflags = PKGConfig.libs_only_L(pkg)
88
- libs = PKGConfig.libs_only_l(pkg)
122
+ cflags = PKGConfig.cflags(pkg)
123
+ ldflags = PKGConfig.libs_only_L(pkg)
124
+ libs = PKGConfig.libs_only_l(pkg)
89
125
 
90
- Logging::message "PKGConfig package configuration for %s\n", pkg
91
- Logging::message "cflags: %s\nldflags: %s\nlibs: %s\n\n", cflags, ldflags, libs
126
+ Logging::message "PKGConfig package configuration for %s\n", pkg
127
+ Logging::message "cflags: %s\nldflags: %s\nlibs: %s\n\n", cflags, ldflags, libs
92
128
 
93
- [cflags, ldflags, libs]
129
+ [cflags, ldflags, libs]
130
+ end
94
131
  end
95
132
 
96
133
  def nokogiri_try_compile
@@ -364,7 +401,10 @@ when arg_config('--clean')
364
401
  end
365
402
 
366
403
  RbConfig::MAKEFILE_CONFIG['CC'] = ENV['CC'] if ENV['CC']
404
+ # use same c compiler for libxml and libxslt
405
+ ENV['CC'] = RbConfig::MAKEFILE_CONFIG['CC']
367
406
 
407
+ # TODO: deprecate MacRuby: https://github.com/sparklemotion/nokogiri/issues/1474
368
408
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'macruby'
369
409
  $LIBRUBYARG_STATIC.gsub!(/-static/, '')
370
410
  end
@@ -374,17 +414,20 @@ $LIBS << " #{ENV["LIBS"]}"
374
414
  # Read CFLAGS from ENV and make sure compiling works.
375
415
  add_cflags(ENV["CFLAGS"])
376
416
 
377
- case RbConfig::CONFIG['target_os']
378
- when 'mingw32', /mswin/
379
- windows_p = true
417
+ if windows?
380
418
  $CFLAGS << " -DXP_WIN -DXP_WIN32 -DUSE_INCLUDED_VASPRINTF"
381
- when /solaris/
419
+ end
420
+
421
+ if solaris?
382
422
  $CFLAGS << " -DUSE_INCLUDED_VASPRINTF"
383
- when /darwin/
384
- darwin_p = true
423
+ end
424
+
425
+ if darwin?
385
426
  # Let Apple LLVM/clang 5.1 ignore unknown compiler flags
386
427
  add_cflags("-Wno-error=unused-command-line-argument-hard-error-in-future")
387
- else
428
+ end
429
+
430
+ if nix?
388
431
  $CFLAGS << " -g -DXP_UNIX"
389
432
  end
390
433
 
@@ -436,9 +479,12 @@ else
436
479
  dependencies = YAML.load_file(File.join(ROOT, "dependencies.yml"))
437
480
 
438
481
  cross_build_p = enable_config("cross-build")
439
- if cross_build_p || windows_p
440
- zlib_recipe = process_recipe("zlib", dependencies["zlib"], static_p, cross_build_p) do |recipe|
441
- recipe.files = ["http://zlib.net/#{recipe.name}-#{recipe.version}.tar.gz"]
482
+ if cross_build_p || windows?
483
+ zlib_recipe = process_recipe("zlib", dependencies["zlib"]["version"], static_p, cross_build_p) do |recipe|
484
+ recipe.files = [{
485
+ url: "http://zlib.net/#{recipe.name}-#{recipe.version}.tar.gz",
486
+ md5: dependencies["zlib"]["md5"]
487
+ }]
442
488
  class << recipe
443
489
  attr_accessor :cross_build_p
444
490
 
@@ -472,8 +518,11 @@ else
472
518
  recipe.cross_build_p = cross_build_p
473
519
  end
474
520
 
475
- libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"], static_p, cross_build_p) do |recipe|
476
- recipe.files = ["http://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz"]
521
+ libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p, cross_build_p) do |recipe|
522
+ recipe.files = [{
523
+ url: "http://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz",
524
+ md5: dependencies["libiconv"]["md5"]
525
+ }]
477
526
  recipe.configure_options += [
478
527
  "CPPFLAGS=-Wall",
479
528
  "CFLAGS=-O2 -g",
@@ -482,7 +531,7 @@ else
482
531
  ]
483
532
  end
484
533
  else
485
- if darwin_p && !have_header('iconv.h')
534
+ if darwin? && !have_header('iconv.h')
486
535
  abort <<'EOM'.chomp
487
536
  -----
488
537
  The file "iconv.h" is missing in your build environment,
@@ -498,17 +547,21 @@ EOM
498
547
  end
499
548
  end
500
549
 
501
- unless windows_p
550
+ unless windows?
502
551
  preserving_globals {
503
552
  have_library('z', 'gzdopen', 'zlib.h')
504
553
  } or abort 'zlib is missing; necessary for building libxml2'
505
554
  end
506
555
 
507
- libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"], static_p, cross_build_p) do |recipe|
508
- recipe.files = ["http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz"]
556
+ libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe|
557
+ recipe.files = [{
558
+ url: "http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz",
559
+ md5: dependencies["libxml2"]["md5"]
560
+ }]
509
561
  recipe.configure_options += [
510
562
  "--without-python",
511
563
  "--without-readline",
564
+ *(zlib_recipe ? ["--with-zlib=#{zlib_recipe.path}", "CFLAGS=-I#{zlib_recipe.path}/include"] : []),
512
565
  *(libiconv_recipe ? "--with-iconv=#{libiconv_recipe.path}" : iconv_configure_flags),
513
566
  "--with-c14n",
514
567
  "--with-debug",
@@ -516,13 +569,16 @@ EOM
516
569
  ]
517
570
  end
518
571
 
519
- libxslt_recipe = process_recipe("libxslt", dependencies["libxslt"], static_p, cross_build_p) do |recipe|
520
- recipe.files = ["http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz"]
572
+ libxslt_recipe = process_recipe("libxslt", dependencies["libxslt"]["version"], static_p, cross_build_p) do |recipe|
573
+ recipe.files = [{
574
+ url: "http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz",
575
+ md5: dependencies["libxslt"]["md5"]
576
+ }]
521
577
  recipe.configure_options += [
522
578
  "--without-python",
523
579
  "--without-crypto",
524
580
  "--with-debug",
525
- "--with-libxml-prefix=#{libxml2_recipe.path}"
581
+ "--with-libxml-prefix=#{sh_export_path(libxml2_recipe.path)}"
526
582
  ]
527
583
  end
528
584
 
@@ -35,13 +35,13 @@ static void relink_namespace(xmlNodePtr reparented)
35
35
  xmlNsPtr ns;
36
36
 
37
37
  if (reparented->type != XML_ATTRIBUTE_NODE &&
38
- reparented->type != XML_ELEMENT_NODE) return;
38
+ reparented->type != XML_ELEMENT_NODE) { return; }
39
39
 
40
40
  if (reparented->ns == NULL || reparented->ns->prefix == NULL) {
41
41
  name = xmlSplitQName2(reparented->name, &prefix);
42
42
 
43
43
  if(reparented->type == XML_ATTRIBUTE_NODE) {
44
- if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) return;
44
+ if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) { return; }
45
45
  }
46
46
 
47
47
  ns = xmlSearchNs(reparented->doc, reparented, prefix);
@@ -57,18 +57,19 @@ static void relink_namespace(xmlNodePtr reparented)
57
57
  }
58
58
 
59
59
  /* Avoid segv when relinking against unlinked nodes. */
60
- if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) return;
60
+ if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; }
61
61
 
62
62
  /* Make sure that our reparented node has the correct namespaces */
63
- if(!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent)
63
+ if (!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent) {
64
64
  xmlSetNs(reparented, reparented->parent->ns);
65
+ }
65
66
 
66
67
  /* Search our parents for an existing definition */
67
- if(reparented->nsDef) {
68
+ if (reparented->nsDef) {
68
69
  xmlNsPtr curr = reparented->nsDef;
69
70
  xmlNsPtr prev = NULL;
70
71
 
71
- while(curr) {
72
+ while (curr) {
72
73
  xmlNsPtr ns = xmlSearchNsByHref(
73
74
  reparented->doc,
74
75
  reparented->parent,
@@ -76,7 +77,7 @@ static void relink_namespace(xmlNodePtr reparented)
76
77
  );
77
78
  /* If we find the namespace is already declared, remove it from this
78
79
  * definition list. */
79
- if(ns && ns != curr) {
80
+ if (ns && ns != curr && xmlStrEqual(ns->prefix, curr->prefix)) {
80
81
  if (prev) {
81
82
  prev->next = curr->next;
82
83
  } else {
@@ -92,12 +93,12 @@ static void relink_namespace(xmlNodePtr reparented)
92
93
 
93
94
  /* Only walk all children if there actually is a namespace we need to */
94
95
  /* reparent. */
95
- if(NULL == reparented->ns) return;
96
+ if (NULL == reparented->ns) { return; }
96
97
 
97
98
  /* When a node gets reparented, walk it's children to make sure that */
98
99
  /* their namespaces are reparented as well. */
99
100
  child = reparented->children;
100
- while(NULL != child) {
101
+ while (NULL != child) {
101
102
  relink_namespace(child);
102
103
  child = child->next;
103
104
  }
@@ -140,6 +141,7 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
140
141
  {
141
142
  VALUE reparented_obj ;
142
143
  xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text, parent ;
144
+ int original_ns_prefix_is_default = 0 ;
143
145
 
144
146
  if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
145
147
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
@@ -250,10 +252,21 @@ ok:
250
252
  */
251
253
  reparentee->_private = NULL ;
252
254
  }
255
+
256
+ if (reparentee->ns != NULL && reparentee->ns->prefix == NULL) {
257
+ original_ns_prefix_is_default = 1;
258
+ }
259
+
253
260
  nokogiri_root_node(reparentee);
261
+
254
262
  if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
255
263
  rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
256
264
  }
265
+
266
+ if (original_ns_prefix_is_default && reparentee->ns != NULL && reparentee->ns->prefix != NULL) {
267
+ /* issue #391, where new node's prefix may become the string "default" */
268
+ reparentee->ns->prefix = NULL;
269
+ }
257
270
  }
258
271
 
259
272
  if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
@@ -219,19 +219,6 @@ static VALUE reader_attribute(VALUE self, VALUE name)
219
219
  name = StringValue(name) ;
220
220
 
221
221
  value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name));
222
- if(value == NULL) {
223
- /* this section is an attempt to workaround older versions of libxml that
224
- don't handle namespaces properly in all attribute-and-friends functions */
225
- xmlChar *prefix = NULL ;
226
- xmlChar *localname = xmlSplitQName2((xmlChar*)StringValueCStr(name), &prefix);
227
- if (localname != NULL) {
228
- value = xmlTextReaderLookupNamespace(reader, localname);
229
- xmlFree(localname) ;
230
- } else {
231
- value = xmlTextReaderLookupNamespace(reader, prefix);
232
- }
233
- xmlFree(prefix);
234
- }
235
222
  if(value == NULL) return Qnil;
236
223
 
237
224
  rb_value = NOKOGIRI_STR_NEW2(value);
@@ -18,16 +18,16 @@ static void start_document(void * ctx)
18
18
 
19
19
  if(NULL != ctxt && ctxt->html != 1) {
20
20
  if(ctxt->standalone != -1) { /* -1 means there was no declaration */
21
- VALUE encoding = ctxt->encoding ?
22
- NOKOGIRI_STR_NEW2(ctxt->encoding) :
23
- Qnil;
21
+ VALUE encoding = Qnil ;
22
+ if (ctxt->encoding) {
23
+ encoding = NOKOGIRI_STR_NEW2(ctxt->encoding) ;
24
+ } else if (ctxt->input && ctxt->input->encoding) {
25
+ encoding = NOKOGIRI_STR_NEW2(ctxt->input->encoding) ;
26
+ }
24
27
 
25
- VALUE version = ctxt->version ?
26
- NOKOGIRI_STR_NEW2(ctxt->version) :
27
- Qnil;
28
+ VALUE version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil;
28
29
 
29
30
  VALUE standalone = Qnil;
30
-
31
31
  switch(ctxt->standalone)
32
32
  {
33
33
  case 0:
@@ -114,15 +114,19 @@ module Nokogiri
114
114
  def Slop(*args, &block)
115
115
  Nokogiri(*args, &block).slop!
116
116
  end
117
+
118
+ def install_default_aliases
119
+ # Make sure to support some popular encoding aliases not known by
120
+ # all iconv implementations.
121
+ {
122
+ 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
123
+ }.each { |alias_name, name|
124
+ EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
125
+ }
126
+ end
117
127
  end
118
128
 
119
- # Make sure to support some popular encoding aliases not known by
120
- # all iconv implementations.
121
- {
122
- 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
123
- }.each { |alias_name, name|
124
- EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
125
- }
129
+ Nokogiri.install_default_aliases
126
130
  end
127
131
 
128
132
  ###
@@ -173,7 +173,7 @@ module Nokogiri
173
173
 
174
174
  if string_or_io.respond_to?(:read)
175
175
  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
176
- if !encoding
176
+ unless encoding
177
177
  # Libxml2's parser has poor support for encoding
178
178
  # detection. First, it does not recognize the HTML5
179
179
  # style meta charset declaration. Secondly, even if it
@@ -196,7 +196,9 @@ module Nokogiri
196
196
  end
197
197
 
198
198
  # read_memory pukes on empty docs
199
- return new if string_or_io.nil? or string_or_io.empty?
199
+ if string_or_io.nil? or string_or_io.empty?
200
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
201
+ end
200
202
 
201
203
  encoding ||= EncodingReader.detect_encoding(string_or_io)
202
204
 
Binary file
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.6.8.rc3'
3
+ VERSION = '1.6.8.1'
4
4
 
5
5
  class VersionInfo # :nodoc:
6
6
  def jruby?