nokogiri 1.7.2 → 1.8.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.cross_rubies +4 -4
  3. data/.travis.yml +43 -24
  4. data/CHANGELOG.md +54 -6
  5. data/Gemfile +8 -7
  6. data/Gemfile-libxml-ruby +3 -0
  7. data/LICENSE-DEPENDENCIES.md +1612 -0
  8. data/{LICENSE.txt → LICENSE.md} +1 -1
  9. data/Manifest.txt +5 -8
  10. data/README.md +8 -5
  11. data/Rakefile +15 -31
  12. data/appveyor.yml +2 -0
  13. data/dependencies.yml +12 -7
  14. data/ext/nokogiri/extconf.rb +12 -17
  15. data/ext/nokogiri/nokogiri.h +0 -10
  16. data/ext/nokogiri/xml_attr.c +12 -8
  17. data/ext/nokogiri/xml_node.c +17 -14
  18. data/ext/nokogiri/xml_sax_push_parser.c +56 -12
  19. data/lib/nokogiri/html/sax/parser.rb +10 -0
  20. data/lib/nokogiri/version.rb +5 -4
  21. data/lib/nokogiri/xml/document.rb +9 -9
  22. data/lib/nokogiri/xml/node.rb +7 -7
  23. data/lib/nokogiri/xml/node_set.rb +12 -7
  24. data/lib/nokogiri/xml/sax/parser.rb +6 -7
  25. data/lib/nokogiri/xml/searchable.rb +34 -25
  26. data/lib/nokogiri/xml/syntax_error.rb +24 -1
  27. data/test/decorators/test_slop.rb +4 -1
  28. data/test/helper.rb +10 -0
  29. data/test/html/sax/test_parser.rb +27 -0
  30. data/test/html/test_document.rb +12 -1
  31. data/test/html/test_document_encoding.rb +1 -3
  32. data/test/html/test_document_fragment.rb +3 -0
  33. data/test/xml/sax/test_push_parser.rb +48 -0
  34. data/test/xml/test_attr.rb +7 -0
  35. data/test/xml/test_document.rb +1 -1
  36. data/test/xml/test_document_fragment.rb +27 -0
  37. data/test/xml/test_entity_reference.rb +2 -2
  38. data/test/xml/test_node.rb +12 -15
  39. data/test/xml/test_node_reparenting.rb +14 -0
  40. data/test/xml/test_node_set.rb +8 -6
  41. data/test/xml/test_reader.rb +19 -0
  42. data/test/xml/test_syntax_error.rb +21 -15
  43. data/test/xml/test_unparented_node.rb +54 -11
  44. data/test/xml/test_xpath.rb +23 -6
  45. metadata +31 -19
  46. data/suppressions/nokogiri_ree-1.8.7.358.supp +0 -61
  47. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  48. data/suppressions/nokogiri_ruby-1.9.2.320.supp +0 -28
  49. data/suppressions/nokogiri_ruby-1.9.3.327.supp +0 -28
  50. data/test_all +0 -105
@@ -3,7 +3,7 @@
3
3
  static void deallocate(xmlParserCtxtPtr ctx)
4
4
  {
5
5
  NOKOGIRI_DEBUG_START(ctx);
6
- if(ctx != NULL) {
6
+ if (ctx != NULL) {
7
7
  NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
8
8
  xmlFreeParserCtxt(ctx);
9
9
  }
@@ -30,12 +30,12 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
30
30
 
31
31
  Data_Get_Struct(self, xmlParserCtxt, ctx);
32
32
 
33
- if(Qnil != _chunk) {
33
+ if (Qnil != _chunk) {
34
34
  chunk = StringValuePtr(_chunk);
35
35
  size = (int)RSTRING_LEN(_chunk);
36
36
  }
37
37
 
38
- if(xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
38
+ if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
39
39
  if (!(ctx->options & XML_PARSE_RECOVER)) {
40
40
  xmlErrorPtr e = xmlCtxtGetLastError(ctx);
41
41
  Nokogiri_error_raise(NULL, e);
@@ -59,17 +59,18 @@ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
59
59
 
60
60
  Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
61
61
 
62
- if(_filename != Qnil) filename = StringValueCStr(_filename);
62
+ if (_filename != Qnil) { filename = StringValueCStr(_filename); }
63
63
 
64
64
  ctx = xmlCreatePushParserCtxt(
65
- sax,
66
- NULL,
67
- NULL,
68
- 0,
69
- filename
70
- );
71
- if(ctx == NULL)
65
+ sax,
66
+ NULL,
67
+ NULL,
68
+ 0,
69
+ filename
70
+ );
71
+ if (ctx == NULL) {
72
72
  rb_raise(rb_eRuntimeError, "Could not create a parser context");
73
+ }
73
74
 
74
75
  ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
75
76
 
@@ -91,12 +92,53 @@ static VALUE set_options(VALUE self, VALUE options)
91
92
  xmlParserCtxtPtr ctx;
92
93
  Data_Get_Struct(self, xmlParserCtxt, ctx);
93
94
 
94
- if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0)
95
+ if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
95
96
  rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
97
+ }
96
98
 
97
99
  return Qnil;
98
100
  }
99
101
 
102
+ /*
103
+ * call-seq:
104
+ * replace_entities
105
+ *
106
+ * Should this parser replace entities? &amp; will get converted to '&' if
107
+ * set to true
108
+ */
109
+ static VALUE get_replace_entities(VALUE self)
110
+ {
111
+ xmlParserCtxtPtr ctx;
112
+ Data_Get_Struct(self, xmlParserCtxt, ctx);
113
+
114
+ if (0 == ctx->replaceEntities) {
115
+ return Qfalse;
116
+ } else {
117
+ return Qtrue;
118
+ }
119
+ }
120
+
121
+ /*
122
+ * call-seq:
123
+ * replace_entities=(boolean)
124
+ *
125
+ * Should this parser replace entities? &amp; will get converted to '&' if
126
+ * set to true
127
+ */
128
+ static VALUE set_replace_entities(VALUE self, VALUE value)
129
+ {
130
+ xmlParserCtxtPtr ctx;
131
+ Data_Get_Struct(self, xmlParserCtxt, ctx);
132
+
133
+ if (Qfalse == value) {
134
+ ctx->replaceEntities = 0;
135
+ } else {
136
+ ctx->replaceEntities = 1;
137
+ }
138
+
139
+ return value;
140
+ }
141
+
100
142
  VALUE cNokogiriXmlSaxPushParser ;
101
143
  void init_xml_sax_push_parser()
102
144
  {
@@ -112,4 +154,6 @@ void init_xml_sax_push_parser()
112
154
  rb_define_private_method(klass, "native_write", native_write, 2);
113
155
  rb_define_method(klass, "options", get_options, 0);
114
156
  rb_define_method(klass, "options=", set_options, 1);
157
+ rb_define_method(klass, "replace_entities", get_replace_entities, 0);
158
+ rb_define_method(klass, "replace_entities=", set_replace_entities, 1);
115
159
  }
@@ -36,6 +36,16 @@ module Nokogiri
36
36
  ctx.parse_with self
37
37
  end
38
38
 
39
+ ###
40
+ # Parse given +io+
41
+ def parse_io io, encoding = 'UTF-8'
42
+ check_encoding(encoding)
43
+ @encoding = encoding
44
+ ctx = ParserContext.io(io, ENCODINGS[encoding])
45
+ yield ctx if block_given?
46
+ ctx.parse_with self
47
+ end
48
+
39
49
  ###
40
50
  # Parse a file with +filename+
41
51
  def parse_file filename, encoding = 'UTF-8'
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.7.2'
3
+ VERSION = '1.8.0'
4
4
 
5
5
  class VersionInfo # :nodoc:
6
6
  def jruby?
@@ -12,9 +12,10 @@ module Nokogiri
12
12
  end
13
13
 
14
14
  def loaded_parser_version
15
- LIBXML_PARSER_VERSION.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.collect{ |j|
16
- j.to_i
17
- }.join(".")
15
+ LIBXML_PARSER_VERSION.
16
+ scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
17
+ collect(&:to_i).
18
+ join(".")
18
19
  end
19
20
 
20
21
  def compiled_parser_version
@@ -33,10 +33,11 @@ module Nokogiri
33
33
  # +block+ (optional) is passed a configuration object on which
34
34
  # parse options may be set.
35
35
  #
36
- # When parsing untrusted documents, it's recommended that the
37
- # +nonet+ option be used, as shown in this example code:
38
- #
39
- # Nokogiri::XML::Document.parse(xml_string) { |config| config.nonet }
36
+ # By default, Nokogiri treats documents as untrusted, and so
37
+ # does not attempt to load DTDs or access the network. See
38
+ # Nokogiri::XML::ParseOptions for a complete list of options;
39
+ # and that module's DEFAULT_XML constant for what's set (and not
40
+ # set) by default.
40
41
  #
41
42
  # Nokogiri.XML() is a convenience method which will call this method.
42
43
  #
@@ -239,10 +240,10 @@ module Nokogiri
239
240
  undef_method :namespace_definitions, :line, :add_namespace
240
241
 
241
242
  def add_child node_or_tags
242
- raise "Document already has a root node" if root && root.name != 'nokogiri_text_wrapper'
243
+ raise "A document may not have multiple root nodes." if (root && root.name != 'nokogiri_text_wrapper') && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
243
244
  node_or_tags = coerce(node_or_tags)
244
245
  if node_or_tags.is_a?(XML::NodeSet)
245
- raise "Document cannot have multiple root nodes" if node_or_tags.size > 1
246
+ raise "A document may not have multiple root nodes." if node_or_tags.size > 1
246
247
  super(node_or_tags.first)
247
248
  else
248
249
  super
@@ -273,9 +274,8 @@ module Nokogiri
273
274
  (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
274
275
  end
275
276
 
276
- def implied_xpath_contexts # :nodoc:
277
- ["//"]
278
- end
277
+ # @private
278
+ IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
279
279
 
280
280
  def inspect_attributes
281
281
  [:name, :children]
@@ -154,7 +154,7 @@ module Nokogiri
154
154
  def prepend_child node_or_tags
155
155
  if first = children.first
156
156
  # Mimic the error add_child would raise.
157
- raise RuntimeError, "Document already has a root node" if document? && !node_or_tags.processing_instruction?
157
+ raise RuntimeError, "Document already has a root node" if document? && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
158
158
  first.__send__(:add_sibling, :previous, node_or_tags)
159
159
  else
160
160
  add_child(node_or_tags)
@@ -172,6 +172,7 @@ module Nokogiri
172
172
  add_child node_or_tags
173
173
  self
174
174
  end
175
+
175
176
  ###
176
177
  # Insert +node_or_tags+ before this Node (as a sibling).
177
178
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
@@ -180,7 +181,7 @@ module Nokogiri
180
181
  #
181
182
  # Also see related method +before+.
182
183
  def add_previous_sibling node_or_tags
183
- raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !node_or_tags.processing_instruction?
184
+ raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
184
185
 
185
186
  add_sibling :previous, node_or_tags
186
187
  end
@@ -193,7 +194,7 @@ module Nokogiri
193
194
  #
194
195
  # Also see related method +after+.
195
196
  def add_next_sibling node_or_tags
196
- raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !node_or_tags.processing_instruction?
197
+ raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
197
198
 
198
199
  add_sibling :next, node_or_tags
199
200
  end
@@ -622,7 +623,7 @@ module Nokogiri
622
623
  encoding = options[:encoding] || document.encoding
623
624
  options[:encoding] = encoding
624
625
 
625
- outstring = ""
626
+ outstring = String.new
626
627
  if encoding && outstring.respond_to?(:force_encoding)
627
628
  outstring.force_encoding(Encoding.find(encoding))
628
629
  end
@@ -818,9 +819,8 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
818
819
  EOERR
819
820
  end
820
821
 
821
- def implied_xpath_contexts # :nodoc:
822
- [".//"]
823
- end
822
+ # @private
823
+ IMPLIED_XPATH_CONTEXTS = [ './/'.freeze ].freeze # :nodoc:
824
824
 
825
825
  def add_child_node_and_reparent_attrs node # :nodoc:
826
826
  add_child_node node
@@ -11,6 +11,8 @@ module Nokogiri
11
11
  # The Document this NodeSet is associated with
12
12
  attr_accessor :document
13
13
 
14
+ alias :clone :dup
15
+
14
16
  # Create a NodeSet with +document+ defaulting to +list+
15
17
  def initialize document, list = []
16
18
  @document = document
@@ -71,9 +73,10 @@ module Nokogiri
71
73
  # For more information see Nokogiri::XML::Searchable#css
72
74
  def css *args
73
75
  rules, handler, ns, _ = extract_params(args)
76
+ paths = css_rules_to_xpath(rules, ns)
74
77
 
75
78
  inject(NodeSet.new(document)) do |set, node|
76
- set += css_internal node, rules, handler, ns
79
+ set + xpath_internal(node, paths, handler, ns, nil)
77
80
  end
78
81
  end
79
82
 
@@ -88,7 +91,7 @@ module Nokogiri
88
91
  paths, handler, ns, binds = extract_params(args)
89
92
 
90
93
  inject(NodeSet.new(document)) do |set, node|
91
- set += node.xpath(*(paths + [ns, handler, binds].compact))
94
+ set + xpath_internal(node, paths, handler, ns, binds)
92
95
  end
93
96
  end
94
97
 
@@ -290,7 +293,11 @@ module Nokogiri
290
293
  # Returns a new NodeSet containing all the children of all the nodes in
291
294
  # the NodeSet
292
295
  def children
293
- inject(NodeSet.new(document)) { |set, node| set += node.children }
296
+ node_set = NodeSet.new(document)
297
+ each do |node|
298
+ node.children.each { |n| node_set.push(n) }
299
+ end
300
+ node_set
294
301
  end
295
302
 
296
303
  ###
@@ -312,11 +319,9 @@ module Nokogiri
312
319
 
313
320
  alias :+ :|
314
321
 
315
- private
322
+ # @private
323
+ IMPLIED_XPATH_CONTEXTS = [ './/'.freeze, 'self::'.freeze ].freeze # :nodoc:
316
324
 
317
- def implied_xpath_contexts # :nodoc:
318
- [".//", "self::"]
319
- end
320
325
  end
321
326
  end
322
327
  end
@@ -68,8 +68,7 @@ module Nokogiri
68
68
 
69
69
  # Create a new Parser with +doc+ and +encoding+
70
70
  def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
71
- check_encoding(encoding)
72
- @encoding = encoding
71
+ @encoding = check_encoding(encoding)
73
72
  @document = doc
74
73
  @warned = false
75
74
  end
@@ -88,9 +87,8 @@ module Nokogiri
88
87
  ###
89
88
  # Parse given +io+
90
89
  def parse_io io, encoding = 'ASCII'
91
- check_encoding(encoding)
92
- @encoding = encoding
93
- ctx = ParserContext.io(io, ENCODINGS[encoding])
90
+ @encoding = check_encoding(encoding)
91
+ ctx = ParserContext.io(io, ENCODINGS[@encoding])
94
92
  yield ctx if block_given?
95
93
  ctx.parse_with self
96
94
  end
@@ -114,8 +112,9 @@ module Nokogiri
114
112
 
115
113
  private
116
114
  def check_encoding(encoding)
117
- encoding.upcase!
118
- raise ArgumentError.new("'#{encoding}' is not a valid encoding") unless ENCODINGS[encoding]
115
+ encoding.upcase.tap do |enc|
116
+ raise ArgumentError.new("'#{enc}' is not a valid encoding") unless ENCODINGS[enc]
117
+ end
119
118
  end
120
119
  end
121
120
  end
@@ -149,30 +149,9 @@ module Nokogiri
149
149
  # }.new)
150
150
  #
151
151
  def xpath *args
152
- return NodeSet.new(document) unless document
153
-
154
152
  paths, handler, ns, binds = extract_params(args)
155
153
 
156
- sets = paths.map do |path|
157
- ctx = XPathContext.new(self)
158
- ctx.register_namespaces(ns)
159
- path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
160
-
161
- binds.each do |key,value|
162
- ctx.register_variable key.to_s, value
163
- end if binds
164
-
165
- ctx.evaluate(path, handler)
166
- end
167
- return sets.first if sets.length == 1
168
-
169
- NodeSet.new(document) do |combined|
170
- sets.each do |set|
171
- set.each do |node|
172
- combined << node
173
- end
174
- end
175
- end
154
+ xpath_internal self, paths, handler, ns, binds
176
155
  end
177
156
 
178
157
  ##
@@ -189,12 +168,42 @@ module Nokogiri
189
168
  private
190
169
 
191
170
  def css_internal node, rules, handler, ns
192
- xpaths = rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
193
- node.xpath(*(xpaths + [ns, handler].compact))
171
+ xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
172
+ end
173
+
174
+ def xpath_internal node, paths, handler, ns, binds
175
+ document = node.document
176
+ return NodeSet.new(document) unless document
177
+
178
+ if paths.length == 1
179
+ return xpath_impl(node, paths.first, handler, ns, binds)
180
+ end
181
+
182
+ NodeSet.new(document) do |combined|
183
+ paths.each do |path|
184
+ xpath_impl(node, path, handler, ns, binds).each { |set| combined << set }
185
+ end
186
+ end
187
+ end
188
+
189
+ def xpath_impl node, path, handler, ns, binds
190
+ ctx = XPathContext.new(node)
191
+ ctx.register_namespaces(ns)
192
+ path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
193
+
194
+ binds.each do |key,value|
195
+ ctx.register_variable key.to_s, value
196
+ end if binds
197
+
198
+ ctx.evaluate(path, handler)
199
+ end
200
+
201
+ def css_rules_to_xpath(rules, ns)
202
+ rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
194
203
  end
195
204
 
196
205
  def xpath_query_from_css_rule rule, ns
197
- implied_xpath_contexts.map do |implied_xpath_context|
206
+ self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
198
207
  CSS.xpath_for(rule.to_s, :prefix => implied_xpath_context, :ns => ns)
199
208
  end.join(' | ')
200
209
  end
@@ -40,7 +40,30 @@ module Nokogiri
40
40
  end
41
41
 
42
42
  def to_s
43
- super.chomp
43
+ message = super.chomp
44
+ [location_to_s, level_to_s, message].
45
+ compact.join(": ").
46
+ force_encoding(message.encoding)
47
+ end
48
+
49
+ private
50
+
51
+ def level_to_s
52
+ case level
53
+ when 3 then "FATAL"
54
+ when 2 then "ERROR"
55
+ when 1 then "WARNING"
56
+ else nil
57
+ end
58
+ end
59
+
60
+ def nil_or_zero?(attribute)
61
+ attribute.nil? || attribute.zero?
62
+ end
63
+
64
+ def location_to_s
65
+ return nil if nil_or_zero?(line) && nil_or_zero?(column)
66
+ "#{line}:#{column}"
44
67
  end
45
68
  end
46
69
  end
@@ -9,10 +9,13 @@ module Nokogiri
9
9
  <description>this is the foo thing</description>
10
10
  </item>
11
11
  eoxml
12
+
12
13
  assert doc.item.respond_to?(:title)
13
14
  assert_equal 'foo', doc.item.title.text
15
+
14
16
  assert doc.item.respond_to?(:_description), 'should have description'
15
- assert 'this is the foo thing', doc.item._description.text
17
+ assert_equal 'this is the foo thing', doc.item._description.text
18
+
16
19
  assert !doc.item.respond_to?(:foo)
17
20
  assert_raise(NoMethodError) { doc.item.foo }
18
21
  end