nokogiri 1.5.6.rc2-java → 1.5.6.rc3-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

@@ -72,6 +72,8 @@ public class XmlDomParserContext extends ParserContext {
72
72
  "http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
73
73
  protected static final String FEATURE_INCLUDE_IGNORABLE_WHITESPACE =
74
74
  "http://apache.org/xml/features/dom/include-ignorable-whitespace";
75
+ protected static final String CONTINUE_AFTER_FATAL_ERROR =
76
+ "http://apache.org/xml/features/continue-after-fatal-error";
75
77
  protected static final String FEATURE_NOT_EXPAND_ENTITY =
76
78
  "http://apache.org/xml/features/dom/create-entity-ref-nodes";
77
79
  protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
@@ -110,17 +112,21 @@ public class XmlDomParserContext extends ParserContext {
110
112
  System.setProperty("org.apache.xerces.xni.parser.XMLParserConfiguration",
111
113
  "org.apache.xerces.parsers.XIncludeParserConfiguration");
112
114
  }
113
-
115
+
114
116
  parser = new NokogiriDomParser(options);
115
117
  parser.setErrorHandler(errorHandler);
116
118
 
117
119
  // Fix for Issue#586. This limits entity expansion up to 100000 and nodes up to 3000.
118
- setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
119
-
120
+ setProperty(SECURITY_MANAGER, new org.apache.xerces.util.SecurityManager());
121
+
120
122
  if (options.noBlanks) {
121
123
  setFeature(FEATURE_INCLUDE_IGNORABLE_WHITESPACE, false);
122
124
  }
123
125
 
126
+ if (options.recover) {
127
+ setFeature(CONTINUE_AFTER_FATAL_ERROR, true);
128
+ }
129
+
124
130
  if (options.dtdValid) {
125
131
  setFeature(FEATURE_VALIDATION, true);
126
132
  }
@@ -367,7 +367,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
367
367
  *
368
368
  * For more information on why this probably is *not* a good thing in general,
369
369
  * please direct your browser to
370
- * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml/
370
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
371
371
  */
372
372
  VALUE remove_namespaces_bang(VALUE self)
373
373
  {
@@ -84,7 +84,7 @@ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
84
84
  }
85
85
 
86
86
  /* work around libxml2 issue: https://bugzilla.gnome.org/show_bug.cgi?id=615612 */
87
- if (retval->type == XML_TEXT_NODE) {
87
+ if (retval && retval->type == XML_TEXT_NODE) {
88
88
  if (retval->prev && retval->prev->type == XML_TEXT_NODE) {
89
89
  retval = xmlTextMerge(retval->prev, retval);
90
90
  }
@@ -699,23 +699,40 @@ static VALUE set(VALUE self, VALUE property, VALUE value)
699
699
  *
700
700
  * Get the value for +attribute+
701
701
  */
702
- static VALUE get(VALUE self, VALUE attribute)
702
+ static VALUE get(VALUE self, VALUE rattribute)
703
703
  {
704
704
  xmlNodePtr node;
705
- xmlChar* propstr ;
706
- VALUE rval ;
707
- Data_Get_Struct(self, xmlNode, node);
705
+ xmlChar* value = 0;
706
+ VALUE rvalue ;
707
+ char* attribute = 0;
708
+ char *colon = 0, *attr_name = 0, *prefix = 0;
709
+ xmlNsPtr ns;
708
710
 
709
- if(NIL_P(attribute)) return Qnil;
711
+ if (NIL_P(rattribute)) return Qnil;
710
712
 
711
- propstr = xmlGetProp(node, (xmlChar *)StringValuePtr(attribute));
713
+ Data_Get_Struct(self, xmlNode, node);
714
+ attribute = strdup(StringValuePtr(rattribute));
715
+
716
+ colon = strchr(attribute, ':');
717
+ if (colon) {
718
+ (*colon) = 0 ; /* create two null-terminated strings of the prefix and attribute name */
719
+ prefix = attribute ;
720
+ attr_name = colon + 1 ;
721
+ ns = xmlSearchNs(node->doc, node, (const xmlChar *)(prefix));
722
+ if (ns) {
723
+ value = xmlGetNsProp(node, (xmlChar*)(attr_name), ns->href);
724
+ }
725
+ } else {
726
+ value = xmlGetNoNsProp(node, (xmlChar*)attribute);
727
+ }
712
728
 
713
- if(!propstr) return Qnil;
729
+ free(attribute);
730
+ if (!value) return Qnil;
714
731
 
715
- rval = NOKOGIRI_STR_NEW2(propstr);
732
+ rvalue = NOKOGIRI_STR_NEW2(value);
733
+ xmlFree(value);
716
734
 
717
- xmlFree(propstr);
718
- return rval ;
735
+ return rvalue ;
719
736
  }
720
737
 
721
738
  /*
@@ -892,7 +909,7 @@ static VALUE node_type(VALUE self)
892
909
  *
893
910
  * Set the content for this Node
894
911
  */
895
- static VALUE set_content(VALUE self, VALUE content)
912
+ static VALUE native_content(VALUE self, VALUE content)
896
913
  {
897
914
  xmlNodePtr node, child, next ;
898
915
  Data_Get_Struct(self, xmlNode, node);
@@ -1288,7 +1305,7 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1288
1305
  child_iter = node;
1289
1306
  while (child_iter->parent)
1290
1307
  child_iter = child_iter->parent;
1291
-
1308
+
1292
1309
  if (child_iter->type == XML_DOCUMENT_FRAG_NODE)
1293
1310
  node->doc->children = NULL;
1294
1311
  }
@@ -1458,6 +1475,7 @@ void init_xml_node()
1458
1475
  rb_define_method(klass, "create_external_subset", create_external_subset, 3);
1459
1476
  rb_define_method(klass, "pointer_id", pointer_id, 0);
1460
1477
  rb_define_method(klass, "line", line, 0);
1478
+ rb_define_method(klass, "native_content=", native_content, 1);
1461
1479
 
1462
1480
  rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
1463
1481
  rb_define_private_method(klass, "in_context", in_context, 2);
@@ -1467,7 +1485,6 @@ void init_xml_node()
1467
1485
  rb_define_private_method(klass, "replace_node", replace, 1);
1468
1486
  rb_define_private_method(klass, "dump_html", dump_html, 0);
1469
1487
  rb_define_private_method(klass, "native_write_to", native_write_to, 4);
1470
- rb_define_private_method(klass, "native_content=", set_content, 1);
1471
1488
  rb_define_private_method(klass, "get", get, 1);
1472
1489
  rb_define_private_method(klass, "set", set, 2);
1473
1490
  rb_define_private_method(klass, "set_namespace", set_namespace, 1);
@@ -7,6 +7,7 @@ static ID id_start_document, id_end_document, id_start_element, id_end_element;
7
7
  static ID id_start_element_namespace, id_end_element_namespace;
8
8
  static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
9
9
  static ID id_cdata_block, id_cAttribute;
10
+ static ID id_processing_instruction;
10
11
 
11
12
  #define STRING_OR_NULL(str) \
12
13
  (RTEST(str) ? StringValuePtr(str) : NULL)
@@ -236,6 +237,19 @@ static void cdata_block(void * ctx, const xmlChar * value, int len)
236
237
  rb_funcall(doc, id_cdata_block, 1, string);
237
238
  }
238
239
 
240
+ static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content)
241
+ {
242
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
243
+ VALUE doc = rb_iv_get(self, "@document");
244
+
245
+ rb_funcall( doc,
246
+ id_processing_instruction,
247
+ 2,
248
+ NOKOGIRI_STR_NEW2(name),
249
+ NOKOGIRI_STR_NEW2(content)
250
+ );
251
+ }
252
+
239
253
  static void deallocate(xmlSAXHandlerPtr handler)
240
254
  {
241
255
  NOKOGIRI_DEBUG_START(handler);
@@ -260,6 +274,7 @@ static VALUE allocate(VALUE klass)
260
274
  handler->warning = warning_func;
261
275
  handler->error = error_func;
262
276
  handler->cdataBlock = cdata_block;
277
+ handler->processingInstruction = processing_instruction;
263
278
  handler->initialized = XML_SAX2_MAGIC;
264
279
 
265
280
  return Data_Wrap_Struct(klass, NULL, deallocate, handler);
@@ -290,4 +305,5 @@ void init_xml_sax_parser()
290
305
  id_cAttribute = rb_intern("Attribute");
291
306
  id_start_element_namespace = rb_intern("start_element_namespace");
292
307
  id_end_element_namespace = rb_intern("end_element_namespace");
308
+ id_processing_instruction = rb_intern("processing_instruction");
293
309
  }
Binary file
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.5.6.rc2'
3
+ VERSION = '1.5.6.rc3'
4
4
 
5
5
  class VersionInfo # :nodoc:
6
6
  def jruby?
@@ -149,13 +149,15 @@ module Nokogiri
149
149
  # Non-prefixed default namespaces (as in "xmlns=") are not included
150
150
  # in the hash.
151
151
  #
152
- # Note this is a very expensive operation in current implementation, as it
153
- # traverses the entire graph, and also has to bring each node across the
154
- # libxml bridge into a ruby object.
152
+ # Note that this method does an xpath lookup for nodes with
153
+ # namespaces, and as a result the order may be dependent on the
154
+ # implementation of the underlying XML library.
155
+ #
155
156
  def collect_namespaces
156
- ns = {}
157
- traverse { |j| ns.merge!(j.namespaces) }
158
- ns
157
+ xpath("//namespace::*").inject({}) do |hash, ns|
158
+ hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
159
+ hash
160
+ end
159
161
  end
160
162
 
161
163
  # Get the list of decorators given +key+
@@ -13,7 +13,8 @@ module Nokogiri
13
13
  children = if ctx
14
14
  # Fix for issue#490
15
15
  if Nokogiri.jruby?
16
- ctx.parse("<root>#{tags}</root>").xpath("/root/node()")
16
+ # fix for issue #770
17
+ ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>").children
17
18
  else
18
19
  ctx.parse(tags)
19
20
  end
@@ -93,6 +94,14 @@ module Nokogiri
93
94
 
94
95
  private
95
96
 
97
+ # fix for issue 770
98
+ def namespace_declarations ctx
99
+ ctx.namespace_scopes.map do |namespace|
100
+ prefix = namespace.prefix.nil? ? "" : ":#{namespace.prefix}"
101
+ %Q{xmlns#{prefix}="#{namespace.href}"}
102
+ end.join ' '
103
+ end
104
+
96
105
  def coerce data
97
106
  return super unless String === data
98
107
 
@@ -251,14 +251,13 @@ module Nokogiri
251
251
  ###
252
252
  # Get the attribute value for the attribute +name+
253
253
  def [] name
254
- return nil unless key?(name.to_s)
255
254
  get(name.to_s)
256
255
  end
257
256
 
258
257
  ###
259
258
  # Set the attribute value for the attribute +name+ to +value+
260
259
  def []= name, value
261
- set name.to_s, value
260
+ set name.to_s, value.to_s
262
261
  end
263
262
 
264
263
  ###
@@ -377,17 +376,22 @@ module Nokogiri
377
376
  #
378
377
  # Also see related method +swap+.
379
378
  def replace node_or_tags
379
+ # We cannot replace a text node directly, otherwise libxml will return
380
+ # an internal error at parser.c:13031, I don't know exactly why
381
+ # libxml is trying to find a parent node that is an element or document
382
+ # so I can't tell if this is a bug in libxml or not. See issue #775.
383
+ if text?
384
+ replacee = Nokogiri::XML::Node.new 'dummy', document
385
+ add_previous_sibling_node replacee
386
+ unlink
387
+ return replacee.replace node_or_tags
388
+ end
389
+
380
390
  node_or_tags = coerce(node_or_tags)
391
+
381
392
  if node_or_tags.is_a?(XML::NodeSet)
382
- if text?
383
- replacee = Nokogiri::XML::Node.new 'dummy', document
384
- add_previous_sibling_node replacee
385
- unlink
386
- else
387
- replacee = self
388
- end
389
- node_or_tags.each { |n| replacee.add_previous_sibling n }
390
- replacee.unlink
393
+ node_or_tags.each { |n| add_previous_sibling n }
394
+ unlink
391
395
  else
392
396
  replace_node node_or_tags
393
397
  end
@@ -158,6 +158,13 @@ module Nokogiri
158
158
  # +string+ contains the cdata content
159
159
  def cdata_block string
160
160
  end
161
+
162
+ ###
163
+ # Called when processing instructions are found
164
+ # +name+ is the target of the instruction
165
+ # +content+ is the value of the instruction
166
+ def processing_instruction name, content
167
+ end
161
168
  end
162
169
  end
163
170
  end
@@ -6,7 +6,7 @@ module Nokogiri
6
6
  # Register namespaces in +namespaces+
7
7
  def register_namespaces(namespaces)
8
8
  namespaces.each do |k, v|
9
- k = k.gsub(/.*:/,'') # strip off 'xmlns:' or 'xml:'
9
+ k = k.to_s.gsub(/.*:/,'') # strip off 'xmlns:' or 'xml:'
10
10
  register_ns(k, v)
11
11
  end
12
12
  end
@@ -78,6 +78,7 @@ module Nokogiri
78
78
  attr_reader :data, :comments, :cdata_blocks, :start_elements_namespace
79
79
  attr_reader :errors, :warnings, :end_elements_namespace
80
80
  attr_reader :xmldecls
81
+ attr_reader :processing_instructions
81
82
 
82
83
  def xmldecl version, encoding, standalone
83
84
  @xmldecls = [version, encoding, standalone].compact
@@ -141,6 +142,11 @@ module Nokogiri
141
142
  @cdata_blocks += [string]
142
143
  super
143
144
  end
145
+
146
+ def processing_instruction name, content
147
+ @processing_instructions ||= []
148
+ @processing_instructions << [name, content]
149
+ end
144
150
  end
145
151
  end
146
152
  end
@@ -24,6 +24,11 @@ module Nokogiri
24
24
  end
25
25
  end
26
26
 
27
+ def test_colons_are_not_removed
28
+ doc = Nokogiri::HTML::DocumentFragment.parse("<span>3:30pm</span>")
29
+ assert_match /3:30/, doc.to_s
30
+ end
31
+
27
32
  def test_parse_encoding
28
33
  fragment = "<div>hello world</div>"
29
34
  f = Nokogiri::HTML::DocumentFragment.parse fragment, 'ISO-8859-1'
@@ -173,7 +173,12 @@ module Nokogiri
173
173
  end
174
174
  end
175
175
 
176
- assert_equal doc.errors.length, @parser.document.errors.length
176
+ # when using JRuby Nokogiri, more errors will be generated as the DOM
177
+ # parser continues to parse an ill-formed document, while the SAX parser
178
+ # will stop at the first error
179
+ unless Nokogiri.jruby?
180
+ assert_equal doc.errors.length, @parser.document.errors.length
181
+ end
177
182
  end
178
183
 
179
184
  def test_parse_with_memory_argument
@@ -313,6 +318,15 @@ module Nokogiri
313
318
  @parser.document.start_elements
314
319
  end
315
320
 
321
+ def test_processing_instruction
322
+ @parser.parse_memory(<<-eoxml)
323
+ <?xml-stylesheet href="a.xsl" type="text/xsl"?>
324
+ <?xml version="1.0"?>
325
+ eoxml
326
+ assert_equal [['xml-stylesheet', 'href="a.xsl" type="text/xsl"']],
327
+ @parser.document.processing_instructions
328
+ end
329
+
316
330
  if Nokogiri.uses_libxml? # JRuby SAXParser only parses well-formed XML documents
317
331
  def test_parse_document
318
332
  @parser.parse_memory(<<-eoxml)
@@ -209,6 +209,25 @@ module Nokogiri
209
209
  assert_equal ["bbb","ccc"], builder.doc.at_css("aaa").children.collect(&:name)
210
210
  end
211
211
 
212
+ def test_raw_xml_append_with_namespaces
213
+ doc = Nokogiri::XML::Builder.new do |xml|
214
+ xml.root("xmlns:foo" => "x", "xmlns" => "y") do
215
+ xml << '<Element foo:bar="bazz"/>'
216
+ end
217
+ end.doc
218
+
219
+ el = doc.at 'Element'
220
+ assert_not_nil el
221
+
222
+ assert_equal 'y', el.namespace.href
223
+ assert_nil el.namespace.prefix
224
+
225
+ attr = el.attributes["bar"]
226
+ assert_not_nil attr
227
+ assert_not_nil attr.namespace
228
+ assert_equal "foo", attr.namespace.prefix
229
+ end
230
+
212
231
  def test_cdata
213
232
  builder = Nokogiri::XML::Builder.new do
214
233
  root {
@@ -16,11 +16,27 @@ module Nokogiri
16
16
  @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
17
17
  end
18
18
 
19
+ def test_document_with_initial_space
20
+ doc = Nokogiri::XML(" <?xml version='1.0' encoding='utf-8' ?><first \>")
21
+ assert_equal 2, doc.children.size
22
+ end
23
+
19
24
  def test_root_set_to_nil
20
25
  @xml.root = nil
21
26
  assert_equal nil, @xml.root
22
27
  end
23
28
 
29
+ def test_ignore_unknown_namespace
30
+ doc = Nokogiri::XML(<<-eoxml)
31
+ <xml>
32
+ <unknown:foo xmlns='hello' />
33
+ <bar />
34
+ </xml>
35
+ eoxml
36
+ refute doc.xpath('//foo').first.namespace # assert that the namespace is nil
37
+ refute_empty doc.xpath('//bar'), "bar wasn't found in the document" # bar should be part of the doc
38
+ end
39
+
24
40
  def test_collect_namespaces
25
41
  doc = Nokogiri::XML(<<-eoxml)
26
42
  <xml>
@@ -716,26 +732,43 @@ module Nokogiri
716
732
  assert @xml.children.respond_to?(:awesome!)
717
733
  end
718
734
 
719
- def test_java_integration
720
- if Nokogiri.jruby?
735
+ if Nokogiri.jruby?
736
+ def wrap_java_document
721
737
  require 'java'
722
738
  factory = javax.xml.parsers.DocumentBuilderFactory.newInstance
723
739
  builder = factory.newDocumentBuilder
724
740
  document = builder.newDocument
725
741
  root = document.createElement("foo")
726
742
  document.appendChild(root)
727
- noko_doc = Nokogiri::XML::Document.wrap(document)
728
- assert_equal 'foo', noko_doc.root.name
743
+ Nokogiri::XML::Document.wrap(document)
744
+ end
745
+ end
729
746
 
730
- noko_doc = Nokogiri::XML(<<eoxml)
747
+ def test_java_integration
748
+ skip("Ruby doesn't have the wrap method") unless Nokogiri.jruby?
749
+ noko_doc = wrap_java_document
750
+ assert_equal 'foo', noko_doc.root.name
751
+
752
+ noko_doc = Nokogiri::XML(<<eoxml)
731
753
  <foo xmlns='hello'>
732
754
  <bar xmlns:foo='world' />
733
755
  </foo>
734
756
  eoxml
735
- dom = noko_doc.to_java
736
- assert dom.kind_of? org.w3c.dom.Document
737
- assert_equal 'foo', dom.getDocumentElement().getTagName()
738
- end
757
+ dom = noko_doc.to_java
758
+ assert dom.kind_of? org.w3c.dom.Document
759
+ assert_equal 'foo', dom.getDocumentElement().getTagName()
760
+ end
761
+
762
+ def test_add_child
763
+ skip("Ruby doesn't have the wrap method") unless Nokogiri.jruby?
764
+ doc = wrap_java_document
765
+ doc.root.add_child "<bar />"
766
+ end
767
+
768
+ def test_can_be_closed
769
+ f = File.open XML_FILE
770
+ Nokogiri::XML f
771
+ f.close
739
772
  end
740
773
  end
741
774
  end