nokogiri 1.5.5.rc3-java → 1.5.6-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (67) hide show
  1. data/CHANGELOG.ja.rdoc +42 -1
  2. data/CHANGELOG.rdoc +41 -1
  3. data/Manifest.txt +8 -1
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +5 -8
  6. data/ROADMAP.md +6 -2
  7. data/Rakefile +29 -7
  8. data/bin/nokogiri +19 -4
  9. data/build_all +56 -17
  10. data/ext/java/nokogiri/HtmlDocument.java +26 -0
  11. data/ext/java/nokogiri/NokogiriService.java +7 -1
  12. data/ext/java/nokogiri/XmlDocument.java +24 -6
  13. data/ext/java/nokogiri/XmlDocumentFragment.java +2 -26
  14. data/ext/java/nokogiri/XmlDtd.java +13 -2
  15. data/ext/java/nokogiri/XmlElement.java +3 -12
  16. data/ext/java/nokogiri/XmlEntityReference.java +32 -8
  17. data/ext/java/nokogiri/XmlNamespace.java +2 -1
  18. data/ext/java/nokogiri/XmlNode.java +83 -31
  19. data/ext/java/nokogiri/XmlSaxPushParser.java +55 -53
  20. data/ext/java/nokogiri/XmlText.java +2 -14
  21. data/ext/java/nokogiri/XsltStylesheet.java +4 -2
  22. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  23. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +2 -2
  24. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  25. data/ext/java/nokogiri/internals/{XmlDomParser.java → NokogiriDomParser.java} +25 -14
  26. data/ext/java/nokogiri/internals/NokogiriEncodingReaderWrapper.java +109 -0
  27. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +123 -0
  28. data/ext/java/nokogiri/internals/NokogiriHandler.java +22 -14
  29. data/ext/java/nokogiri/internals/NokogiriHelpers.java +25 -7
  30. data/ext/java/nokogiri/internals/ParserContext.java +2 -1
  31. data/ext/java/nokogiri/internals/ReaderNode.java +2 -1
  32. data/ext/java/nokogiri/internals/SaveContextVisitor.java +100 -102
  33. data/ext/java/nokogiri/internals/XmlDomParserContext.java +10 -4
  34. data/ext/nokogiri/extconf.rb +1 -0
  35. data/ext/nokogiri/xml_document.c +2 -2
  36. data/ext/nokogiri/xml_node.c +31 -14
  37. data/ext/nokogiri/xml_sax_parser.c +16 -0
  38. data/ext/nokogiri/xslt_stylesheet.c +19 -2
  39. data/lib/nekodtd.jar +0 -0
  40. data/lib/nokogiri/nokogiri.jar +0 -0
  41. data/lib/nokogiri/version.rb +4 -1
  42. data/lib/nokogiri/xml/document.rb +8 -6
  43. data/lib/nokogiri/xml/document_fragment.rb +10 -1
  44. data/lib/nokogiri/xml/node.rb +58 -61
  45. data/lib/nokogiri/xml/sax/document.rb +7 -0
  46. data/lib/nokogiri/xml/sax/parser.rb +7 -0
  47. data/lib/nokogiri/xml/xpath_context.rb +1 -1
  48. data/lib/nokogiri/xslt.rb +1 -1
  49. data/tasks/cross_compile.rb +5 -8
  50. data/test/files/test_document_url/bar.xml +2 -0
  51. data/test/files/test_document_url/document.dtd +4 -0
  52. data/test/files/test_document_url/document.xml +6 -0
  53. data/test/helper.rb +6 -0
  54. data/test/html/test_document.rb +23 -0
  55. data/test/html/test_document_fragment.rb +5 -0
  56. data/test/test_xslt_transforms.rb +30 -0
  57. data/test/xml/sax/test_parser.rb +20 -1
  58. data/test/xml/test_builder.rb +42 -0
  59. data/test/xml/test_document.rb +64 -9
  60. data/test/xml/test_document_fragment.rb +7 -0
  61. data/test/xml/test_entity_reference.rb +12 -0
  62. data/test/xml/test_namespace.rb +20 -0
  63. data/test/xml/test_node.rb +79 -0
  64. data/test/xml/test_node_attributes.rb +29 -0
  65. data/test/xml/test_unparented_node.rb +9 -0
  66. data/test_all +11 -14
  67. metadata +744 -560
@@ -100,6 +100,7 @@ public class NokogiriService implements BasicLibraryService {
100
100
  RubyModule htmlSaxModule = htmlModule.defineModuleUnder("SAX");
101
101
  RubyModule xsltModule = nokogiri.defineModuleUnder("XSLT");
102
102
 
103
+ createJavaLibraryVersionConstants(ruby, nokogiri);
103
104
  createNokogiriModule(ruby, nokogiri);
104
105
  createSyntaxErrors(ruby, nokogiri, xmlModule);
105
106
  RubyClass xmlNode = createXmlModule(ruby, xmlModule);
@@ -108,7 +109,12 @@ public class NokogiriService implements BasicLibraryService {
108
109
  createSaxModule(ruby, xmlSaxModule, htmlSaxModule);
109
110
  createXsltModule(ruby, xsltModule);
110
111
  }
111
-
112
+
113
+ private void createJavaLibraryVersionConstants(Ruby ruby, RubyModule nokogiri) {
114
+ nokogiri.defineConstant("XERCES_VERSION", ruby.newString(org.apache.xerces.impl.Version.getVersion()));
115
+ nokogiri.defineConstant("NEKO_VERSION", ruby.newString(org.cyberneko.html.Version.getVersion()));
116
+ }
117
+
112
118
  private void createNokogiriModule(Ruby ruby, RubyModule nokogiri) {;
113
119
  RubyClass encHandler = nokogiri.defineClassUnder("EncodingHandler", ruby.getObject(), ENCODING_HANDLER_ALLOCATOR);
114
120
  encHandler.defineAnnotatedMethods(EncodingHandler.class);
@@ -64,6 +64,7 @@ import org.jruby.runtime.ThreadContext;
64
64
  import org.jruby.runtime.builtin.IRubyObject;
65
65
  import org.w3c.dom.Attr;
66
66
  import org.w3c.dom.Document;
67
+ import org.w3c.dom.DocumentType;
67
68
  import org.w3c.dom.NamedNodeMap;
68
69
  import org.w3c.dom.Node;
69
70
  import org.w3c.dom.NodeList;
@@ -106,7 +107,7 @@ public class XmlDocument extends XmlNode {
106
107
 
107
108
  public XmlDocument(Ruby ruby, RubyClass klass, Document document) {
108
109
  super(ruby, klass, document);
109
- nsCache = new NokogiriNamespaceCache();
110
+ initializeNamespaceCacheIfNecessary();
110
111
  createAndCacheNamespaces(ruby, document.getDocumentElement());
111
112
  stabilizeTextContent(document);
112
113
  setInstanceVariable("@decorators", ruby.getNil());
@@ -114,7 +115,7 @@ public class XmlDocument extends XmlNode {
114
115
 
115
116
  public void setDocumentNode(ThreadContext context, Node node) {
116
117
  super.setNode(context, node);
117
- if (nsCache == null) nsCache = new NokogiriNamespaceCache();
118
+ initializeNamespaceCacheIfNecessary();
118
119
  Ruby runtime = context.getRuntime();
119
120
  if (node != null) {
120
121
  Document document = (Document)node;
@@ -195,6 +196,10 @@ public class XmlDocument extends XmlNode {
195
196
  return nsCache;
196
197
  }
197
198
 
199
+ public void initializeNamespaceCacheIfNecessary() {
200
+ if (nsCache == null) nsCache = new NokogiriNamespaceCache();
201
+ }
202
+
198
203
  public void setNamespaceCache(NokogiriNamespaceCache nsCache) {
199
204
  this.nsCache = nsCache;
200
205
  }
@@ -284,7 +289,7 @@ public class XmlDocument extends XmlNode {
284
289
 
285
290
  @JRubyMethod
286
291
  public IRubyObject encoding(ThreadContext context) {
287
- if (this.encoding == null) {
292
+ if (this.encoding == null || this.encoding.isNil()) {
288
293
  if (getDocument().getXmlEncoding() == null) {
289
294
  this.encoding = context.getRuntime().getNil();
290
295
  } else {
@@ -292,7 +297,7 @@ public class XmlDocument extends XmlNode {
292
297
  }
293
298
  }
294
299
 
295
- return this.encoding;
300
+ return this.encoding.isNil() ? this.encoding : this.encoding.asString().encode(context, context.getRuntime().newString("UTF-8"));
296
301
  }
297
302
 
298
303
  @JRubyMethod(meta = true)
@@ -434,8 +439,20 @@ public class XmlDocument extends XmlNode {
434
439
  IRubyObject dtd = (IRubyObject) node.getUserData(DTD_INTERNAL_SUBSET);
435
440
 
436
441
  if (dtd == null) {
437
- if (getDocument().getDoctype() == null) dtd = context.getRuntime().getNil();
438
- else dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), getDocument());
442
+ Document document = getDocument();
443
+ if (document.getUserData(XmlDocument.DTD_RAW_DOCUMENT) != null) {
444
+ dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), document);
445
+ } else if (document.getDoctype() != null) {
446
+ DocumentType docType = document.getDoctype();
447
+ dtd = XmlDtd.newEmpty(context.getRuntime(),
448
+ document,
449
+ context.getRuntime().newString(docType.getName()),
450
+ context.getRuntime().newString(docType.getPublicId()),
451
+ context.getRuntime().newString(docType.getSystemId()));
452
+ } else {
453
+ dtd = context.getRuntime().getNil();
454
+ }
455
+
439
456
  setInternalSubset(dtd);
440
457
  }
441
458
 
@@ -518,6 +535,7 @@ public class XmlDocument extends XmlNode {
518
535
  @JRubyMethod(meta=true)
519
536
  public static IRubyObject wrapJavaDocument(ThreadContext context, IRubyObject klazz, IRubyObject arg) {
520
537
  XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), getNokogiriClass(context.getRuntime(), "Nokogiri::XML::Document"));
538
+ RuntimeHelpers.invoke(context, xmlDocument, "initialize");
521
539
  Document document = (Document)arg.toJava(Document.class);
522
540
  xmlDocument.setDocumentNode(context, document);
523
541
  return xmlDocument;
@@ -33,7 +33,6 @@
33
33
  package nokogiri;
34
34
 
35
35
  import static nokogiri.internals.NokogiriHelpers.getLocalNameForNamespace;
36
- import static nokogiri.internals.NokogiriHelpers.getLocalPart;
37
36
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
38
37
  import static nokogiri.internals.NokogiriHelpers.getPrefix;
39
38
  import static nokogiri.internals.NokogiriHelpers.isNamespace;
@@ -93,7 +92,6 @@ public class XmlDocumentFragment extends XmlNode {
93
92
  if (args.length > 1 && args[1] instanceof RubyString) {
94
93
  args[1] = trim(context, doc, (RubyString)args[1]);
95
94
  if (XmlDocumentFragment.isTag((RubyString)args[1])) {
96
- args[1] = RubyString.newString(context.getRuntime(), ignoreNamespaceIfNeeded(doc, rubyStringToString(args[1])));
97
95
  args[1] = RubyString.newString(context.getRuntime(), addNamespaceDeclIfNeeded(doc, rubyStringToString(args[1])));
98
96
  }
99
97
  }
@@ -127,32 +125,10 @@ public class XmlDocumentFragment extends XmlNode {
127
125
  if (str.startsWith("<") && str.endsWith(">")) return true;
128
126
  return false;
129
127
  }
130
-
128
+
131
129
  private static Pattern qname_pattern = Pattern.compile("[^</:>\\s]+:[^</:>=\\s]+");
132
130
  private static Pattern starttag_pattern = Pattern.compile("<[^</>]+>");
133
-
134
- private static String ignoreNamespaceIfNeeded(XmlDocument doc, String tags) {
135
- if (doc.getDocument() == null) return tags;
136
- Matcher matcher = qname_pattern.matcher(tags);
137
- Map<String, String> rewriteTable = new HashMap<String, String>();
138
- while(matcher.find()) {
139
- String qName = matcher.group();
140
- if (doc.getDocument().getDocumentElement() != null) {
141
- NamedNodeMap nodeMap = doc.getDocument().getDocumentElement().getAttributes();
142
- if (!isNamespaceDefined(qName, nodeMap)) {
143
- rewriteTable.put(qName, getLocalPart(qName));
144
- }
145
- } else {
146
- rewriteTable.put(qName, getLocalPart(qName));
147
- }
148
- }
149
- Set<String> keys = rewriteTable.keySet();
150
- for (String key : keys) {
151
- tags = tags.replace(key, rewriteTable.get(key));
152
- }
153
- return tags;
154
- }
155
-
131
+
156
132
  private static boolean isNamespaceDefined(String qName, NamedNodeMap nodeMap) {
157
133
  if (isNamespace(qName.intern())) return true;
158
134
  for (int i=0; i < nodeMap.getLength(); i++) {
@@ -129,9 +129,20 @@ public class XmlDtd extends XmlNode {
129
129
  IRubyObject name,
130
130
  IRubyObject external_id,
131
131
  IRubyObject system_id) {
132
- Element placeHolder = doc.createElement("dtd_placeholder");
132
+
133
+ DocumentType placeholder = null;
134
+ if (doc.getDoctype() == null) {
135
+ String javaName = NokogiriHelpers.rubyStringToString(name);
136
+ String javaExternalId = NokogiriHelpers.rubyStringToString(external_id);
137
+ String javaSystemId = NokogiriHelpers.rubyStringToString(system_id);
138
+ placeholder = doc.getImplementation().createDocumentType(javaName, javaExternalId, javaSystemId);
139
+ doc.appendChild(placeholder);
140
+ } else {
141
+ placeholder = doc.getDoctype();
142
+ }
143
+ // FIXME: what if the document had a doc type? Why are we here?
133
144
  XmlDtd dtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, "Nokogiri::XML::DTD"));
134
- dtd.setNode(runtime, placeHolder);
145
+ dtd.setNode(runtime, placeholder);
135
146
  dtd.name = name;
136
147
  dtd.pubId = external_id;
137
148
  dtd.sysId = system_id;
@@ -38,7 +38,6 @@ import org.jruby.Ruby;
38
38
  import org.jruby.RubyArray;
39
39
  import org.jruby.RubyClass;
40
40
  import org.jruby.anno.JRubyClass;
41
- import org.jruby.javasupport.util.RuntimeHelpers;
42
41
  import org.jruby.runtime.ThreadContext;
43
42
  import org.w3c.dom.Element;
44
43
  import org.w3c.dom.Node;
@@ -62,17 +61,9 @@ public class XmlElement extends XmlNode {
62
61
 
63
62
  @Override
64
63
  public void setNode(ThreadContext context, Node node) {
65
- this.node = node;
66
- if (node != null) {
67
- resetCache();
68
- if (node.getNodeType() != Node.DOCUMENT_NODE) {
69
- doc = document(context);
70
- setInstanceVariable("@document", doc);
71
- if (doc != null) {
72
- RuntimeHelpers.invoke(context, doc, "decorate", this);
73
- }
74
- }
75
- }
64
+ super.setNode(context, node);
65
+ if (doc != null)
66
+ setInstanceVariable("@document", doc);
76
67
  }
77
68
 
78
69
  @Override
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * (The MIT License)
3
3
  *
4
- * Copyright (c) 2008 - 2011:
4
+ * Copyright (c) 2008 - 2012:
5
5
  *
6
6
  * * {Aaron Patterson}[http://tenderlovemaking.com]
7
7
  * * {Mike Dalessio}[http://mike.daless.io]
@@ -34,7 +34,6 @@ package nokogiri;
34
34
 
35
35
  import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate;
36
36
  import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
37
-
38
37
  import nokogiri.internals.SaveContextVisitor;
39
38
 
40
39
  import org.jruby.Ruby;
@@ -43,7 +42,6 @@ import org.jruby.anno.JRubyClass;
43
42
  import org.jruby.runtime.ThreadContext;
44
43
  import org.jruby.runtime.builtin.IRubyObject;
45
44
  import org.w3c.dom.Document;
46
- import org.w3c.dom.EntityReference;
47
45
  import org.w3c.dom.Node;
48
46
 
49
47
  /**
@@ -72,14 +70,21 @@ public class XmlEntityReference extends XmlNode {
72
70
  IRubyObject doc = args[0];
73
71
  IRubyObject name = args[1];
74
72
 
75
- Document document = ((XmlNode) doc).getOwnerDocument();
76
- Node node = document.createEntityReference(rubyStringToString(name));
77
- setNode(context, node);
73
+ Document owner = ((XmlNode) doc).getOwnerDocument();
74
+ Node node = new NokogiriEntityReference(owner, rubyStringToString(name));
75
+ super.setNode(context, node);
76
+ }
77
+
78
+ public void setNode(ThreadContext context, Node entityRef) {
79
+ Document owner = entityRef.getOwnerDocument();
80
+ String name = entityRef.getNodeName();
81
+ Node node = new NokogiriEntityReference(owner, name);
82
+ super.setNode(context, node);
78
83
  }
79
84
 
80
85
  @Override
81
86
  public void accept(ThreadContext context, SaveContextVisitor visitor) {
82
- visitor.enter((EntityReference)node);
87
+ visitor.enterEntityReference((NokogiriEntityReference)node);
83
88
  Node child = node.getFirstChild();
84
89
  while (child != null) {
85
90
  IRubyObject nokoNode = getCachedNodeOrCreate(context.getRuntime(), child);
@@ -92,6 +97,25 @@ public class XmlEntityReference extends XmlNode {
92
97
  }
93
98
  child = child.getNextSibling();
94
99
  }
95
- visitor.leave((EntityReference)node);
100
+ visitor.leaveEntityReference((NokogiriEntityReference)node);
101
+ }
102
+
103
+ public class NokogiriEntityReference extends org.apache.xerces.dom.TextImpl {
104
+ // Nokogiri's EntityReference should quack like an org.w3c.dom.Text node.
105
+ // An EntityReference node should not raise an exception for names such as #xa.
106
+ // This is the bug reported in issue #719.
107
+ // Also, EntityReference node should not bother xpath.
108
+ // For this purpose, the node type should be Node.TEXT_NODE.
109
+ public NokogiriEntityReference(Document owner, String name) {
110
+ super((org.apache.xerces.dom.DocumentImpl)owner, name);
111
+ }
112
+
113
+ public short getNodeType() {
114
+ return Node.TEXT_NODE;
115
+ }
116
+
117
+ public String getNodeName() {
118
+ return getNodeValue();
119
+ }
96
120
  }
97
121
  }
@@ -98,7 +98,6 @@ public class XmlNamespace extends RubyObject {
98
98
  this.href = href;
99
99
  this.prefixString = prefixString;
100
100
  this.hrefString = hrefString;
101
- this.attr.setUserData(CACHED_NODE, this, null);
102
101
  setInstanceVariable("@document", xmlDocument);
103
102
  }
104
103
 
@@ -115,6 +114,7 @@ public class XmlNamespace extends RubyObject {
115
114
  IRubyObject href_value = RubyString.newString(runtime, hrefValue);
116
115
  // check namespace cache
117
116
  XmlDocument xmlDocument = (XmlDocument)getCachedNodeOrCreate(runtime, attr.getOwnerDocument());
117
+ xmlDocument.initializeNamespaceCacheIfNecessary();
118
118
  XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefixValue, hrefValue);
119
119
  if (xmlNamespace != null) return xmlNamespace;
120
120
 
@@ -135,6 +135,7 @@ public class XmlNamespace extends RubyObject {
135
135
  Document document = owner.getOwnerDocument();
136
136
  // check namespace cache
137
137
  XmlDocument xmlDocument = (XmlDocument)getCachedNodeOrCreate(runtime, document);
138
+ xmlDocument.initializeNamespaceCacheIfNecessary();
138
139
  XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefixValue, hrefValue);
139
140
  if (xmlNamespace != null) return xmlNamespace;
140
141
 
@@ -45,6 +45,7 @@ import java.io.InputStream;
45
45
  import java.nio.charset.CharacterCodingException;
46
46
  import java.nio.charset.Charset;
47
47
  import java.util.ArrayList;
48
+ import java.util.Iterator;
48
49
  import java.util.List;
49
50
 
50
51
  import nokogiri.internals.HtmlDomParserContext;
@@ -210,22 +211,23 @@ public class XmlNode extends RubyObject {
210
211
  */
211
212
  public XmlNode(Ruby ruby, RubyClass cls, Node node) {
212
213
  super(ruby, cls);
213
- this.node = node;
214
+ setNode(ruby.getCurrentContext(), node);
215
+ }
214
216
 
217
+ protected void decorate(Ruby ruby) {
215
218
  if (node != null) {
216
219
  resetCache();
217
220
 
218
221
  if (node.getNodeType() != Node.DOCUMENT_NODE) {
219
222
  doc = document(ruby.getCurrentContext());
220
223
 
221
- if (doc != null) {
224
+ if (doc != null && doc.isTrue()) {
222
225
  RuntimeHelpers.invoke(ruby.getCurrentContext(), doc, "decorate", this);
223
226
  }
224
227
  }
225
228
  }
226
-
227
229
  }
228
-
230
+
229
231
  /**
230
232
  * Create and return a copy of this object.
231
233
  *
@@ -448,6 +450,7 @@ public class XmlNode extends RubyObject {
448
450
  public void relink_namespace(ThreadContext context) {
449
451
  if (node instanceof Element) {
450
452
  Element e = (Element) node;
453
+ e.getOwnerDocument().setStrictErrorChecking(false);
451
454
  e.getOwnerDocument().renameNode(e, e.lookupNamespaceURI(e.getPrefix()), e.getNodeName());
452
455
 
453
456
  if (e.hasAttributes()) {
@@ -465,6 +468,9 @@ public class XmlNode extends RubyObject {
465
468
  } else {
466
469
  nsUri = attr.getNamespaceURI();
467
470
  }
471
+ if (!(nsUri == null || "".equals(nsUri))) {
472
+ XmlNamespace.createFromAttr(context.getRuntime(), attr);
473
+ }
468
474
  e.getOwnerDocument().renameNode(attr, nsUri, nodeName);
469
475
  }
470
476
  }
@@ -509,13 +515,8 @@ public class XmlNode extends RubyObject {
509
515
 
510
516
  public void setNode(ThreadContext context, Node node) {
511
517
  this.node = node;
512
-
513
- if (node != null) {
514
- resetCache();
515
- if (node.getNodeType() != Node.DOCUMENT_NODE) {
516
- doc = document(context);
517
- }
518
- }
518
+
519
+ decorate(context.getRuntime());
519
520
 
520
521
  if (this instanceof XmlAttr) {
521
522
  ((XmlAttr)this).setNamespaceIfNecessary(context.getRuntime());
@@ -547,7 +548,7 @@ public class XmlNode extends RubyObject {
547
548
  str = NokogiriHelpers.getLocalPart(str);
548
549
  }
549
550
  if (str == null) str = "";
550
- name = context.getRuntime().newString(str);
551
+ name = NokogiriHelpers.stringOrBlank(context.getRuntime(), str);
551
552
  return name;
552
553
  }
553
554
 
@@ -804,23 +805,46 @@ public class XmlNode extends RubyObject {
804
805
 
805
806
  @JRubyMethod(name = {"content", "text", "inner_text"})
806
807
  public IRubyObject content(ThreadContext context) {
807
- if (content != null && content.isNil()) return content;
808
+ if (!node.hasChildNodes() && node.getNodeValue() == null &&
809
+ (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE))
810
+ return context.nil;
808
811
  String textContent;
809
- if (content != null) textContent = rubyStringToString(content);
810
- else if (this instanceof XmlDocument) {
812
+ if (this instanceof XmlDocument) {
811
813
  Node node = ((Document)this.node).getDocumentElement();
812
814
  if (node == null) {
813
815
  textContent = "";
814
816
  } else {
815
- textContent = ((Document)this.node).getDocumentElement().getTextContent().trim();
817
+ Node documentElement = ((Document)this.node).getDocumentElement();
818
+ StringBuffer buffer = new StringBuffer();
819
+ getTextContentRecursively(context, buffer, documentElement);
820
+ textContent = buffer.toString();
816
821
  }
817
822
  } else {
818
- textContent = this.node.getTextContent();
823
+ StringBuffer buffer = new StringBuffer();
824
+ getTextContentRecursively(context, buffer, node);
825
+ textContent = buffer.toString();
819
826
  }
820
- textContent = NokogiriHelpers.convertEncodingByNKFIfNecessary(context.getRuntime(), (XmlDocument)document(context), textContent);
821
- String decodedText = null;
822
- if (textContent != null) decodedText = NokogiriHelpers.decodeJavaString(textContent);
823
- return stringOrNil(context.getRuntime(), decodedText);
827
+ NokogiriHelpers.convertEncodingByNKFIfNecessary(context.getRuntime(), (XmlDocument)document(context), textContent);
828
+ return stringOrNil(context.getRuntime(), textContent);
829
+ }
830
+
831
+ private void getTextContentRecursively(ThreadContext context, StringBuffer buffer, Node currentNode) {
832
+ String textContent = currentNode.getNodeValue();
833
+ if (textContent != null && NokogiriHelpers.shouldDecode(currentNode))
834
+ textContent = NokogiriHelpers.decodeJavaString(textContent);
835
+ if (textContent != null)
836
+ buffer.append(textContent);
837
+ NodeList children = currentNode.getChildNodes();
838
+ for (int i = 0; i < children.getLength(); i++) {
839
+ Node child = children.item(i);
840
+ if (hasTextContent(child))
841
+ getTextContentRecursively(context, buffer, child);
842
+ }
843
+ }
844
+
845
+ private boolean hasTextContent(Node child) {
846
+ return child.getNodeType() != Node.COMMENT_NODE &&
847
+ child.getNodeType() != Node.PROCESSING_INSTRUCTION_NODE;
824
848
  }
825
849
 
826
850
  @JRubyMethod
@@ -888,9 +912,7 @@ public class XmlNode extends RubyObject {
888
912
  String key = rubyStringToString(rbkey);
889
913
  Element element = (Element) node;
890
914
  String value = element.getAttribute(key);
891
- if (value != null) {
892
- return context.getRuntime().newString(value);
893
- }
915
+ return nonEmptyStringOrNil(context.getRuntime(), value);
894
916
  }
895
917
  return context.getRuntime().getNil();
896
918
  }
@@ -1000,7 +1022,7 @@ public class XmlNode extends RubyObject {
1000
1022
  NokogiriNamespaceCache nsCache = xmlDocument.getNamespaceCache();
1001
1023
  String prefix = node.getPrefix();
1002
1024
  XmlNamespace namespace = nsCache.get(prefix == null ? "" : prefix, node.getNamespaceURI());
1003
- if (namespace == null || ((XmlNamespace) namespace).isEmpty()) {
1025
+ if (namespace == null || namespace.isEmpty()) {
1004
1026
  return context.getRuntime().getNil();
1005
1027
  }
1006
1028
 
@@ -1023,10 +1045,10 @@ public class XmlNode extends RubyObject {
1023
1045
  if (doc instanceof HtmlDocument) return namespace_definitions;
1024
1046
  List<XmlNamespace> namespaces = ((XmlDocument)doc).getNamespaceCache().get(node);
1025
1047
  for (XmlNamespace namespace : namespaces) {
1026
- ((RubyArray)namespace_definitions).append(namespace);
1048
+ namespace_definitions.append(namespace);
1027
1049
  }
1028
1050
 
1029
- return (RubyArray) namespace_definitions;
1051
+ return namespace_definitions;
1030
1052
  }
1031
1053
 
1032
1054
  /**
@@ -1056,12 +1078,13 @@ public class XmlNode extends RubyObject {
1056
1078
  }
1057
1079
 
1058
1080
  protected void setContent(IRubyObject content) {
1059
- this.content = content;
1060
1081
  String javaContent = rubyStringToString(content);
1061
1082
  node.setTextContent(javaContent);
1062
1083
  if (javaContent.length() == 0) return;
1063
1084
  if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE) return;
1064
- node.getFirstChild().setUserData(NokogiriHelpers.ENCODED_STRING, true, null);
1085
+ if (node.getFirstChild() != null) {
1086
+ node.getFirstChild().setUserData(NokogiriHelpers.ENCODED_STRING, true, null);
1087
+ }
1065
1088
  }
1066
1089
 
1067
1090
  private void setContent(String content) {
@@ -1069,7 +1092,7 @@ public class XmlNode extends RubyObject {
1069
1092
  this.content = null; // clear cache
1070
1093
  }
1071
1094
 
1072
- @JRubyMethod(name = "native_content=", visibility = Visibility.PRIVATE)
1095
+ @JRubyMethod(name = "native_content=")
1073
1096
  public IRubyObject native_content_set(ThreadContext context, IRubyObject content) {
1074
1097
  setContent(content);
1075
1098
  return content;
@@ -1158,13 +1181,42 @@ public class XmlNode extends RubyObject {
1158
1181
  String key = rubyStringToString(rbkey);
1159
1182
  String val = rubyStringToString(rbval);
1160
1183
  Element element = (Element) node;
1161
- element.setAttribute(key, val);
1184
+
1185
+ int colonIndex = key.indexOf(":");
1186
+ if (colonIndex > 0) {
1187
+ String prefix = key.substring(0, colonIndex);
1188
+ String uri = null;
1189
+ if (prefix.equals("xml")) {
1190
+ uri = "http://www.w3.org/XML/1998/namespace";
1191
+ } else {
1192
+ uri = findNamespaceHref(context, prefix);
1193
+ }
1194
+ element.setAttributeNS(uri, key, val);
1195
+ } else {
1196
+ element.setAttribute(key, val);
1197
+ }
1162
1198
  return this;
1163
1199
  } else {
1164
1200
  return rbval;
1165
1201
  }
1166
1202
  }
1167
1203
 
1204
+ private String findNamespaceHref(ThreadContext context, String prefix) {
1205
+ XmlNode currentNode = this;
1206
+ while(currentNode != document(context)) {
1207
+ RubyArray namespaces = (RubyArray) currentNode.namespace_scopes(context);
1208
+ Iterator iterator = namespaces.iterator();
1209
+ while(iterator.hasNext()) {
1210
+ XmlNamespace namespace = (XmlNamespace) iterator.next();
1211
+ if (namespace.getPrefix().equals(prefix)) {
1212
+ return namespace.getHref();
1213
+ }
1214
+ }
1215
+ currentNode = (XmlNode) currentNode.parent(context);
1216
+ }
1217
+ return null;
1218
+ }
1219
+
1168
1220
  @JRubyMethod
1169
1221
  public IRubyObject parent(ThreadContext context) {
1170
1222
  /*