nokogiri 1.5.6.rc3-java → 1.5.7-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (50) hide show
  1. data/CHANGELOG.ja.rdoc +87 -26
  2. data/CHANGELOG.rdoc +94 -32
  3. data/Manifest.txt +1 -0
  4. data/Rakefile +28 -15
  5. data/build_all +13 -5
  6. data/ext/java/nokogiri/NokogiriService.java +8 -1
  7. data/ext/java/nokogiri/XmlDocument.java +4 -4
  8. data/ext/java/nokogiri/XmlDtd.java +13 -2
  9. data/ext/java/nokogiri/XmlElement.java +3 -12
  10. data/ext/java/nokogiri/XmlEntityReference.java +11 -31
  11. data/ext/java/nokogiri/XmlNode.java +76 -32
  12. data/ext/java/nokogiri/XmlReader.java +257 -181
  13. data/ext/java/nokogiri/XmlSaxPushParser.java +17 -2
  14. data/ext/java/nokogiri/internals/NokogiriHelpers.java +23 -16
  15. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +18 -1
  16. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +9 -0
  17. data/ext/java/nokogiri/internals/ReaderNode.java +37 -37
  18. data/ext/java/nokogiri/internals/SaveContextVisitor.java +23 -16
  19. data/ext/java/nokogiri/internals/UncloseableInputStream.java +102 -0
  20. data/ext/java/nokogiri/internals/XmlDomParserContext.java +7 -4
  21. data/ext/nokogiri/extconf.rb +1 -0
  22. data/ext/nokogiri/nokogiri.h +4 -0
  23. data/ext/nokogiri/xml_node.c +33 -1
  24. data/ext/nokogiri/xml_reader.c +0 -3
  25. data/ext/nokogiri/xml_sax_parser.c +4 -1
  26. data/lib/nekodtd.jar +0 -0
  27. data/lib/nokogiri.rb +1 -0
  28. data/lib/nokogiri/css/xpath_visitor.rb +1 -1
  29. data/lib/nokogiri/nokogiri.jar +0 -0
  30. data/lib/nokogiri/version.rb +4 -1
  31. data/lib/nokogiri/xml/builder.rb +12 -2
  32. data/lib/nokogiri/xml/document.rb +3 -1
  33. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  34. data/tasks/cross_compile.rb +15 -15
  35. data/test/css/test_parser.rb +9 -9
  36. data/test/css/test_xpath_visitor.rb +1 -1
  37. data/test/helper.rb +1 -0
  38. data/test/html/sax/test_parser.rb +5 -2
  39. data/test/html/test_document_fragment.rb +4 -2
  40. data/test/namespaces/test_namespaces_in_builder_doc.rb +60 -0
  41. data/test/namespaces/test_namespaces_in_created_doc.rb +62 -0
  42. data/test/namespaces/test_namespaces_in_parsed_doc.rb +60 -0
  43. data/test/test_reader.rb +38 -4
  44. data/test/xml/sax/test_parser.rb +10 -1
  45. data/test/xml/test_builder.rb +40 -1
  46. data/test/xml/test_document.rb +50 -2
  47. data/test/xml/test_entity_reference.rb +2 -4
  48. data/test/xml/test_node.rb +30 -1
  49. data/test_all +2 -2
  50. metadata +142 -232
@@ -42,6 +42,7 @@ import java.util.concurrent.ExecutorService;
42
42
  import java.util.concurrent.Executors;
43
43
  import java.util.concurrent.Future;
44
44
  import java.util.concurrent.FutureTask;
45
+ import java.util.concurrent.ThreadFactory;
45
46
 
46
47
  import nokogiri.internals.ClosedStreamException;
47
48
  import nokogiri.internals.NokogiriBlockingQueueInputStream;
@@ -78,6 +79,11 @@ public class XmlSaxPushParser extends RubyObject {
78
79
  super(ruby, rubyClass);
79
80
  }
80
81
 
82
+ @Override
83
+ public void finalize() {
84
+ terminateTask(null);
85
+ }
86
+
81
87
  @JRubyMethod
82
88
  public IRubyObject initialize_native(final ThreadContext context,
83
89
  IRubyObject saxParser,
@@ -159,7 +165,15 @@ public class XmlSaxPushParser extends RubyObject {
159
165
 
160
166
  parserTask = new ParserTask(context, saxParser);
161
167
  futureTask = new FutureTask<XmlSaxParserContext>(parserTask);
162
- executor = Executors.newSingleThreadExecutor();
168
+ executor = Executors.newSingleThreadExecutor(new ThreadFactory() {
169
+ @Override
170
+ public Thread newThread(Runnable r) {
171
+ Thread t = new Thread(r);
172
+ t.setName("XmlSaxPushParser");
173
+ t.setDaemon(true);
174
+ return t;
175
+ }
176
+ });
163
177
  executor.submit(futureTask);
164
178
  }
165
179
  }
@@ -171,7 +185,8 @@ public class XmlSaxPushParser extends RubyObject {
171
185
  } catch (ClosedStreamException ex) {
172
186
  // ignore this exception, it means the stream was closed
173
187
  } catch (Exception e) {
174
- throw context.getRuntime().newRuntimeError(e.getMessage());
188
+ if (context != null)
189
+ throw context.getRuntime().newRuntimeError(e.getMessage());
175
190
  }
176
191
  futureTask.cancel(true);
177
192
  executor.shutdown();
@@ -40,7 +40,6 @@ import java.nio.ByteBuffer;
40
40
  import java.nio.CharBuffer;
41
41
  import java.nio.charset.CharacterCodingException;
42
42
  import java.nio.charset.Charset;
43
- import java.nio.charset.CharsetDecoder;
44
43
  import java.nio.charset.CharsetEncoder;
45
44
  import java.util.ArrayList;
46
45
  import java.util.List;
@@ -72,9 +71,11 @@ import org.jruby.runtime.ThreadContext;
72
71
  import org.jruby.runtime.builtin.IRubyObject;
73
72
  import org.jruby.util.ByteList;
74
73
  import org.w3c.dom.Attr;
74
+ import org.w3c.dom.Document;
75
75
  import org.w3c.dom.NamedNodeMap;
76
76
  import org.w3c.dom.Node;
77
77
  import org.w3c.dom.NodeList;
78
+ import org.w3c.dom.DOMException;
78
79
 
79
80
  /**
80
81
  * A class for various utility methods.
@@ -635,10 +636,11 @@ public class NokogiriHelpers {
635
636
  }
636
637
 
637
638
  public static String newQName(String newPrefix, Node node) {
639
+ String tagName = getLocalPart(node.getNodeName());
638
640
  if(newPrefix == null) {
639
- return node.getLocalName();
641
+ return tagName;
640
642
  } else {
641
- return newPrefix + ":" + node.getLocalName();
643
+ return newPrefix + ":" + tagName;
642
644
  }
643
645
  }
644
646
 
@@ -724,21 +726,14 @@ public class NokogiriHelpers {
724
726
  }
725
727
 
726
728
  public static byte[] convertEncoding(Charset output_charset, String input_string) throws CharacterCodingException {
727
- Charset input = Charset.forName("UTF-8");
728
- CharsetDecoder decoder = input.newDecoder();
729
729
  CharsetEncoder encoder = output_charset.newEncoder();
730
- decoder.reset();
731
- encoder.reset();
732
- ByteBuffer bbuf = ByteBuffer.wrap(input_string.getBytes());
733
- CharBuffer cbuf = decoder.decode(bbuf);
734
- bbuf.clear();
735
- encoder.encode(cbuf, bbuf, true);
736
- int length = bbuf.position();
737
- byte[] bytes = new byte[length];
738
- System.arraycopy(bbuf.array(), 0, bytes, 0, length);
739
- return bytes;
730
+ CharBuffer charBuffer = CharBuffer.wrap(input_string);
731
+ ByteBuffer byteBuffer = encoder.encode(charBuffer);
732
+ byte[] buffer = new byte[byteBuffer.remaining()];
733
+ byteBuffer.get(buffer);
734
+ return buffer;
740
735
  }
741
-
736
+
742
737
  public static String convertEncodingByNKFIfNecessary(Ruby runtime, XmlDocument doc, String thing) {
743
738
  if (!(doc instanceof HtmlDocument)) return thing;
744
739
  String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding();
@@ -815,4 +810,16 @@ public class NokogiriHelpers {
815
810
  public static boolean shouldDecode(Node text) {
816
811
  return !shouldEncode(text);
817
812
  }
813
+
814
+ public static Node renameNode(Node n, String namespaceURI, String qualifiedName) throws DOMException {
815
+ Document doc = n.getOwnerDocument();
816
+ XmlDocument xmlDoc = (XmlDocument)getCachedNode(doc);
817
+ NokogiriNamespaceCache nsCache = xmlDoc.getNamespaceCache();
818
+ int oldHash = n.hashCode();
819
+ Node result = doc.renameNode(n, namespaceURI, qualifiedName);
820
+ if (result != n) {
821
+ nsCache.replaceNode(n, result);
822
+ }
823
+ return result;
824
+ }
818
825
  }
@@ -99,7 +99,7 @@ public class NokogiriNamespaceCache {
99
99
  List<XmlNamespace> namespaces = new ArrayList<XmlNamespace>();
100
100
  for (int i=0; i < keys.size(); i++) {
101
101
  CacheEntry entry = cache.get(i);
102
- if (entry.ownerNode == node) {
102
+ if (entry.isOwner(node)) {
103
103
  namespaces.add(entry.namespace);
104
104
  }
105
105
  }
@@ -151,6 +151,15 @@ public class NokogiriNamespaceCache {
151
151
  defaultNamespace = null;
152
152
  }
153
153
 
154
+ public void replaceNode(Node oldNode, Node newNode) {
155
+ for (int i=0; i < keys.size(); i++) {
156
+ CacheEntry entry = cache.get(i);
157
+ if (entry.isOwner(oldNode)) {
158
+ entry.replaceOwner(newNode);
159
+ }
160
+ }
161
+ }
162
+
154
163
  private class CacheEntry {
155
164
  private XmlNamespace namespace;
156
165
  private Node ownerNode;
@@ -159,5 +168,13 @@ public class NokogiriNamespaceCache {
159
168
  this.namespace = namespace;
160
169
  this.ownerNode = ownerNode;
161
170
  }
171
+
172
+ public Boolean isOwner(Node n) {
173
+ return this.ownerNode.isSameNode(n);
174
+ }
175
+
176
+ public void replaceOwner(Node newNode) {
177
+ this.ownerNode = newNode;
178
+ }
162
179
  }
163
180
  }
@@ -56,6 +56,15 @@ public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler{
56
56
  }
57
57
 
58
58
  public void fatalError(SAXParseException ex) throws SAXException {
59
+ // fix #837
60
+ // Xerces won't skip the reference entity (and other invalid) constructs
61
+ // found in the prolog, instead it will keep calling this method and we'll
62
+ // keep inserting the error in the document errors array until we run
63
+ // out of memory
64
+ String message = ex.getMessage();
65
+ if (message != null && message.toLowerCase().contains("in prolog")) {
66
+ throw ex;
67
+ }
59
68
  errors.add(ex);
60
69
  }
61
70
 
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -51,6 +51,7 @@ import nokogiri.XmlAttr;
51
51
  import nokogiri.XmlDocument;
52
52
  import nokogiri.XmlSyntaxError;
53
53
 
54
+ import org.apache.xerces.xni.XMLAttributes;
54
55
  import org.jruby.Ruby;
55
56
  import org.jruby.RubyArray;
56
57
  import org.jruby.RubyBoolean;
@@ -59,12 +60,10 @@ import org.jruby.runtime.ThreadContext;
59
60
  import org.jruby.runtime.builtin.IRubyObject;
60
61
  import org.w3c.dom.Attr;
61
62
  import org.w3c.dom.Document;
62
- import org.xml.sax.Attributes;
63
- import org.xml.sax.SAXParseException;
64
63
 
65
64
  /**
66
65
  * Abstract class of Node for XmlReader.
67
- *
66
+ *
68
67
  * @author Yoko Harada <yokolet@gmail.com>
69
68
  *
70
69
  */
@@ -75,17 +74,18 @@ public abstract class ReaderNode {
75
74
  public Map<String, String> namespaces;
76
75
  public int depth, nodeType;
77
76
  public String lang, localName, xmlBase, prefix, name, uri, value, xmlVersion = "1.0";
77
+ public int startOffset, endOffset;
78
78
  public boolean hasChildren = false;
79
79
  public abstract String getString();
80
80
  private Document document = null;
81
-
81
+
82
82
  private static ElementNode elementNode = null;
83
83
  private static ClosingNode closingNode = null;
84
84
  private static TextNode textNode = null;
85
-
85
+
86
86
  public IRubyObject getAttributeByIndex(IRubyObject index){
87
87
  if(index.isNil()) return index;
88
-
88
+
89
89
  long i = index.convertToInteger().getLongValue();
90
90
  if(i > Integer.MAX_VALUE) {
91
91
  throw ruby.newArgumentError("value too long to be an array index");
@@ -101,7 +101,7 @@ public abstract class ReaderNode {
101
101
  String value = attributeList.getByName(rubyStringToString(name));
102
102
  return stringOrNil(ruby, value);
103
103
  }
104
-
104
+
105
105
  public IRubyObject getAttributeByName(String name){
106
106
  if(attributeList == null) return ruby.getNil();
107
107
  String value = attributeList.getByName(name);
@@ -117,7 +117,8 @@ public abstract class ReaderNode {
117
117
  RubyArray array = RubyArray.newArray(ruby);
118
118
  if (attributeList != null && attributeList.length > 0) {
119
119
  if (document == null) {
120
- document = ((XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(ruby, getNokogiriClass(ruby, "Nokogiri::XML::Document"))).getDocument();
120
+ XmlDocument doc = (XmlDocument) XmlDocument.rbNew(ruby.getCurrentContext(), getNokogiriClass(ruby, "Nokogiri::XML::Document"), new IRubyObject[0]);
121
+ document = doc.getDocument();
121
122
  }
122
123
  for (int i=0; i<attributeList.length; i++) {
123
124
  if (!isNamespace(attributeList.names.get(i))) {
@@ -131,7 +132,7 @@ public abstract class ReaderNode {
131
132
  }
132
133
  return array;
133
134
  }
134
-
135
+
135
136
  public IRubyObject getAttributes(ThreadContext context) {
136
137
  if(attributeList == null) return context.getRuntime().getNil();
137
138
  RubyHash hash = RubyHash.newHash(context.getRuntime());
@@ -173,7 +174,7 @@ public abstract class ReaderNode {
173
174
  }
174
175
  return hash;
175
176
  }
176
-
177
+
177
178
  public IRubyObject getXmlBase() {
178
179
  return stringOrNil(ruby, xmlBase);
179
180
  }
@@ -218,7 +219,7 @@ public abstract class ReaderNode {
218
219
  }
219
220
 
220
221
  public IRubyObject toSyntaxError() { return ruby.getNil(); }
221
-
222
+
222
223
  public IRubyObject getNodeType() { return ruby.newFixnum(nodeType); }
223
224
 
224
225
  public static enum ReaderNodeType {
@@ -240,17 +241,17 @@ public abstract class ReaderNode {
240
241
  END_ELEMENT(15),
241
242
  END_ENTITY(16),
242
243
  XML_DECLARATION(17);
243
-
244
+
244
245
  private final int value;
245
246
  ReaderNodeType(int value) {
246
247
  this.value = value;
247
248
  }
248
-
249
+
249
250
  public int getValue() {
250
251
  return value;
251
252
  }
252
253
  }
253
-
254
+
254
255
  public static ClosingNode createClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
255
256
  if (closingNode == null) closingNode = new ClosingNode();
256
257
  ClosingNode clone;
@@ -264,13 +265,13 @@ public abstract class ReaderNode {
264
265
  }
265
266
 
266
267
  public static class ClosingNode extends ReaderNode {
267
-
268
+
268
269
  public ClosingNode() {}
269
270
 
270
271
  public ClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
271
272
  init(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
272
273
  }
273
-
274
+
274
275
  public void init(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
275
276
  this.ruby = ruby;
276
277
  nodeType = ReaderNodeType.END_ELEMENT.getValue();
@@ -300,8 +301,8 @@ public abstract class ReaderNode {
300
301
  return new String(sb);
301
302
  }
302
303
  }
303
-
304
- public static ElementNode createElementNode(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
304
+
305
+ public static ElementNode createElementNode(Ruby ruby, String uri, String localName, String qName, XMLAttributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
305
306
  if (elementNode == null) elementNode = new ElementNode();
306
307
  ElementNode clone;
307
308
  try {
@@ -312,17 +313,17 @@ public abstract class ReaderNode {
312
313
  clone.init(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
313
314
  return clone;
314
315
  }
315
-
316
+
316
317
  public static class ElementNode extends ReaderNode {
317
- private List<String> attributeStrings = new ArrayList<String>();
318
-
318
+ private final List<String> attributeStrings = new ArrayList<String>();
319
+
319
320
  public ElementNode() {}
320
-
321
- public ElementNode(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
321
+
322
+ public ElementNode(Ruby ruby, String uri, String localName, String qName, XMLAttributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
322
323
  init(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
323
324
  }
324
-
325
- public void init(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
325
+
326
+ public void init(Ruby ruby, String uri, String localName, String qName, XMLAttributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
326
327
  this.ruby = ruby;
327
328
  this.nodeType = ReaderNodeType.ELEMENT.getValue();
328
329
  this.uri = "".equals(uri) ? null : uri;
@@ -330,7 +331,6 @@ public abstract class ReaderNode {
330
331
  this.name = qName;
331
332
  parsePrefix(qName);
332
333
  this.depth = depth;
333
- hasChildren = true;
334
334
  parseAttributes(attrs, langStack, xmlBaseStack);
335
335
  }
336
336
 
@@ -339,7 +339,7 @@ public abstract class ReaderNode {
339
339
  return ruby.getFalse();
340
340
  }
341
341
 
342
- private void parseAttributes(Attributes attrs, Stack<String> langStack, Stack<String> xmlBaseStack) {
342
+ private void parseAttributes(XMLAttributes attrs, Stack<String> langStack, Stack<String> xmlBaseStack) {
343
343
  if (attrs.getLength() > 0) attributeList = new ReaderAttributeList();
344
344
  String u, n, v;
345
345
  for (int i = 0; i < attrs.getLength(); i++) {
@@ -357,7 +357,7 @@ public abstract class ReaderNode {
357
357
  attributeStrings.add(n + "=\"" + v + "\"");
358
358
  }
359
359
  }
360
-
360
+
361
361
  private String resolveLang(String n, String v, Stack<String> langStack) {
362
362
  if ("xml:lang".equals(n)) {
363
363
  return v;
@@ -367,7 +367,7 @@ public abstract class ReaderNode {
367
367
  return null;
368
368
  }
369
369
  }
370
-
370
+
371
371
  private String resolveXmlBase(String n, String v, Stack<String> xmlBaseStack) {
372
372
  if (isXmlBase(n)) {
373
373
  return getXmlBaseUri(n, v, xmlBaseStack);
@@ -377,7 +377,7 @@ public abstract class ReaderNode {
377
377
  return null;
378
378
  }
379
379
  }
380
-
380
+
381
381
  private String getXmlBaseUri(String n, String v, Stack<String> xmlBaseStack) {
382
382
  if ("xml:base".equals(n)) {
383
383
  if (v.startsWith("http://")) {
@@ -423,7 +423,7 @@ public abstract class ReaderNode {
423
423
  List<String> names = new ArrayList<String>();
424
424
  List<String> values = new ArrayList<String>();
425
425
  int length = 0;
426
-
426
+
427
427
  void add(String namespace, String name, String value) {
428
428
  namespace = namespace != null ? namespace : "";
429
429
  namespaces.add(namespace);
@@ -433,7 +433,7 @@ public abstract class ReaderNode {
433
433
  values.add(value);
434
434
  length++;
435
435
  }
436
-
436
+
437
437
  String getByName(String name) {
438
438
  for (int i=0; i<names.size(); i++) {
439
439
  if (name.equals(names.get(i))) {
@@ -471,7 +471,7 @@ public abstract class ReaderNode {
471
471
  private final XmlSyntaxError exception;
472
472
 
473
473
  // Still don't know what to do with ex.
474
- public ExceptionNode(Ruby runtime, SAXParseException ex) {
474
+ public ExceptionNode(Ruby runtime, Exception ex) {
475
475
  super(runtime);
476
476
  exception = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(ruby, "Nokogiri::XML::SyntaxError"));
477
477
  }
@@ -486,7 +486,7 @@ public abstract class ReaderNode {
486
486
  return this.exception;
487
487
  }
488
488
  }
489
-
489
+
490
490
  public static TextNode createTextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
491
491
  if (textNode == null) textNode = new TextNode();
492
492
  TextNode clone;
@@ -505,7 +505,7 @@ public abstract class ReaderNode {
505
505
  public TextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
506
506
  init(ruby, content, depth, langStack, xmlBaseStack);
507
507
  }
508
-
508
+
509
509
  public void init(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
510
510
  this.ruby = ruby;
511
511
  this.value = content;
@@ -37,6 +37,8 @@ import static nokogiri.internals.NokogiriHelpers.encodeJavaString;
37
37
  import static nokogiri.internals.NokogiriHelpers.isNamespace;
38
38
  import static nokogiri.internals.NokogiriHelpers.isWhitespaceText;
39
39
 
40
+ import java.nio.charset.Charset;
41
+ import java.nio.charset.CharsetEncoder;
40
42
  import java.util.ArrayDeque;
41
43
  import java.util.ArrayList;
42
44
  import java.util.Arrays;
@@ -193,7 +195,7 @@ public class SaveContextVisitor {
193
195
  return enter((Entity)node);
194
196
  }
195
197
  if (node instanceof EntityReference) {
196
- return enter(node);
198
+ return enter((EntityReference) node);
197
199
  }
198
200
  if (node instanceof Notation) {
199
201
  return enter((Notation)node);
@@ -237,7 +239,7 @@ public class SaveContextVisitor {
237
239
  return;
238
240
  }
239
241
  if (node instanceof EntityReference) {
240
- leave(node);
242
+ leave((EntityReference) node);
241
243
  return;
242
244
  }
243
245
  if (node instanceof Notation) {
@@ -433,11 +435,13 @@ public class SaveContextVisitor {
433
435
  return true;
434
436
  }
435
437
  // no child
436
- if (asHtml || asXhtml) {
438
+ if (asHtml) {
437
439
  buffer.append(">");
438
440
  } else if (asXml && noEmpty) {
439
441
  buffer.append(">");
440
442
  } else {
443
+ if (asXhtml)
444
+ buffer.append(" "); // see http://www.w3.org/TR/xhtml1/#C_2
441
445
  buffer.append("/>");
442
446
  }
443
447
  if (needBreakInOpening(element)) {
@@ -677,13 +681,11 @@ public class SaveContextVisitor {
677
681
  // no-op
678
682
  }
679
683
 
680
- public boolean enterEntityReference(Text entityRef) {
681
- String name = entityRef.getNodeName();
682
- buffer.append("&" + name + ";");
684
+ public boolean enter(EntityReference entityRef) {
685
+ buffer.append("&" + entityRef.getNodeName() + ";");
683
686
  return true;
684
687
  }
685
-
686
- public void leaveEntityReference(Text entityRef) {
688
+ public void leave(EntityReference entityRef) {
687
689
  // no-op
688
690
  }
689
691
 
@@ -731,6 +733,10 @@ public class SaveContextVisitor {
731
733
  // no-op
732
734
  }
733
735
 
736
+ private boolean isHtmlScript(Text text) {
737
+ return htmlDoc && text.getParentNode().getNodeName().equals("script");
738
+ }
739
+
734
740
  private static char lineSeparator = '\n'; // System.getProperty("line.separator"); ?
735
741
  public boolean enter(Text text) {
736
742
  String textContent = text.getNodeValue();
@@ -742,13 +748,11 @@ public class SaveContextVisitor {
742
748
  }
743
749
  }
744
750
 
745
- if (NokogiriHelpers.shouldEncode(text)) {
751
+ if (NokogiriHelpers.shouldEncode(text) && !isHtmlScript(text)) {
746
752
  textContent = encodeJavaString(textContent);
747
753
  }
748
754
 
749
- if (getEncoding(text) == null) {
750
- textContent = encodeStringToHtmlEntity(textContent);
751
- }
755
+ textContent = encodeStringToHtmlEntity(textContent);
752
756
  buffer.append(textContent);
753
757
  return true;
754
758
  }
@@ -760,12 +764,15 @@ public class SaveContextVisitor {
760
764
  }
761
765
 
762
766
  private String encodeStringToHtmlEntity(String text) {
767
+ if (encoding == null)
768
+ return text;
769
+ CharsetEncoder encoder = Charset.forName(encoding).newEncoder();
763
770
  int last = 126; // = U+007E. No need to encode under U+007E.
764
771
  StringBuffer sb = new StringBuffer();
765
- for (int i=0; i<text.length(); i++) {
766
- int codePoint = text.codePointAt(i);
767
- if (codePoint > last) sb.append("&#x" + Integer.toHexString(codePoint) + ";");
768
- else sb.append(text.charAt(i));
772
+ for (int i = 0; i < text.length(); i++) {
773
+ char ch = text.charAt(i);
774
+ if (encoder.canEncode(ch)) sb.append(ch);
775
+ else sb.append("&#x" + Integer.toHexString(ch) + ";");
769
776
  }
770
777
  return new String(sb);
771
778
  }