nokogiri 1.5.6.rc3-java → 1.5.7-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (50) hide show
  1. data/CHANGELOG.ja.rdoc +87 -26
  2. data/CHANGELOG.rdoc +94 -32
  3. data/Manifest.txt +1 -0
  4. data/Rakefile +28 -15
  5. data/build_all +13 -5
  6. data/ext/java/nokogiri/NokogiriService.java +8 -1
  7. data/ext/java/nokogiri/XmlDocument.java +4 -4
  8. data/ext/java/nokogiri/XmlDtd.java +13 -2
  9. data/ext/java/nokogiri/XmlElement.java +3 -12
  10. data/ext/java/nokogiri/XmlEntityReference.java +11 -31
  11. data/ext/java/nokogiri/XmlNode.java +76 -32
  12. data/ext/java/nokogiri/XmlReader.java +257 -181
  13. data/ext/java/nokogiri/XmlSaxPushParser.java +17 -2
  14. data/ext/java/nokogiri/internals/NokogiriHelpers.java +23 -16
  15. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +18 -1
  16. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +9 -0
  17. data/ext/java/nokogiri/internals/ReaderNode.java +37 -37
  18. data/ext/java/nokogiri/internals/SaveContextVisitor.java +23 -16
  19. data/ext/java/nokogiri/internals/UncloseableInputStream.java +102 -0
  20. data/ext/java/nokogiri/internals/XmlDomParserContext.java +7 -4
  21. data/ext/nokogiri/extconf.rb +1 -0
  22. data/ext/nokogiri/nokogiri.h +4 -0
  23. data/ext/nokogiri/xml_node.c +33 -1
  24. data/ext/nokogiri/xml_reader.c +0 -3
  25. data/ext/nokogiri/xml_sax_parser.c +4 -1
  26. data/lib/nekodtd.jar +0 -0
  27. data/lib/nokogiri.rb +1 -0
  28. data/lib/nokogiri/css/xpath_visitor.rb +1 -1
  29. data/lib/nokogiri/nokogiri.jar +0 -0
  30. data/lib/nokogiri/version.rb +4 -1
  31. data/lib/nokogiri/xml/builder.rb +12 -2
  32. data/lib/nokogiri/xml/document.rb +3 -1
  33. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  34. data/tasks/cross_compile.rb +15 -15
  35. data/test/css/test_parser.rb +9 -9
  36. data/test/css/test_xpath_visitor.rb +1 -1
  37. data/test/helper.rb +1 -0
  38. data/test/html/sax/test_parser.rb +5 -2
  39. data/test/html/test_document_fragment.rb +4 -2
  40. data/test/namespaces/test_namespaces_in_builder_doc.rb +60 -0
  41. data/test/namespaces/test_namespaces_in_created_doc.rb +62 -0
  42. data/test/namespaces/test_namespaces_in_parsed_doc.rb +60 -0
  43. data/test/test_reader.rb +38 -4
  44. data/test/xml/sax/test_parser.rb +10 -1
  45. data/test/xml/test_builder.rb +40 -1
  46. data/test/xml/test_document.rb +50 -2
  47. data/test/xml/test_entity_reference.rb +2 -4
  48. data/test/xml/test_node.rb +30 -1
  49. data/test_all +2 -2
  50. metadata +142 -232
@@ -17,10 +17,10 @@
17
17
  * distribute, sublicense, and/or sell copies of the Software, and to
18
18
  * permit persons to whom the Software is furnished to do so, subject to
19
19
  * the following conditions:
20
- *
20
+ *
21
21
  * The above copyright notice and this permission notice shall be
22
22
  * included in all copies or substantial portions of the Software.
23
- *
23
+ *
24
24
  * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
25
25
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26
26
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -35,39 +35,51 @@ package nokogiri;
35
35
  import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
36
36
  import static nokogiri.internals.NokogiriHelpers.stringOrBlank;
37
37
 
38
- import java.io.ByteArrayInputStream;
39
38
  import java.io.IOException;
40
- import java.util.ArrayDeque;
39
+ import java.io.InputStream;
40
+ import java.util.LinkedList;
41
+ import java.util.List;
41
42
  import java.util.Stack;
42
43
 
43
44
  import nokogiri.internals.NokogiriEntityResolver;
44
45
  import nokogiri.internals.ParserContext;
45
46
  import nokogiri.internals.ParserContext.Options;
46
47
  import nokogiri.internals.ReaderNode;
48
+ import nokogiri.internals.ReaderNode.ClosingNode;
47
49
  import nokogiri.internals.ReaderNode.ElementNode;
48
-
50
+ import nokogiri.internals.ReaderNode.TextNode;
51
+ import nokogiri.internals.UncloseableInputStream;
52
+
53
+ import org.apache.xerces.impl.Constants;
54
+ import org.apache.xerces.impl.xs.opti.DefaultXMLDocumentHandler;
55
+ import org.apache.xerces.parsers.StandardParserConfiguration;
56
+ import org.apache.xerces.util.EntityResolver2Wrapper;
57
+ import org.apache.xerces.xni.Augmentations;
58
+ import org.apache.xerces.xni.NamespaceContext;
59
+ import org.apache.xerces.xni.QName;
60
+ import org.apache.xerces.xni.XMLAttributes;
61
+ import org.apache.xerces.xni.XMLLocator;
62
+ import org.apache.xerces.xni.XMLResourceIdentifier;
63
+ import org.apache.xerces.xni.XMLString;
64
+ import org.apache.xerces.xni.XNIException;
65
+ import org.apache.xerces.xni.parser.XMLErrorHandler;
66
+ import org.apache.xerces.xni.parser.XMLInputSource;
67
+ import org.apache.xerces.xni.parser.XMLParseException;
68
+ import org.apache.xerces.xni.parser.XMLPullParserConfiguration;
49
69
  import org.jruby.Ruby;
50
70
  import org.jruby.RubyArray;
51
71
  import org.jruby.RubyBoolean;
52
72
  import org.jruby.RubyClass;
53
73
  import org.jruby.RubyFixnum;
54
74
  import org.jruby.RubyObject;
55
- import org.jruby.RubyString;
56
75
  import org.jruby.anno.JRubyClass;
57
76
  import org.jruby.anno.JRubyMethod;
58
77
  import org.jruby.exceptions.RaiseException;
59
- import org.jruby.javasupport.util.RuntimeHelpers;
60
- import org.jruby.lexer.yacc.SyntaxException;
78
+ import org.jruby.runtime.Block;
61
79
  import org.jruby.runtime.ThreadContext;
62
80
  import org.jruby.runtime.builtin.IRubyObject;
63
- import org.jruby.util.ByteList;
64
- import org.xml.sax.Attributes;
81
+ import org.jruby.util.IOInputStream;
65
82
  import org.xml.sax.InputSource;
66
- import org.xml.sax.SAXException;
67
- import org.xml.sax.SAXParseException;
68
- import org.xml.sax.XMLReader;
69
- import org.xml.sax.ext.DefaultHandler2;
70
- import org.xml.sax.helpers.XMLReaderFactory;
71
83
 
72
84
  /**
73
85
  * Class for Nokogiri:XML::Reader
@@ -85,13 +97,16 @@ public class XmlReader extends RubyObject {
85
97
  private static final int XML_TEXTREADER_MODE_CLOSED = 4;
86
98
  private static final int XML_TEXTREADER_MODE_READING = 5;
87
99
 
88
- ArrayDeque<ReaderNode> nodeQueue;
100
+ List<ReaderNode> nodeQueue;
89
101
  private int state;
90
-
102
+ private int position = 0;
103
+ private XMLPullParserConfiguration config;
104
+ private boolean continueParsing = true;
105
+
91
106
  public XmlReader(Ruby runtime, RubyClass klazz) {
92
107
  super(runtime, klazz);
93
108
  }
94
-
109
+
95
110
  /**
96
111
  * Create and return a copy of this object.
97
112
  *
@@ -103,81 +118,77 @@ public class XmlReader extends RubyObject {
103
118
  }
104
119
 
105
120
  public void init(Ruby runtime) {
106
- nodeQueue = new ArrayDeque<ReaderNode>();
121
+ nodeQueue = new LinkedList<ReaderNode>();
107
122
  nodeQueue.add(new ReaderNode.EmptyNode(runtime));
108
123
  }
109
124
 
110
- private void parseRubyString(ThreadContext context, RubyString content, IRubyObject url, Options options){
111
- Ruby ruby = context.getRuntime();
125
+ private void setInput(ThreadContext context, InputStream in, IRubyObject url, Options options){
126
+ this.setState(XML_TEXTREADER_MODE_READING);
127
+ config = this.createReader(context.getRuntime(), options);
128
+ InputSource inputSource = new InputSource();
129
+ ParserContext.setUrl(context, inputSource, url);
130
+ XMLInputSource xmlInputSource = new XMLInputSource(inputSource.getPublicId(),
131
+ inputSource.getSystemId(), null, in, null);
112
132
  try {
113
- this.setState(XML_TEXTREADER_MODE_READING);
114
- XMLReader reader = this.createReader(ruby, options);
115
- ByteList byteList = content.getByteList();
116
- ByteArrayInputStream bais = new ByteArrayInputStream(byteList.unsafeBytes(), byteList.begin(), byteList.length());
117
- InputSource inputSource = new InputSource(bais);
118
- ParserContext.setUrl(context, inputSource, url);
119
- reader.parse(inputSource);
120
- this.setState(XML_TEXTREADER_MODE_CLOSED);
121
- } catch (SAXParseException spe) {
122
- this.setState(XML_TEXTREADER_MODE_ERROR);
123
- this.nodeQueue.add(new ReaderNode.ExceptionNode(ruby, spe));
124
- } catch (IOException ioe) {
125
- throw RaiseException.createNativeRaiseException(ruby, ioe);
126
- } catch (SAXException saxe) {
127
- throw RaiseException.createNativeRaiseException(ruby, saxe);
133
+ config.setInputSource(xmlInputSource);
134
+ } catch (IOException e) {
135
+ throw context.getRuntime().newRuntimeError(e.getMessage());
128
136
  }
137
+ this.setState(XML_TEXTREADER_MODE_CLOSED);
129
138
  }
130
139
 
131
140
  private void setState(int state) { this.state = state; }
132
141
 
133
142
  @JRubyMethod
134
143
  public IRubyObject attribute(ThreadContext context, IRubyObject name) {
135
- return nodeQueue.peek().getAttributeByName(name);
144
+ return currentNode().getAttributeByName(name);
136
145
  }
137
146
 
138
147
  @JRubyMethod
139
148
  public IRubyObject attribute_at(ThreadContext context, IRubyObject index) {
140
- return nodeQueue.peek().getAttributeByIndex(index);
149
+ return currentNode().getAttributeByIndex(index);
141
150
  }
142
151
 
143
152
  @JRubyMethod
144
153
  public IRubyObject attribute_count(ThreadContext context) {
145
- return nodeQueue.peek().getAttributeCount();
154
+ return currentNode().getAttributeCount();
146
155
  }
147
156
 
148
157
  @JRubyMethod
149
158
  public IRubyObject attribute_nodes(ThreadContext context) {
150
- return nodeQueue.peek().getAttributesNodes();
159
+ return currentNode().getAttributesNodes();
151
160
  }
152
161
 
153
162
  @JRubyMethod
154
163
  public IRubyObject attr_nodes(ThreadContext context) {
155
- return nodeQueue.peek().getAttributesNodes();
164
+ return currentNode().getAttributesNodes();
156
165
  }
157
166
 
158
167
  @JRubyMethod(name = "attributes?")
159
168
  public IRubyObject attributes_p(ThreadContext context) {
160
- return nodeQueue.peek().hasAttributes();
169
+ return currentNode().hasAttributes();
161
170
  }
162
-
171
+
163
172
  @JRubyMethod
164
173
  public IRubyObject base_uri(ThreadContext context) {
165
- return nodeQueue.peek().getXmlBase();
174
+ return currentNode().getXmlBase();
166
175
  }
167
176
 
168
177
  @JRubyMethod(name="default?")
169
178
  public IRubyObject default_p(ThreadContext context){
170
- return nodeQueue.peek().isDefault();
179
+ return currentNode().isDefault();
171
180
  }
172
181
 
173
182
  @JRubyMethod
174
183
  public IRubyObject depth(ThreadContext context) {
175
- return nodeQueue.peek().getDepth();
184
+ return currentNode().getDepth();
176
185
  }
177
-
186
+
178
187
  @JRubyMethod(name = {"empty_element?", "self_closing?"})
179
188
  public IRubyObject empty_element_p(ThreadContext context) {
180
- ReaderNode readerNode = nodeQueue.peek();
189
+ ReaderNode readerNode = currentNode();
190
+ ensureNodeClosed(context);
191
+
181
192
  if (readerNode == null) return context.getRuntime().getNil();
182
193
  if (!(readerNode instanceof ElementNode)) context.getRuntime().getFalse();
183
194
  return RubyBoolean.newBoolean(context.getRuntime(), !readerNode.hasChildren);
@@ -198,8 +209,6 @@ public class XmlReader extends RubyObject {
198
209
  if (args.length > 1) url = args[1];
199
210
  if (args.length > 2) reader.setInstanceVariable("@encoding", args[2]);
200
211
 
201
- RubyString content = RuntimeHelpers.invoke(context, args[0], "read").convertToString();
202
-
203
212
  Options options;
204
213
  if (args.length > 3) {
205
214
  options = new ParserContext.Options((Long)args[3].toJava(Long.class));
@@ -207,7 +216,9 @@ public class XmlReader extends RubyObject {
207
216
  // use the default options RECOVER | NONET
208
217
  options = new ParserContext.Options(2048 | 1);
209
218
  }
210
- reader.parseRubyString(context, content, url, options);
219
+
220
+ InputStream in = new UncloseableInputStream(new IOInputStream(args[0]));
221
+ reader.setInput(context, in, url, options);
211
222
  return reader;
212
223
  }
213
224
 
@@ -233,109 +244,136 @@ public class XmlReader extends RubyObject {
233
244
  // use the default options RECOVER | NONET
234
245
  options = new ParserContext.Options(2048 | 1);
235
246
  }
236
- reader.parseRubyString(context, args[0].convertToString(), url, options);
247
+ IRubyObject stringIO = NokogiriService.nokogiriClassCache.get("StringIO").newInstance(context, args[0], Block.NULL_BLOCK);
248
+ InputStream in = new UncloseableInputStream(new IOInputStream(stringIO));
249
+ reader.setInput(context, in, url, options);
237
250
  return reader;
238
251
  }
239
252
 
240
253
  @JRubyMethod
241
254
  public IRubyObject node_type(ThreadContext context) {
242
- IRubyObject node_type = nodeQueue.peek().getNodeType();
255
+ IRubyObject node_type = currentNode().getNodeType();
243
256
  return node_type == null ? RubyFixnum.zero(context.getRuntime()) : node_type;
244
257
  }
245
258
 
246
259
  @JRubyMethod
247
260
  public IRubyObject inner_xml(ThreadContext context) {
248
- return stringOrBlank(context.getRuntime(), getInnerXml(nodeQueue, nodeQueue.peek()));
261
+ ensureNodeClosed(context);
262
+ return stringOrBlank(context.getRuntime(), getInnerXml(currentNode()));
249
263
  }
250
-
251
- private String getInnerXml(ArrayDeque<ReaderNode> nodeQueue, ReaderNode current) {
264
+
265
+ private String getInnerXml(ReaderNode current) {
252
266
  if (current.depth < 0) return null;
253
267
  if (!current.hasChildren) return null;
254
268
  StringBuffer sb = new StringBuffer();
255
- int currentDepth = current.depth;
256
- int inner = 0;
257
- for (ReaderNode node : nodeQueue) {
258
- if (node.depth == currentDepth && node.getName().equals(current.getName())) {
259
- inner++;
260
- }
261
- if (node.depth > currentDepth) {
262
- sb.append(node.getString());
263
- }
264
- if (inner == 2) break;
269
+ for (int i = current.startOffset + 1; i <= current.endOffset - 1; i++) {
270
+ sb.append(nodeQueue.get(i).getString());
265
271
  }
266
272
  return new String(sb);
267
273
  }
268
-
274
+
269
275
  @JRubyMethod
270
276
  public IRubyObject outer_xml(ThreadContext context) {
271
- return stringOrBlank(context.getRuntime(), getOuterXml(nodeQueue, nodeQueue.peek()));
277
+ ensureNodeClosed(context);
278
+ return stringOrBlank(context.getRuntime(), getOuterXml());
272
279
  }
273
-
274
- private String getOuterXml(ArrayDeque<ReaderNode> nodeQueue, ReaderNode current) {
280
+
281
+ private String getOuterXml() {
282
+ ReaderNode current = currentNode();
275
283
  if (current.depth < 0) return null;
284
+
285
+ if (current instanceof ClosingNode) {
286
+ return "<" + current.name + "/>";
287
+ }
288
+
276
289
  StringBuffer sb = new StringBuffer();
277
- int initialDepth = current.depth;
278
- int inner = 0;
279
- for (ReaderNode node : nodeQueue) {
280
- if (node.depth >= initialDepth) {
281
- if (node.depth == initialDepth && node.getName().equals(current.getName())) {
282
- inner++;
283
- }
284
-
285
- sb.append(node.getString());
286
- }
287
- if (inner == 2) break;
290
+ for (int i = position; i <= current.endOffset; i++) {
291
+ sb.append(nodeQueue.get(i).getString());
288
292
  }
289
293
  return new String(sb);
290
294
  }
291
295
 
292
296
  @JRubyMethod
293
297
  public IRubyObject lang(ThreadContext context) {
294
- return nodeQueue.peek().getLang();
298
+ return currentNode().getLang();
295
299
  }
296
300
 
297
301
  @JRubyMethod
298
302
  public IRubyObject local_name(ThreadContext context) {
299
- return nodeQueue.peek().getLocalName();
303
+ return currentNode().getLocalName();
300
304
  }
301
305
 
302
306
  @JRubyMethod
303
307
  public IRubyObject name(ThreadContext context) {
304
- return nodeQueue.peek().getName();
308
+ return currentNode().getName();
305
309
  }
306
310
 
307
311
  @JRubyMethod
308
312
  public IRubyObject namespace_uri(ThreadContext context) {
309
- return nodeQueue.peek().getUri();
313
+ return currentNode().getUri();
310
314
  }
311
315
 
312
316
  @JRubyMethod
313
317
  public IRubyObject namespaces(ThreadContext context) {
314
- return nodeQueue.peek().getNamespaces(context);
318
+ return currentNode().getNamespaces(context);
315
319
  }
316
320
 
317
321
  @JRubyMethod
318
322
  public IRubyObject prefix(ThreadContext context) {
319
- return nodeQueue.peek().getPrefix();
323
+ return currentNode().getPrefix();
324
+ }
325
+
326
+ private void readMoreData(ThreadContext context) {
327
+ if (!continueParsing) {
328
+ throw context.runtime.newRuntimeError("Cannot parse more data");
329
+ }
330
+ try {
331
+ continueParsing = config.parse(false);
332
+ } catch (XNIException e) {
333
+ Ruby ruby = context.runtime;
334
+ XmlSyntaxError exception = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(ruby, getNokogiriClass(ruby, "Nokogiri::XML::SyntaxError"));
335
+ throw new RaiseException(exception);
336
+ } catch (IOException e) {
337
+ throw context.getRuntime().newRuntimeError("Received IOException: " + e.getMessage());
338
+ }
339
+ }
340
+
341
+ private void ensureNodeClosed(ThreadContext context) {
342
+ ReaderNode node = currentNode();
343
+ if (node instanceof TextNode) {
344
+ return;
345
+ }
346
+ while (node.endOffset < 1) {
347
+ readMoreData(context);
348
+ }
320
349
  }
321
350
 
322
351
  @JRubyMethod
323
352
  public IRubyObject read(ThreadContext context) {
324
- this.nodeQueue.poll();
325
- if(nodeQueue.peek() == null) {
326
- return context.getRuntime().getNil();
327
- } else if(nodeQueue.peek().isError()) {
353
+ position++;
354
+ while (nodeQueue.size() <= position && continueParsing) {
355
+ readMoreData(context);
356
+ }
357
+ if(currentNode() == null) {
358
+ return context.nil;
359
+ } else if(currentNode().isError()) {
328
360
  RubyArray errors = (RubyArray) this.getInstanceVariable("@errors");
329
- errors.append(nodeQueue.peek().toSyntaxError());
361
+ errors.append(currentNode().toSyntaxError());
330
362
 
331
363
  this.setInstanceVariable("@errors", errors);
332
364
 
333
- throw new RaiseException((XmlSyntaxError) nodeQueue.peek().toSyntaxError());
365
+ throw new RaiseException((XmlSyntaxError) currentNode().toSyntaxError());
334
366
  } else {
335
367
  return this;
336
368
  }
337
369
  }
338
370
 
371
+ private ReaderNode currentNode() {
372
+ if (position >= nodeQueue.size())
373
+ return null;
374
+ return nodeQueue.get(position);
375
+ }
376
+
339
377
  @JRubyMethod
340
378
  public IRubyObject state(ThreadContext context) {
341
379
  return context.getRuntime().newFixnum(this.state);
@@ -343,114 +381,152 @@ public class XmlReader extends RubyObject {
343
381
 
344
382
  @JRubyMethod
345
383
  public IRubyObject value(ThreadContext context) {
346
- return nodeQueue.peek().getValue();
384
+ return currentNode().getValue();
347
385
  }
348
386
 
349
387
  @JRubyMethod(name = "value?")
350
388
  public IRubyObject value_p(ThreadContext context) {
351
- return nodeQueue.peek().hasValue();
389
+ return currentNode().hasValue();
352
390
  }
353
391
 
354
392
  @JRubyMethod
355
393
  public IRubyObject xml_version(ThreadContext context) {
356
- return nodeQueue.peek().getXmlVersion();
394
+ return currentNode().getXmlVersion();
357
395
  }
358
396
 
359
- protected XMLReader createReader(final Ruby ruby, Options options) {
360
- DefaultHandler2 handler = new DefaultHandler2() {
397
+ protected XMLPullParserConfiguration createReader(Ruby ruby, Options options) {
398
+ StandardParserConfiguration config = new StandardParserConfiguration();
399
+ DocumentHandler handler = new DocumentHandler(ruby);
400
+ // XMLReader reader = XMLReaderFactory.createXMLReader();
401
+ config.setDocumentHandler(handler);
402
+ config.setDTDHandler(handler);
403
+ config.setErrorHandler(handler);
404
+ config.setEntityResolver(new EntityResolver2Wrapper(new NokogiriEntityResolver(ruby, null, options)));
405
+ // config.setFeature("http://xml.org/sax/features/xmlns-uris", true);
406
+ // config.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
407
+ config.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", options.dtdLoad || options.dtdValid);
408
+ return config;
409
+ }
361
410
 
362
- Stack<String> langStack;
363
- int depth;
364
- Stack<String> xmlBaseStack;
365
- Stack<ReaderNode.ElementNode> elementStack;
411
+ private class DocumentHandler extends DefaultXMLDocumentHandler implements XMLErrorHandler {
366
412
 
367
- @Override
368
- public void characters(char[] chars, int start, int length) {
369
- ReaderNode.TextNode node = ReaderNode.createTextNode(ruby, new String(chars, start, length), depth, langStack, xmlBaseStack);
370
- nodeQueue.add(node);
371
- }
372
-
373
- @Override
374
- public void endDocument() throws SAXException {
375
- langStack = null;
376
- xmlBaseStack = null;
377
- elementStack = null;
378
- }
413
+ Stack<String> langStack;
414
+ int depth;
415
+ Stack<String> xmlBaseStack;
416
+ Stack<ReaderNode.ElementNode> elementStack;
417
+ private final Ruby ruby;
379
418
 
380
- @Override
381
- public void endElement(String uri, String localName, String qName) {
382
- depth--;
383
- ReaderNode previous = nodeQueue.getLast();
384
- ElementNode startElementNode = elementStack.pop();
385
- if (previous instanceof ReaderNode.ElementNode && qName.equals(previous.name)) {
386
- previous.hasChildren = false;
387
- } else {
388
- ReaderNode node = ReaderNode.createClosingNode(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
389
- if (startElementNode != null) {
390
- node.attributeList = startElementNode.attributeList;
391
- node.namespaces = startElementNode.namespaces;
392
- }
393
- nodeQueue.add(node);
394
- }
395
- if (!langStack.isEmpty()) langStack.pop();
396
- if (!xmlBaseStack.isEmpty()) xmlBaseStack.pop();
397
- }
419
+ public DocumentHandler(Ruby ruby) {
420
+ this.ruby = ruby;
421
+ }
398
422
 
399
- @Override
400
- public void error(SAXParseException ex) throws SAXParseException {
401
- nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
402
- throw ex;
403
- }
404
423
 
405
- @Override
406
- public void fatalError(SAXParseException ex) throws SAXParseException {
407
- nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
408
- throw ex;
409
- }
410
424
 
411
- @Override
412
- public void startDocument() {
413
- depth = 0;
414
- langStack = new Stack<String>();
415
- xmlBaseStack = new Stack<String>();
416
- elementStack = new Stack<ReaderNode.ElementNode>();
417
- }
425
+ @Override
426
+ public void startGeneralEntity(String name, XMLResourceIdentifier identifier, String encoding,
427
+ Augmentations augs) throws XNIException {
428
+ Object entitySkipped;
429
+ if (augs != null && (entitySkipped = augs.getItem(Constants.ENTITY_SKIPPED)) != null && ((Boolean) entitySkipped) == true) {
430
+ nodeQueue.add(new ReaderNode.ExceptionNode(ruby, null));
431
+ }
432
+ }
433
+
418
434
 
419
- @Override
420
- public void startElement(String uri, String localName, String qName, Attributes attrs) {
421
- ReaderNode readerNode = ReaderNode.createElementNode(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
422
- nodeQueue.add(readerNode);
423
- depth++;
424
- if (readerNode.lang != null) langStack.push(readerNode.lang);
425
- if (readerNode.xmlBase != null) xmlBaseStack.push(readerNode.xmlBase);
426
- elementStack.push((ReaderNode.ElementNode)readerNode);
427
- }
428
435
 
429
- @Override
430
- public void skippedEntity(String name) {
431
- XmlSyntaxError error = XmlSyntaxError.createNokogiriXmlSyntaxError(ruby);
432
- error.setException(new Exception("Unknown entity " + name));
433
- throw new RaiseException(error);
436
+ @Override
437
+ public void startDocument(XMLLocator locator, String encoding, NamespaceContext context, Augmentations augs) {
438
+ depth = 0;
439
+ langStack = new Stack<String>();
440
+ xmlBaseStack = new Stack<String>();
441
+ elementStack = new Stack<ReaderNode.ElementNode>();
442
+ }
443
+
444
+ @Override
445
+ public void endDocument(Augmentations augs) {
446
+ langStack = null;
447
+ xmlBaseStack = null;
448
+ elementStack = null;
449
+ }
450
+
451
+ @Override
452
+ public void startElement(QName element, XMLAttributes attrs, Augmentations augs) {
453
+ commonElement(element, attrs, false);
454
+ }
455
+
456
+ @Override
457
+ public void endElement(QName element, Augmentations augs) {
458
+ String uri = element.uri;
459
+ String localName = element.localpart;
460
+ String qName = element.rawname;
461
+ depth--;
462
+ ElementNode startElementNode = elementStack.pop();
463
+ ReaderNode node = ReaderNode.createClosingNode(ruby, uri, localName, qName, depth, langStack, xmlBaseStack);
464
+
465
+ startElementNode.endOffset = nodeQueue.size() - 1;
466
+
467
+ if (startElementNode.endOffset != startElementNode.startOffset) {
468
+ // this node isn't empty
469
+ node.attributeList = startElementNode.attributeList;
470
+ node.namespaces = startElementNode.namespaces;
471
+ node.startOffset = startElementNode.startOffset;
472
+ node.endOffset = ++startElementNode.endOffset;
473
+ node.hasChildren = startElementNode.hasChildren = true;
474
+ nodeQueue.add(node);
434
475
  }
476
+ if (!langStack.isEmpty()) langStack.pop();
477
+ if (!xmlBaseStack.isEmpty()) xmlBaseStack.pop();
478
+ }
435
479
 
436
- @Override
437
- public void warning(SAXParseException ex) throws SAXParseException {
438
- nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
439
- throw ex;
480
+ @Override
481
+ public void emptyElement(QName element, XMLAttributes attrs, Augmentations augs) {
482
+ commonElement(element, attrs, true);
483
+ }
484
+
485
+ private void commonElement(QName element, XMLAttributes attrs, boolean isEmpty) {
486
+ String qName = element.rawname;
487
+ String uri = element.uri;
488
+ String localName = element.localpart;
489
+ ReaderNode readerNode = ReaderNode.createElementNode(ruby, uri, localName, qName, attrs, depth, langStack, xmlBaseStack);
490
+ if (!elementStack.isEmpty()) {
491
+ ElementNode parent = elementStack.peek();
492
+ parent.hasChildren = true;
493
+ }
494
+ nodeQueue.add(readerNode);
495
+ readerNode.startOffset = nodeQueue.size() - 1;
496
+ if (!isEmpty) {
497
+ depth++;
498
+ if (readerNode.lang != null) langStack.push(readerNode.lang);
499
+ if (readerNode.xmlBase != null) xmlBaseStack.push(readerNode.xmlBase);
500
+ elementStack.push((ReaderNode.ElementNode)readerNode);
501
+ } else {
502
+ readerNode.endOffset = readerNode.startOffset;
503
+ readerNode.hasChildren = false;
440
504
  }
441
- };
442
- try {
443
- XMLReader reader = XMLReaderFactory.createXMLReader();
444
- reader.setContentHandler(handler);
445
- reader.setDTDHandler(handler);
446
- reader.setErrorHandler(handler);
447
- reader.setEntityResolver(new NokogiriEntityResolver(ruby, null, options));
448
- reader.setFeature("http://xml.org/sax/features/xmlns-uris", true);
449
- reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
450
- reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", options.dtdLoad || options.dtdValid);
451
- return reader;
452
- } catch (SAXException saxe) {
453
- throw RaiseException.createNativeRaiseException(ruby, saxe);
454
505
  }
455
- }
506
+
507
+ @Override
508
+ public void characters(XMLString string, Augmentations augs) {
509
+ ReaderNode.TextNode node = ReaderNode.createTextNode(ruby, string.toString(), depth, langStack, xmlBaseStack);
510
+ nodeQueue.add(node);
511
+ node.startOffset = node.endOffset = nodeQueue.size() - 1;
512
+ }
513
+
514
+ @Override
515
+ public void error(String domain, String key, XMLParseException ex) {
516
+ nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
517
+ throw ex;
518
+ }
519
+
520
+ @Override
521
+ public void fatalError(String domain, String key, XMLParseException ex) {
522
+ nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
523
+ throw ex;
524
+ }
525
+
526
+ @Override
527
+ public void warning(String domain, String key, XMLParseException ex) {
528
+ nodeQueue.add(new ReaderNode.ExceptionNode(ruby, ex));
529
+ throw ex;
530
+ }
531
+ };
456
532
  }