nokogiri 1.5.0.beta.4 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +34 -0
- data/CHANGELOG.rdoc +40 -1
- data/Manifest.txt +11 -2
- data/README.rdoc +1 -1
- data/Rakefile +100 -104
- data/bin/nokogiri +1 -2
- data/ext/nokogiri/nokogiri.c +24 -1
- data/ext/nokogiri/xml_io.c +32 -7
- data/ext/nokogiri/xml_node.c +14 -13
- data/ext/nokogiri/xml_sax_parser.c +9 -4
- data/ext/nokogiri/xslt_stylesheet.c +7 -1
- data/lib/nokogiri.rb +3 -22
- data/lib/nokogiri/css.rb +4 -0
- data/lib/nokogiri/html/document.rb +10 -14
- data/lib/nokogiri/version.rb +76 -23
- data/lib/nokogiri/xml/builder.rb +7 -0
- data/lib/nokogiri/xml/document.rb +17 -1
- data/lib/nokogiri/xml/document_fragment.rb +14 -0
- data/lib/nokogiri/xml/node.rb +36 -28
- data/lib/nokogiri/xml/node/save_options.rb +17 -1
- data/lib/nokogiri/xml/node_set.rb +7 -0
- data/lib/nokogiri/xml/parse_options.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +6 -6
- data/lib/nokogiri/xml/schema.rb +7 -1
- data/nokogiri_help_responses.md +40 -0
- data/tasks/cross_compile.rb +134 -159
- data/tasks/nokogiri.org.rb +18 -0
- data/tasks/test.rb +1 -1
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/noencoding.html +47 -0
- data/test/helper.rb +2 -0
- data/test/html/test_document.rb +15 -0
- data/test/html/test_document_encoding.rb +13 -0
- data/test/test_memory_leak.rb +20 -0
- data/test/test_reader.rb +22 -0
- data/test/test_xslt_transforms.rb +6 -2
- data/test/xml/node/test_save_options.rb +10 -2
- data/test/xml/test_builder.rb +17 -0
- data/test/xml/test_document.rb +22 -0
- data/test/xml/test_node.rb +19 -1
- data/test/xml/test_node_reparenting.rb +16 -3
- data/test/xml/test_node_set.rb +34 -0
- data/test/xml/test_schema.rb +5 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- metadata +185 -157
- data/deps.rip +0 -5
- data/ext/java/nokogiri/EncodingHandler.java +0 -124
- data/ext/java/nokogiri/HtmlDocument.java +0 -149
- data/ext/java/nokogiri/HtmlElementDescription.java +0 -145
- data/ext/java/nokogiri/HtmlEntityLookup.java +0 -79
- data/ext/java/nokogiri/HtmlSaxParserContext.java +0 -259
- data/ext/java/nokogiri/NokogiriService.java +0 -535
- data/ext/java/nokogiri/XmlAttr.java +0 -191
- data/ext/java/nokogiri/XmlAttributeDecl.java +0 -130
- data/ext/java/nokogiri/XmlCdata.java +0 -91
- data/ext/java/nokogiri/XmlComment.java +0 -86
- data/ext/java/nokogiri/XmlDocument.java +0 -529
- data/ext/java/nokogiri/XmlDocumentFragment.java +0 -217
- data/ext/java/nokogiri/XmlDtd.java +0 -467
- data/ext/java/nokogiri/XmlElement.java +0 -222
- data/ext/java/nokogiri/XmlElementContent.java +0 -382
- data/ext/java/nokogiri/XmlElementDecl.java +0 -148
- data/ext/java/nokogiri/XmlEntityDecl.java +0 -162
- data/ext/java/nokogiri/XmlEntityReference.java +0 -75
- data/ext/java/nokogiri/XmlNamespace.java +0 -128
- data/ext/java/nokogiri/XmlNode.java +0 -1399
- data/ext/java/nokogiri/XmlNodeSet.java +0 -311
- data/ext/java/nokogiri/XmlProcessingInstruction.java +0 -103
- data/ext/java/nokogiri/XmlReader.java +0 -411
- data/ext/java/nokogiri/XmlRelaxng.java +0 -144
- data/ext/java/nokogiri/XmlSaxParserContext.java +0 -367
- data/ext/java/nokogiri/XmlSaxPushParser.java +0 -184
- data/ext/java/nokogiri/XmlSchema.java +0 -319
- data/ext/java/nokogiri/XmlSyntaxError.java +0 -119
- data/ext/java/nokogiri/XmlText.java +0 -136
- data/ext/java/nokogiri/XmlXpathContext.java +0 -179
- data/ext/java/nokogiri/XsltStylesheet.java +0 -183
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +0 -206
- data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +0 -73
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +0 -86
- data/ext/java/nokogiri/internals/NokogiriHandler.java +0 -327
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +0 -582
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +0 -171
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +0 -118
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +0 -74
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +0 -121
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +0 -79
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +0 -126
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +0 -56
- data/ext/java/nokogiri/internals/ParserContext.java +0 -278
- data/ext/java/nokogiri/internals/PushInputStream.java +0 -411
- data/ext/java/nokogiri/internals/ReaderNode.java +0 -474
- data/ext/java/nokogiri/internals/SaveContext.java +0 -288
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +0 -76
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +0 -42
- data/ext/java/nokogiri/internals/XmlDomParser.java +0 -77
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +0 -238
- data/ext/java/nokogiri/internals/XmlSaxParser.java +0 -65
- data/ext/java/nokogiri/internals/XsltExtensionFunction.java +0 -72
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/xercesImpl.jar +0 -0
@@ -1,411 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2011:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com/]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com/]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org/]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com/]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
|
-
package nokogiri.internals;
|
34
|
-
|
35
|
-
import java.io.IOException;
|
36
|
-
import java.io.InputStream;
|
37
|
-
import java.nio.channels.ClosedChannelException;
|
38
|
-
import java.util.ArrayList;
|
39
|
-
|
40
|
-
|
41
|
-
/**
 * Implements a "push" InputStream. An owner thread creates the stream and
 * passes it to a second thread. The owner thread calls
 * {@link #write(byte[])} to add data to the stream; the second thread calls
 * {@link #read()} and the other InputStream methods to consume it.
 *
 * All public methods synchronize on this instance and use
 * wait()/notifyAll() on it to hand control back and forth between the
 * writer and the reader.
 *
 * You should ensure that only one thread writes to, and only one thread
 * reads from, this stream, though nothing enforces this strictly.
 */
public class PushInputStream extends InputStream {
    /**
     * Current position in the stream relative to the start of the
     * buffer.
     */
    protected int pos;

    /**
     * Current mark position, or -1 if there is no mark.
     */
    protected int mark;

    /**
     * Read limit passed to the most recent {@link #mark(int)} call, or -1
     * if mark() has never been called.
     */
    protected int readlimit;

    /**
     * State is open or closed.
     */
    protected boolean isOpen;

    /**
     * Pending data; set to null by {@link #close()}.
     */
    protected Buffer buffer;

    // Not read or written anywhere in this class; kept so that any
    // same-package code referring to it keeps compiling.
    int nClose = 0;

    public PushInputStream() {
        pos = 0;
        mark = -1;
        readlimit = -1;
        isOpen = true;

        buffer = new Buffer(512);
    }

    /**
     * @throws IOException (a ClosedChannelException) if the stream has
     *         been closed
     */
    protected synchronized void ensureOpen() throws IOException {
        if (!isOpen) {
            throw new ClosedChannelException();
        }
    }

    /**
     * Write data that can be read from the stream, and wake up any thread
     * waiting on this stream.
     *
     * @param b bytes to append
     * @throws NullPointerException if b is null
     * @throws IllegalStateException if the stream has been closed
     */
    public synchronized void write(byte[] b) {
        if (b == null) {
            throw new NullPointerException("b must not be null");
        }
        if (buffer == null) {
            // close() drops the buffer; fail with a clear message
            // instead of an anonymous NullPointerException.
            throw new IllegalStateException("PushInputStream is closed");
        }
        buffer.put(b);
        notifyAll(); // notify readers waiting
    }

    /**
     * Write data and then wait until all the data has been read
     * (waits until the thread reading from this stream is blocked in
     * a read()), or until the stream is closed.
     *
     * Interrupts do not abort the wait; the thread's interrupt status is
     * restored before this method returns.
     *
     * @param b bytes to append
     * @throws IOException if the stream is already closed
     */
    public synchronized void writeAndWaitForRead(byte[] b) throws IOException {
        ensureOpen();
        write(b);

        boolean interrupted = false;
        boolean notified = false;
        // Always wait for one wake-up, then keep waiting while unread data
        // remains: a reader only notifies once it finds nothing left to
        // read, so waking up with data still pending is a spurious wake-up.
        while (!notified || (isOpen && buffer.size() > pos)) {
            try {
                wait();
                notified = true;
            } catch (InterruptedException e) {
                interrupted = true; // continue waiting
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /*
     *------------------------------------------------------------
     * InputStream methods
     *------------------------------------------------------------
     */

    /**
     * @return number of bytes that can be read without blocking
     * @throws IOException if the stream is closed
     * @see InputStream#available()
     */
    @Override
    public synchronized int available() throws IOException {
        ensureOpen();
        return buffer.size() - pos;
    }

    /**
     * Closes the stream, discards buffered data and wakes up every thread
     * waiting on it. Closing an already closed stream does nothing.
     *
     * @see InputStream#close()
     */
    @Override
    public synchronized void close() throws IOException {
        if (!isOpen) return;
        isOpen = false;
        buffer = null;
        notifyAll();
    }

    /**
     * Mark the current position in this stream.
     *
     * @see InputStream#mark(int)
     */
    @Override
    public synchronized void mark(int readlimit) {
        this.mark = pos;
        this.readlimit = readlimit;
    }

    /**
     * Mark is supported by PushInputStream.
     *
     * @see InputStream#markSupported()
     */
    @Override
    public synchronized boolean markSupported() {
        return true;
    }

    /**
     * Reads one byte, blocking until one is available.
     *
     * @return the byte as a value in 0..255, or -1 if the stream was
     *         closed while waiting for data
     * @throws IOException if the stream is already closed on entry
     * @see InputStream#read()
     */
    @Override
    public synchronized int read() throws IOException {
        ensureOpen();
        byte[] one = new byte[1];
        int n = read(one, 0, 1);
        if (n < 0) {
            return -1;
        }
        // Mask to an unsigned value so 0xFF is not mistaken for -1.
        return one[0] & 0xFF;
    }

    /**
     * @throws IOException if the stream is already closed on entry
     * @see InputStream#read(byte[])
     */
    @Override
    public synchronized int read(byte[] b) throws IOException {
        ensureOpen();
        return read(b, 0, b.length);
    }

    /**
     * @return true if a mark is set and no more than readlimit bytes have
     *         been consumed since it was set
     */
    protected synchronized boolean markIsValid() {
        // Widen to long so a huge readlimit cannot overflow.
        return mark >= 0 && ((long) pos - (long) mark) <= (long) readlimit;
    }

    /**
     * Reads up to len bytes, blocking until at least one byte is available
     * or the stream is closed. While blocked, waiting writers are notified
     * that the reader has consumed everything.
     *
     * After the read, data before the mark (if the mark is still valid) or
     * before the current position (otherwise) is dropped from the buffer.
     *
     * @return number of bytes read, 0 if len is 0, or -1 if the stream is
     *         closed
     * @see InputStream#read(byte[], int, int)
     */
    @Override
    public synchronized int read(byte[] b, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }

        boolean interrupted = false;
        while (isOpen && available() == 0) {
            /* block until data available */
            try {
                notifyAll(); // notify writers waiting
                wait();
            } catch (InterruptedException e) {
                interrupted = true; // continue waiting
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }

        if (!isOpen) {
            return -1;
        }

        int readLen = Math.min(available(), len);

        buffer.get(pos, readLen, b, off);
        pos += readLen;

        // Keep everything from the mark onwards while the mark is valid;
        // otherwise everything already consumed can go.
        int reduce = markIsValid() ? mark : pos;

        buffer.truncateFromStart(buffer.size() - reduce);
        pos -= reduce;
        mark -= reduce;
        if (mark < 0) mark = -1; // don't wrap mark around

        return readLen;
    }

    /**
     * Returns to the marked position if the mark is still valid; otherwise
     * leaves the position unchanged.
     *
     * @throws IOException if the stream is closed
     * @see InputStream#reset()
     */
    @Override
    public synchronized void reset() throws IOException {
        ensureOpen();
        if (markIsValid()) {
            pos = mark;
        }
    }

    /**
     * Skips over buffered bytes without blocking.
     *
     * @param n number of bytes to skip
     * @return number of bytes actually skipped: never more than
     *         {@link #available()}, and 0 when n is not positive
     * @throws IOException if the stream is closed
     * @see InputStream#skip(long)
     */
    @Override
    public synchronized long skip(long n) throws IOException {
        ensureOpen();
        if (n <= 0) {
            return 0;
        }
        int skipped = (int) Math.min(n, (long) available());
        pos += skipped;
        return skipped;
    }

    /*
     *------------------------------------------------------------
     * Data Buffer
     *------------------------------------------------------------
     */

    /**
     * A fixed-size chunk of bytes.
     */
    public static class Block {
        protected byte[] data;

        public Block(int size) {
            data = new byte[size];
        }

        /** Copies length bytes from src[srcPos..] into this block at destPos. */
        public void copyIn(byte[] src, int srcPos, int destPos, int length) {
            System.arraycopy(src, srcPos, data, destPos, length);
        }

        /** Copies length bytes from this block at srcPos into dest[destPos..]. */
        public void copyOut(int srcPos, byte[] dest, int destPos, int length) {
            System.arraycopy(data, srcPos, dest, destPos, length);
        }
    }

    /**
     * ArrayList of blocks that exposes removeRange() publicly.
     */
    public static class BlockList extends ArrayList<Block> {
        public BlockList() {
            super();
        }

        @Override
        public void removeRange(int fromIndex, int toIndex) {
            super.removeRange(fromIndex, toIndex);
        }
    }

    /**
     * Growable byte buffer stored as a list of equally sized blocks. Data
     * is appended at the end and discarded from the start.
     */
    public static class Buffer {
        protected int blockSize;
        protected BlockList blocks;

        /**
         * Offset (position) to the first logical byte in the buffer.
         */
        protected int offset;

        /**
         * Logical size of the buffer.
         */
        protected int size;

        public Buffer(int blockSize) {
            this.blockSize = blockSize;
            this.blocks = new BlockList();
            this.offset = 0;
            this.size = 0;
        }

        public int size() {
            return size;
        }

        /**
         * A contiguous run of bytes inside a single block.
         */
        protected class Segment {
            /**
             * Block index.
             */
            protected int block;

            /**
             * Offset into the block.
             */
            protected int off;

            /**
             * Length of segment, or -1 if not yet assigned.
             */
            protected int len;

            /**
             * Calculate the block number and block offset given a position.
             */
            protected Segment(int pos) {
                int absPos = offset + pos;
                block = absPos / blockSize;
                off = absPos % blockSize;
                len = -1;
            }
        }

        /**
         * Splits the logical range [pos, pos + size) into per-block
         * segments, in order.
         *
         * @return one segment per block touched; empty if size is not
         *         positive
         */
        protected Segment[] accessList(int pos, int size) {
            if (size <= 0) {
                return new Segment[0];
            }

            Segment first = new Segment(pos);
            // The end position is exclusive, so locate the last byte
            // actually touched; using pos + size would add a phantom block
            // whenever the range ends exactly on a block boundary.
            Segment last = new Segment(pos + size - 1);
            Segment[] segs = new Segment[last.block - first.block + 1];

            int cursor = pos;
            int remaining = size;
            for (int i = 0; i < segs.length; i++) {
                Segment seg = (i == 0) ? first : new Segment(cursor);
                seg.len = Math.min(remaining, blockSize - seg.off);
                segs[i] = seg;
                cursor += seg.len;
                remaining -= seg.len;
            }

            return segs;
        }

        /**
         * Allocates blocks until logical positions below pos are backed by
         * storage.
         */
        protected void ensureCapacity(int pos) {
            Segment seg = new Segment(pos - 1);

            while (blocks.size() < (seg.block + 1)) {
                blocks.add(new Block(blockSize));
            }
        }

        /** Appends a single byte. */
        public void put(byte b) {
            put(new byte[] { b });
        }

        /** Appends all of b to the end of the buffer. */
        public void put(byte[] b) {
            ensureCapacity(size + b.length);
            Segment[] segs = accessList(size, b.length);

            int srcOff = 0;
            for (int i = 0; i < segs.length; i++) {
                Block block = blocks.get(segs[i].block);
                block.copyIn(b, srcOff, segs[i].off, segs[i].len);
                srcOff += segs[i].len; // advance through the source array
            }

            size += b.length;
        }

        /** Returns a copy of len bytes starting at logical position pos. */
        public byte[] get(int pos, int len) {
            byte[] b = new byte[len];
            get(pos, len, b, 0);
            return b;
        }

        /**
         * Copies len bytes starting at logical position pos into b,
         * starting at b[off].
         *
         * Throws IndexOutOfBoundsException.
         */
        public void get(int pos, int len, byte[] b, int off) {
            Segment[] segs = accessList(pos, len);
            int destOff = off;
            for (int i = 0; i < segs.length; i++) {
                Block block = blocks.get(segs[i].block);
                block.copyOut(segs[i].off, b, destOff, segs[i].len);
                destOff += segs[i].len; // advance through the destination
            }
        }

        /**
         * Truncate the buffer to <code>newSize</code> by removing
         * data from the start of the buffer.
         *
         * @throws RuntimeException if newSize is negative or larger than
         *         the current size
         */
        public void truncateFromStart(int newSize) {
            if (newSize > size || newSize < 0) {
                throw new RuntimeException("invalid size");
            }

            Segment newStart = new Segment(size - newSize);
            // Drop whole blocks that lie entirely before the new start.
            blocks.removeRange(0, newStart.block);

            size = newSize;
            offset = newStart.off;
        }
    }
}
|
@@ -1,474 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* (The MIT License)
|
3
|
-
*
|
4
|
-
* Copyright (c) 2008 - 2011:
|
5
|
-
*
|
6
|
-
* * {Aaron Patterson}[http://tenderlovemaking.com]
|
7
|
-
* * {Mike Dalessio}[http://mike.daless.io]
|
8
|
-
* * {Charles Nutter}[http://blog.headius.com]
|
9
|
-
* * {Sergio Arbeo}[http://www.serabe.com]
|
10
|
-
* * {Patrick Mahoney}[http://polycrystal.org]
|
11
|
-
* * {Yoko Harada}[http://yokolet.blogspot.com]
|
12
|
-
*
|
13
|
-
* Permission is hereby granted, free of charge, to any person obtaining
|
14
|
-
* a copy of this software and associated documentation files (the
|
15
|
-
* 'Software'), to deal in the Software without restriction, including
|
16
|
-
* without limitation the rights to use, copy, modify, merge, publish,
|
17
|
-
* distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
-
* permit persons to whom the Software is furnished to do so, subject to
|
19
|
-
* the following conditions:
|
20
|
-
*
|
21
|
-
* The above copyright notice and this permission notice shall be
|
22
|
-
* included in all copies or substantial portions of the Software.
|
23
|
-
*
|
24
|
-
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
25
|
-
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
-
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
27
|
-
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
28
|
-
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
29
|
-
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
30
|
-
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
-
*/
|
32
|
-
|
33
|
-
package nokogiri.internals;
|
34
|
-
|
35
|
-
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
|
36
|
-
import static nokogiri.internals.NokogiriHelpers.isNamespace;
|
37
|
-
import static nokogiri.internals.NokogiriHelpers.isXmlBase;
|
38
|
-
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
|
39
|
-
import static nokogiri.internals.NokogiriHelpers.stringOrBlank;
|
40
|
-
import static nokogiri.internals.NokogiriHelpers.stringOrNil;
|
41
|
-
|
42
|
-
import java.util.ArrayList;
|
43
|
-
import java.util.HashMap;
|
44
|
-
import java.util.List;
|
45
|
-
import java.util.Map;
|
46
|
-
import java.util.Set;
|
47
|
-
import java.util.Stack;
|
48
|
-
|
49
|
-
import nokogiri.NokogiriService;
|
50
|
-
import nokogiri.XmlAttr;
|
51
|
-
import nokogiri.XmlDocument;
|
52
|
-
import nokogiri.XmlSyntaxError;
|
53
|
-
|
54
|
-
import org.jruby.Ruby;
|
55
|
-
import org.jruby.RubyArray;
|
56
|
-
import org.jruby.RubyBoolean;
|
57
|
-
import org.jruby.RubyHash;
|
58
|
-
import org.jruby.runtime.ThreadContext;
|
59
|
-
import org.jruby.runtime.builtin.IRubyObject;
|
60
|
-
import org.w3c.dom.Attr;
|
61
|
-
import org.w3c.dom.Document;
|
62
|
-
import org.xml.sax.Attributes;
|
63
|
-
import org.xml.sax.SAXParseException;
|
64
|
-
|
65
|
-
/**
|
66
|
-
* Abstract class of Node for XmlReader.
|
67
|
-
*
|
68
|
-
* @author Yoko Harada <yokolet@gmail.com>
|
69
|
-
*
|
70
|
-
*/
|
71
|
-
public abstract class ReaderNode {
|
72
|
-
|
73
|
-
Ruby ruby;
|
74
|
-
public ReaderAttributeList attributeList;
|
75
|
-
public Map<String, String> namespaces;
|
76
|
-
public int depth, nodeType;
|
77
|
-
public String lang, localName, xmlBase, prefix, name, uri, value, xmlVersion = "1.0";
|
78
|
-
public boolean hasChildren = false;
|
79
|
-
public abstract String getString();
|
80
|
-
private Document document = null;
|
81
|
-
|
82
|
-
public IRubyObject getAttributeByIndex(IRubyObject index){
|
83
|
-
if(index.isNil()) return index;
|
84
|
-
|
85
|
-
long i = index.convertToInteger().getLongValue();
|
86
|
-
if(i > Integer.MAX_VALUE) {
|
87
|
-
throw ruby.newArgumentError("value too long to be an array index");
|
88
|
-
}
|
89
|
-
|
90
|
-
if (attributeList == null) return ruby.getNil();
|
91
|
-
if (i<0 || attributeList.length <= i) return ruby.getNil();
|
92
|
-
return stringOrBlank(ruby, attributeList.values.get(((Long)i).intValue()));
|
93
|
-
}
|
94
|
-
|
95
|
-
public IRubyObject getAttributeByName(IRubyObject name){
|
96
|
-
if(attributeList == null) return ruby.getNil();
|
97
|
-
String value = attributeList.getByName(rubyStringToString(name));
|
98
|
-
return stringOrNil(ruby, value);
|
99
|
-
}
|
100
|
-
|
101
|
-
public IRubyObject getAttributeByName(String name){
|
102
|
-
if(attributeList == null) return ruby.getNil();
|
103
|
-
String value = attributeList.getByName(name);
|
104
|
-
return stringOrNil(ruby, value);
|
105
|
-
}
|
106
|
-
|
107
|
-
public IRubyObject getAttributeCount(){
|
108
|
-
if(attributeList == null) return ruby.newFixnum(0);
|
109
|
-
return ruby.newFixnum(attributeList.length);
|
110
|
-
}
|
111
|
-
|
112
|
-
public IRubyObject getAttributesNodes() {
|
113
|
-
RubyArray array = RubyArray.newArray(ruby);
|
114
|
-
if (attributeList != null && attributeList.length > 0) {
|
115
|
-
if (document == null) {
|
116
|
-
document = ((XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(ruby, getNokogiriClass(ruby, "Nokogiri::XML::Document"))).getDocument();
|
117
|
-
}
|
118
|
-
for (int i=0; i<attributeList.length; i++) {
|
119
|
-
if (!isNamespace(attributeList.names.get(i))) {
|
120
|
-
Attr attr = document.createAttributeNS(attributeList.namespaces.get(i), attributeList.names.get(i));
|
121
|
-
attr.setValue(attributeList.values.get(i));
|
122
|
-
XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(ruby, getNokogiriClass(ruby, "Nokogiri::XML::Attr"));
|
123
|
-
xmlAttr.setNode(ruby.getCurrentContext(), attr);
|
124
|
-
array.append(xmlAttr);
|
125
|
-
}
|
126
|
-
}
|
127
|
-
}
|
128
|
-
return array;
|
129
|
-
}
|
130
|
-
|
131
|
-
public IRubyObject getAttributes(ThreadContext context) {
|
132
|
-
if(attributeList == null) return context.getRuntime().getNil();
|
133
|
-
RubyHash hash = RubyHash.newHash(context.getRuntime());
|
134
|
-
for (int i=0; i<attributeList.length; i++) {
|
135
|
-
IRubyObject k = stringOrBlank(context.getRuntime(), attributeList.names.get(i));
|
136
|
-
IRubyObject v = stringOrBlank(context.getRuntime(), attributeList.values.get(i));
|
137
|
-
if (context.getRuntime().is1_9()) hash.op_aset19(context, k, v);
|
138
|
-
else hash.op_aset(context, k, v);
|
139
|
-
}
|
140
|
-
return hash;
|
141
|
-
}
|
142
|
-
|
143
|
-
public IRubyObject getDepth() {
|
144
|
-
return ruby.newFixnum(depth);
|
145
|
-
}
|
146
|
-
|
147
|
-
public IRubyObject getLang() {
|
148
|
-
return stringOrNil(ruby, lang);
|
149
|
-
}
|
150
|
-
|
151
|
-
public IRubyObject getLocalName() {
|
152
|
-
return stringOrNil(ruby, localName);
|
153
|
-
}
|
154
|
-
|
155
|
-
public IRubyObject getName() {
|
156
|
-
return stringOrNil(ruby, name);
|
157
|
-
}
|
158
|
-
|
159
|
-
public IRubyObject getNamespaces(ThreadContext context) {
|
160
|
-
if(namespaces == null) return ruby.getNil();
|
161
|
-
RubyHash hash = RubyHash.newHash(ruby);
|
162
|
-
Set<String> keys = namespaces.keySet();
|
163
|
-
for (String key : keys) {
|
164
|
-
String stringValue = namespaces.get(key);
|
165
|
-
IRubyObject k = stringOrBlank(context.getRuntime(), key);
|
166
|
-
IRubyObject v = stringOrBlank(context.getRuntime(), stringValue);
|
167
|
-
if (context.getRuntime().is1_9()) hash.op_aset19(context, k, v);
|
168
|
-
else hash.op_aset(context, k, v);
|
169
|
-
}
|
170
|
-
return hash;
|
171
|
-
}
|
172
|
-
|
173
|
-
public IRubyObject getXmlBase() {
|
174
|
-
return stringOrNil(ruby, xmlBase);
|
175
|
-
}
|
176
|
-
|
177
|
-
public IRubyObject getPrefix() {
|
178
|
-
return stringOrNil(ruby, prefix);
|
179
|
-
}
|
180
|
-
|
181
|
-
public IRubyObject getUri() {
|
182
|
-
return stringOrNil(ruby, uri);
|
183
|
-
}
|
184
|
-
|
185
|
-
public IRubyObject getValue() {
|
186
|
-
return stringOrNil(ruby, value);
|
187
|
-
}
|
188
|
-
|
189
|
-
public IRubyObject getXmlVersion() {
|
190
|
-
return ruby.newString(xmlVersion);
|
191
|
-
}
|
192
|
-
|
193
|
-
public RubyBoolean hasAttributes() {
|
194
|
-
if (attributeList == null || attributeList.length == 0) return ruby.getFalse();
|
195
|
-
return ruby.getTrue();
|
196
|
-
}
|
197
|
-
|
198
|
-
public abstract RubyBoolean hasValue();
|
199
|
-
|
200
|
-
public RubyBoolean isDefault(){
|
201
|
-
// TODO Implement.
|
202
|
-
return ruby.getFalse();
|
203
|
-
}
|
204
|
-
|
205
|
-
public boolean isError() { return false; }
|
206
|
-
|
207
|
-
protected void parsePrefix(String qName) {
|
208
|
-
int index = qName.indexOf(':');
|
209
|
-
if(index != -1) prefix = qName.substring(0, index);
|
210
|
-
}
|
211
|
-
|
212
|
-
public void setLang(String lang) {
|
213
|
-
lang = (lang != null) ? lang : null;
|
214
|
-
}
|
215
|
-
|
216
|
-
public IRubyObject toSyntaxError() { return ruby.getNil(); }
|
217
|
-
|
218
|
-
public IRubyObject getNodeType() { return ruby.newFixnum(nodeType); }
|
219
|
-
|
220
|
-
public static enum ReaderNodeType {
|
221
|
-
NODE(0),
|
222
|
-
ELEMENT(1),
|
223
|
-
ATTRIBUTE(2),
|
224
|
-
TEXT(3),
|
225
|
-
CDATA(4),
|
226
|
-
ENTITY_REFERENCE(5),
|
227
|
-
ENTITY(6),
|
228
|
-
PROCESSING_INSTRUCTION(7),
|
229
|
-
COMMENT(8),
|
230
|
-
DOCUMENT(9),
|
231
|
-
DOCUMENT_TYPE(10),
|
232
|
-
DOCUMENTFRAGMENT(11),
|
233
|
-
NOTATION(12),
|
234
|
-
WHITESPACE(13),
|
235
|
-
SIGNIFICANT_WHITESPACE(14),
|
236
|
-
END_ELEMENT(15),
|
237
|
-
END_ENTITY(16),
|
238
|
-
XML_DECLARATION(17);
|
239
|
-
|
240
|
-
private final int value;
|
241
|
-
ReaderNodeType(int value) {
|
242
|
-
this.value = value;
|
243
|
-
}
|
244
|
-
|
245
|
-
public int getValue() {
|
246
|
-
return value;
|
247
|
-
}
|
248
|
-
}
|
249
|
-
|
250
|
-
public static class ClosingNode extends ReaderNode {
|
251
|
-
|
252
|
-
public ClosingNode(Ruby ruby, String uri, String localName, String qName, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
253
|
-
this.ruby = ruby;
|
254
|
-
nodeType = ReaderNodeType.END_ELEMENT.getValue();
|
255
|
-
this.uri = "".equals(uri) ? null : uri;
|
256
|
-
this.localName = localName.trim().length() > 0 ? localName : qName;
|
257
|
-
this.name = qName;
|
258
|
-
parsePrefix(qName);
|
259
|
-
this.depth = depth;
|
260
|
-
if (!langStack.isEmpty()) this.lang = langStack.peek();
|
261
|
-
if (!xmlBaseStack.isEmpty()) this.xmlBase = xmlBaseStack.peek();
|
262
|
-
}
|
263
|
-
|
264
|
-
@Override
|
265
|
-
public IRubyObject getAttributeCount() {
|
266
|
-
return ruby.newFixnum(0);
|
267
|
-
}
|
268
|
-
|
269
|
-
@Override
|
270
|
-
public RubyBoolean hasValue() {
|
271
|
-
return ruby.getFalse();
|
272
|
-
}
|
273
|
-
|
274
|
-
@Override
|
275
|
-
public String getString() {
|
276
|
-
StringBuffer sb = new StringBuffer();
|
277
|
-
sb.append("</").append(name).append(">");
|
278
|
-
return new String(sb);
|
279
|
-
}
|
280
|
-
}
|
281
|
-
|
282
|
-
public static class ElementNode extends ReaderNode {
|
283
|
-
private List<String> attributeStrings = new ArrayList<String>();
|
284
|
-
|
285
|
-
public ElementNode(Ruby ruby, String uri, String localName, String qName, Attributes attrs, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
286
|
-
this.ruby = ruby;
|
287
|
-
this.nodeType = ReaderNodeType.ELEMENT.getValue();
|
288
|
-
this.uri = "".equals(uri) ? null : uri;
|
289
|
-
this.localName = localName.trim().length() > 0 ? localName : qName;
|
290
|
-
this.name = qName;
|
291
|
-
parsePrefix(qName);
|
292
|
-
this.depth = depth;
|
293
|
-
hasChildren = true;
|
294
|
-
parseAttributes(attrs, langStack, xmlBaseStack);
|
295
|
-
}
|
296
|
-
|
297
|
-
@Override
|
298
|
-
public RubyBoolean hasValue() {
|
299
|
-
return ruby.getFalse();
|
300
|
-
}
|
301
|
-
|
302
|
-
private void parseAttributes(Attributes attrs, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
303
|
-
if (attrs.getLength() > 0) attributeList = new ReaderAttributeList();
|
304
|
-
String u, n, v;
|
305
|
-
for (int i = 0; i < attrs.getLength(); i++) {
|
306
|
-
u = attrs.getURI(i);
|
307
|
-
n = attrs.getQName(i);
|
308
|
-
v = attrs.getValue(i);
|
309
|
-
if (isNamespace(n)) {
|
310
|
-
if (namespaces == null) namespaces = new HashMap<String, String>();
|
311
|
-
namespaces.put(n, v);
|
312
|
-
} else {
|
313
|
-
if (lang == null) lang = resolveLang(n, v, langStack);
|
314
|
-
if (xmlBase == null) xmlBase = resolveXmlBase(n, v, xmlBaseStack);
|
315
|
-
}
|
316
|
-
attributeList.add(u, n, v);
|
317
|
-
attributeStrings.add(n + "=\"" + v + "\"");
|
318
|
-
}
|
319
|
-
}
|
320
|
-
|
321
|
-
private String resolveLang(String n, String v, Stack<String> langStack) {
|
322
|
-
if ("xml:lang".equals(n)) {
|
323
|
-
return v;
|
324
|
-
} else if (!langStack.isEmpty()) {
|
325
|
-
return langStack.peek();
|
326
|
-
} else {
|
327
|
-
return null;
|
328
|
-
}
|
329
|
-
}
|
330
|
-
|
331
|
-
private String resolveXmlBase(String n, String v, Stack<String> xmlBaseStack) {
|
332
|
-
if (isXmlBase(n)) {
|
333
|
-
return getXmlBaseUri(n, v, xmlBaseStack);
|
334
|
-
} else if (!xmlBaseStack.isEmpty()) {
|
335
|
-
return xmlBaseStack.peek();
|
336
|
-
} else {
|
337
|
-
return null;
|
338
|
-
}
|
339
|
-
}
|
340
|
-
|
341
|
-
private String getXmlBaseUri(String n, String v, Stack<String> xmlBaseStack) {
|
342
|
-
if ("xml:base".equals(n)) {
|
343
|
-
if (v.startsWith("http://")) {
|
344
|
-
return v;
|
345
|
-
} else if (v.startsWith("/") && v.endsWith("/")) {
|
346
|
-
String sub = v.substring(1, v.length() - 2);
|
347
|
-
String base = xmlBaseStack.peek();
|
348
|
-
if (base.endsWith("/")) {
|
349
|
-
base = base.substring(0, base.length() - 1);
|
350
|
-
}
|
351
|
-
int pos = base.lastIndexOf("/");
|
352
|
-
return base.substring(0, pos).concat(sub);
|
353
|
-
} else {
|
354
|
-
String base = xmlBaseStack.peek();
|
355
|
-
if (base.endsWith("/")) return base.concat(v);
|
356
|
-
else return base.concat("/").concat(v);
|
357
|
-
}
|
358
|
-
} else if ("xlink:href".equals(n)) {
|
359
|
-
String base = xmlBaseStack.peek();
|
360
|
-
if (base.endsWith("/")) return base.concat(v);
|
361
|
-
else return base.concat("/").concat(v);
|
362
|
-
}
|
363
|
-
return null;
|
364
|
-
}
|
365
|
-
|
366
|
-
@Override
|
367
|
-
public String getString() {
|
368
|
-
StringBuffer sb = new StringBuffer();
|
369
|
-
sb.append("<").append(name);
|
370
|
-
if (attributeList != null) {
|
371
|
-
for (int i=0; i<attributeList.length; i++) {
|
372
|
-
sb.append(" ").append(attributeStrings.get(i));
|
373
|
-
}
|
374
|
-
}
|
375
|
-
if (hasChildren) sb.append(">");
|
376
|
-
else sb.append("/>");
|
377
|
-
return new String(sb);
|
378
|
-
}
|
379
|
-
}
|
380
|
-
|
381
|
-
public static class ReaderAttributeList {
|
382
|
-
List<String> namespaces = new ArrayList<String>();
|
383
|
-
List<String> names = new ArrayList<String>();
|
384
|
-
List<String> values = new ArrayList<String>();
|
385
|
-
int length = 0;
|
386
|
-
|
387
|
-
void add(String namespace, String name, String value) {
|
388
|
-
namespace = namespace != null ? namespace : "";
|
389
|
-
namespaces.add(namespace);
|
390
|
-
name = name != null ? name : "";
|
391
|
-
names.add(name);
|
392
|
-
value = value != null ? value : "";
|
393
|
-
values.add(value);
|
394
|
-
length++;
|
395
|
-
}
|
396
|
-
|
397
|
-
String getByName(String name) {
|
398
|
-
for (int i=0; i<names.size(); i++) {
|
399
|
-
if (name.equals(names.get(i))) {
|
400
|
-
return values.get(i);
|
401
|
-
}
|
402
|
-
}
|
403
|
-
return null;
|
404
|
-
}
|
405
|
-
}
|
406
|
-
|
407
|
-
public static class EmptyNode extends ReaderNode {
|
408
|
-
|
409
|
-
public EmptyNode(Ruby ruby) {
|
410
|
-
this.ruby = ruby;
|
411
|
-
this.nodeType = ReaderNodeType.NODE.getValue();
|
412
|
-
}
|
413
|
-
|
414
|
-
@Override
|
415
|
-
public IRubyObject getXmlVersion() {
|
416
|
-
return this.ruby.getNil();
|
417
|
-
}
|
418
|
-
|
419
|
-
@Override
|
420
|
-
public RubyBoolean hasValue() {
|
421
|
-
return ruby.getFalse();
|
422
|
-
}
|
423
|
-
|
424
|
-
@Override
|
425
|
-
public String getString() {
|
426
|
-
return null;
|
427
|
-
}
|
428
|
-
}
|
429
|
-
|
430
|
-
public static class ExceptionNode extends EmptyNode {
|
431
|
-
private final XmlSyntaxError exception;
|
432
|
-
|
433
|
-
// Still don't know what to do with ex.
|
434
|
-
public ExceptionNode(Ruby runtime, SAXParseException ex) {
|
435
|
-
super(runtime);
|
436
|
-
exception = (XmlSyntaxError) NokogiriService.XML_SYNTAXERROR_ALLOCATOR.allocate(runtime, getNokogiriClass(ruby, "Nokogiri::XML::SyntaxError"));
|
437
|
-
}
|
438
|
-
|
439
|
-
@Override
|
440
|
-
public boolean isError() {
|
441
|
-
return true;
|
442
|
-
}
|
443
|
-
|
444
|
-
@Override
|
445
|
-
public IRubyObject toSyntaxError() {
|
446
|
-
return this.exception;
|
447
|
-
}
|
448
|
-
}
|
449
|
-
|
450
|
-
public static class TextNode extends ReaderNode {
|
451
|
-
|
452
|
-
public TextNode(Ruby ruby, String content, int depth, Stack<String> langStack, Stack<String> xmlBaseStack) {
|
453
|
-
this.ruby = ruby;
|
454
|
-
this.value = content;
|
455
|
-
this.localName = "#text";
|
456
|
-
this.name = "#text";
|
457
|
-
this.depth = depth;
|
458
|
-
if (content.trim().length() > 0) nodeType = ReaderNodeType.TEXT.getValue();
|
459
|
-
else nodeType = ReaderNodeType.SIGNIFICANT_WHITESPACE.getValue();
|
460
|
-
if (!langStack.isEmpty()) this.lang = langStack.peek();
|
461
|
-
if (!xmlBaseStack.isEmpty()) this.xmlBase = xmlBaseStack.peek();
|
462
|
-
}
|
463
|
-
|
464
|
-
@Override
|
465
|
-
public RubyBoolean hasValue() {
|
466
|
-
return ruby.getTrue();
|
467
|
-
}
|
468
|
-
|
469
|
-
@Override
|
470
|
-
public String getString() {
|
471
|
-
return value;
|
472
|
-
}
|
473
|
-
}
|
474
|
-
}
|