xmlparser 0.6.81
Sign up to get free protection for your applications and to get access to all the features.
- data/MANIFEST +112 -0
- data/README +697 -0
- data/README.ja +789 -0
- data/Rakefile +34 -0
- data/ext/encoding.h +91 -0
- data/ext/xmlparser/mkrf_conf.rb +28 -0
- data/ext/xmlparser/xmlparser.c +2226 -0
- data/lib/sax.rb +1 -0
- data/lib/saxdriver.rb +1 -0
- data/lib/wget.rb +47 -0
- data/lib/xml/dom/builder-ja.rb +58 -0
- data/lib/xml/dom/builder.rb +310 -0
- data/lib/xml/dom/core.rb +3276 -0
- data/lib/xml/dom/digest.rb +94 -0
- data/lib/xml/dom/visitor.rb +182 -0
- data/lib/xml/dom2/attr.rb +213 -0
- data/lib/xml/dom2/cdatasection.rb +76 -0
- data/lib/xml/dom2/characterdata.rb +177 -0
- data/lib/xml/dom2/comment.rb +81 -0
- data/lib/xml/dom2/core.rb +19 -0
- data/lib/xml/dom2/document.rb +317 -0
- data/lib/xml/dom2/documentfragment.rb +82 -0
- data/lib/xml/dom2/documenttype.rb +102 -0
- data/lib/xml/dom2/dombuilder.rb +277 -0
- data/lib/xml/dom2/dombuilderfilter.rb +12 -0
- data/lib/xml/dom2/domentityresolver.rb +13 -0
- data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
- data/lib/xml/dom2/domexception.rb +95 -0
- data/lib/xml/dom2/domimplementation.rb +61 -0
- data/lib/xml/dom2/dominputsource.rb +29 -0
- data/lib/xml/dom2/element.rb +533 -0
- data/lib/xml/dom2/entity.rb +110 -0
- data/lib/xml/dom2/entityreference.rb +107 -0
- data/lib/xml/dom2/namednodemap.rb +138 -0
- data/lib/xml/dom2/node.rb +587 -0
- data/lib/xml/dom2/nodelist.rb +231 -0
- data/lib/xml/dom2/notation.rb +86 -0
- data/lib/xml/dom2/processinginstruction.rb +155 -0
- data/lib/xml/dom2/text.rb +128 -0
- data/lib/xml/dom2/xpath.rb +398 -0
- data/lib/xml/encoding-ja.rb +42 -0
- data/lib/xml/parser.rb +13 -0
- data/lib/xml/parserns.rb +236 -0
- data/lib/xml/sax.rb +353 -0
- data/lib/xml/saxdriver.rb +370 -0
- data/lib/xml/xpath.rb +3284 -0
- data/lib/xml/xpath.ry +2352 -0
- data/lib/xmldigest.rb +1 -0
- data/lib/xmltree.rb +1 -0
- data/lib/xmltreebuilder.rb +1 -0
- data/lib/xmltreevisitor.rb +1 -0
- metadata +111 -0
@@ -0,0 +1,128 @@
|
|
1
|
+
## -*- Ruby -*-
|
2
|
+
## XML::DOM
|
3
|
+
## 1998-2001 by yoshidam
|
4
|
+
##
|
5
|
+
|
6
|
+
require 'xml/dom2/characterdata'
|
7
|
+
require 'xml/dom2/domexception'
|
8
|
+
|
9
|
+
module XML
|
10
|
+
module DOM
|
11
|
+
|
12
|
+
=begin
|
13
|
+
== Class XML::DOM::Text
|
14
|
+
|
15
|
+
=== superclass
|
16
|
+
CharacterData
|
17
|
+
|
18
|
+
=end
|
19
|
+
class Text<CharacterData

=begin
=== Class Methods

    --- Text.new(text)

creates a new Text.
=end
  ## new(text)
  ##     text: String
  def initialize(text = nil)
    super(text)
  end

=begin
=== Methods

    --- Text#nodeType

[DOM]
returns the nodeType.
=end
  ## [DOM]
  def nodeType
    TEXT_NODE
  end

=begin
    --- Text#nodeName

[DOM]
returns the nodeName.
=end
  ## [DOM]
  def nodeName
    "#text"
  end

=begin
    --- Text#to_s

return the string representation of the Text.
=end
  ## Serializes the character data as XML text.  A carriage return is
  ## written as a character reference so that it survives a parser's
  ## line-end normalization; "&", "<" and ">" are written as the
  ## predefined entity references.  Every other character is copied
  ## unchanged.  A nil value serializes to the empty string.
  def to_s
    escapes = {
      "\r" => "&#xD;",
      "&"  => "&amp;",
      "<"  => "&lt;",
      ">"  => "&gt;"
    }
    @value.to_s.gsub(/[\r&<>]/) { |c| escapes[c] }
  end

=begin
    --- Text#dump(depth = 0)

dumps the Text.
=end
  ## Prints the inspected value to standard output, preceded by
  ## two spaces per level of +depth+.
  def dump(depth = 0)
    print ' ' * depth * 2
    print "#{@value.inspect}\n"
  end

=begin
    --- Text#splitText(offset)

[DOM]
breaks this Text node into two Text nodes at the specified offset.
=end
  ## [DOM]
  ## Returns the new Text node holding everything from +offset+ onwards.
  ## Raises DOMException (INDEX_SIZE_ERR) when +offset+ is negative or
  ## greater than the length of the current value.
  def splitText(offset)
    if offset > @value.length || offset < 0
      raise DOMException.new(DOMException::INDEX_SIZE_ERR)
    end
    newText = @value[offset, @value.length]
    newNode = Text.new(newText)
    ## When attached to a tree, the new node becomes the next sibling.
    if !self.parentNode.nil?
      self.parentNode.insertAfter(newNode, self)
    end
    ## Truncate this node in place only after the tail has been copied.
    @value[offset, @value.length] = ""
    newNode
  end

=begin
    --- Text#trim(preserve = false)

trim extra whitespaces.
=end
  ## Strips leading and trailing whitespace from the value in place and
  ## returns it.  When +preserve+ is true nothing is changed and nil is
  ## returned.
  def trim(preserve = false)
    if !preserve
      @value.sub!(/\A\s*([\s\S]*?)\s*\Z/, "\\1")
      return @value
    end
    nil
  end

end
|
127
|
+
end
|
128
|
+
end
|
@@ -0,0 +1,398 @@
|
|
1
|
+
#
|
2
|
+
# xpath-dom.rb
|
3
|
+
#
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2000
|
5
|
+
# DOM2 support by yoshidam
|
6
|
+
#
|
7
|
+
# $Id: xpath.rb,v 1.2 2003/03/12 06:38:28 yoshidam Exp $
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'xml/dom2/core'
|
11
|
+
require 'xml/xpath'
|
12
|
+
|
13
|
+
module XMLScan
|
14
|
+
## Make the top-level ::XPath reachable as XMLScan::XPath, unless that
## constant is already defined.
unless defined?(::XMLScan::XPath)
  XPath = ::XPath
end
|
16
|
+
|
17
|
+
module XPath
|
18
|
+
|
19
|
+
module DOM
|
20
|
+
|
21
|
+
## Base adapter that exposes a DOM node through the accessors the XPath
## evaluator calls (root, parent, children, sibling iteration, index, lang).
class AbstractNodeAdapter < NullNodeAdapter

  ## Binds this adapter to +node+ and returns the adapter itself.
  ## +visitor+ is accepted but not used by this base implementation.
  def wrap(node, visitor)
    @node = node
    self
  end

  attr_reader :node

  ## The document that owns the wrapped node.
  def root
    @node.ownerDocument
  end

  ## The parent of the wrapped node.
  def parent
    @node.parentNode
  end

  ## The child nodes of the wrapped node, as an Array.
  def children
    @node.childNodes.to_a
  end

  ## Yields each sibling after the wrapped node, nearest first.
  def each_following_siblings
    sibling = @node.nextSibling
    while sibling
      yield sibling
      sibling = sibling.nextSibling
    end
  end

  ## Yields each sibling before the wrapped node, nearest first.
  def each_preceding_siblings
    sibling = @node.previousSibling
    while sibling
      yield sibling
      sibling = sibling.previousSibling
    end
  end

  ## Position of the wrapped node among its parent's children (0-based).
  def index
    @node.parentNode.childNodes.to_a.index(@node)
  end

  ## Value of the nearest xml:lang attribute, searching the wrapped node
  ## and then its ancestors.  Returns nil when no node on that path
  ## carries the attribute (the search stops once parentNode is nil
  ## instead of calling a method on nil).
  def lang
    current = @node
    while current
      attrs = current.attributes
      found = attrs && attrs.getNamedItem('xml:lang')
      return found.nodeValue if found
      current = current.parentNode
    end
    nil
  end

end
|
66
|
+
|
67
|
+
|
68
|
+
## Adapter registered for text and CDATA section nodes.
class TextNodeAdapter < AbstractNodeAdapter

  ## XPath node type symbol for this adapter.
  def node_type
    :text
  end

  ## String value: the wrapped node's nodeValue.
  def string_value
    node.nodeValue
  end

end
|
79
|
+
|
80
|
+
|
81
|
+
## Adapter for comment nodes; string_value is inherited from
## TextNodeAdapter, only the reported node type differs.
class CommentNodeAdapter < TextNodeAdapter

  ## XPath node type symbol for this adapter.
  def node_type
    :comment
  end

end
|
88
|
+
|
89
|
+
|
90
|
+
## Adapter for processing instruction nodes.
class PINodeAdapter < AbstractNodeAdapter

  ## XPath node type symbol for this adapter.
  def node_type
    :processing_instruction
  end

  ## Name used for name tests: the wrapped node's nodeName.
  def name_localpart
    node.nodeName
  end

  ## String value: the wrapped node's nodeValue.
  def string_value
    node.nodeValue
  end

end
|
105
|
+
|
106
|
+
|
107
|
+
## Shared behaviour for adapters of nodes that have children.
class ParentNodeAdapter < AbstractNodeAdapter

  ## String value of the wrapped node: the values of all descendant text
  ## nodes (text and CDATA sections) concatenated in document order.
  ## Comments and processing instructions have a nodeValue too, but are
  ## skipped, because XPath 1.0 defines the string-value of an element or
  ## root node over text node descendants only.
  def string_value
    dst = ''
    ## Explicit stack instead of recursion; children are pushed reversed
    ## so that pop visits them in document order.
    stack = @node.childNodes.to_a.reverse
    while node = stack.pop
      case node.nodeType
      when XML::DOM::Node::TEXT_NODE, XML::DOM::Node::CDATA_SECTION_NODE
        s = node.nodeValue
        dst << s if s
      end
      stack.concat node.childNodes.to_a.reverse
    end
    dst
  end

end
|
121
|
+
|
122
|
+
|
123
|
+
## Adapter for the document node.
class RootNodeAdapter < ParentNodeAdapter

  ## XPath node type symbol for this adapter.
  def node_type
    :root
  end

  ## The wrapped node is itself the root, so root is the node reader.
  alias_method :root, :node

  ## The root always reports position 0.
  def index
    0
  end

end
|
136
|
+
|
137
|
+
|
138
|
+
## Adapter for element nodes.  Keeps the visitor so that attribute lists
## can be looked up in, and registered with, the visitor.
class ElementNodeAdapter < ParentNodeAdapter

  ## Binds the adapter to +node+ and +visitor+; returns the adapter.
  def wrap(node, visitor)
    @node, @visitor = node, visitor
    self
  end

  ## XPath node type symbol for this adapter.
  def node_type
    :element
  end

  ## Name used for name tests: the wrapped node's nodeName.
  def name_localpart
    @node.nodeName
  end

  ## Namespace URI of the wrapped node.
  def namespace_uri
    @node.namespaceURI
  end

  ## Qualified name: the wrapped node's nodeName.
  def qualified_name
    @node.nodeName
  end

  ## Attribute nodes of the wrapped element as an Array.  The list held
  ## by the visitor is returned when there is one; otherwise it is built
  ## from the element's attribute map and registered with the visitor.
  def attributes
    attr_map = @node.attributes
    known = @visitor.get_attributes(@node)
    return known if known

    collected = []
    0.upto(attr_map.length - 1) { |pos| collected.push attr_map.item(pos) }
    @visitor.regist_attributes(@node, collected)
    collected
  end

end
|
174
|
+
|
175
|
+
|
176
|
+
## Adapter for attribute nodes.  The parent and position are obtained
## from the visitor rather than from the node itself.
class AttrNodeAdapter < AbstractNodeAdapter

  ## Binds the adapter to +node+ and +visitor+; returns the adapter.
  def wrap(node, visitor)
    @node, @visitor = node, visitor
    self
  end

  ## XPath node type symbol for this adapter.
  def node_type
    :attribute
  end

  ## Name used for name tests: the wrapped node's nodeName.
  def name_localpart
    @node.nodeName
  end

  ## Namespace URI of the wrapped node.
  def namespace_uri
    @node.namespaceURI
  end

  ## Qualified name: the wrapped node's nodeName.
  def qualified_name
    @node.nodeName
  end

  ## The node the visitor has recorded as this attribute's parent.
  def parent
    @visitor.get_attr_parent(@node)
  end

  ## Negated position of the attribute within its parent's registered
  ## attribute list.
  def index
    position = @visitor.get_attributes(parent).index(@node)
    -position
  end

  ## String value: the wrapped node's nodeValue.
  def string_value
    @node.nodeValue
  end

end
|
213
|
+
|
214
|
+
|
215
|
+
|
216
|
+
## Maps DOM nodes to adapters by nodeType and keeps the attribute
## bookkeeping that the element and attribute adapters rely on.
class NodeVisitor

  ## Size of the adapter table.  DOM node type codes run from 1
  ## (ELEMENT_NODE) to 12 (NOTATION_NODE), so indices 0..12 must exist;
  ## a 12-slot table would leave type 12 without an adapter.
  ADAPTER_SLOTS = 13

  ## Builds the adapter table.  Every slot starts out as one shared
  ## NullNodeAdapter; the node types handled by a dedicated adapter are
  ## then overwritten.  Text and CDATA sections share a single
  ## TextNodeAdapter instance.
  def initialize
    @adapters = Array.new(ADAPTER_SLOTS, NullNodeAdapter.new)
    @adapters[XML::DOM::Node::ELEMENT_NODE] = ElementNodeAdapter.new
    @adapters[XML::DOM::Node::ATTRIBUTE_NODE] = AttrNodeAdapter.new
    text_adapter = TextNodeAdapter.new
    @adapters[XML::DOM::Node::CDATA_SECTION_NODE] = text_adapter
    @adapters[XML::DOM::Node::TEXT_NODE] = text_adapter
    @adapters[XML::DOM::Node::PROCESSING_INSTRUCTION_NODE] =
      PINodeAdapter.new
    @adapters[XML::DOM::Node::COMMENT_NODE] = CommentNodeAdapter.new
    @adapters[XML::DOM::Node::DOCUMENT_NODE] = RootNodeAdapter.new
    @attr = {}
  end

  ## Returns the adapter for +node+'s type, wrapped around +node+.
  def visit(node)
    @adapters[node.nodeType].wrap(node, self)
  end

  ## Records +attrs+ as the attribute list of +node+ and +node+ as the
  ## parent of each attribute.  Both mappings live in the same hash.
  def regist_attributes(node, attrs)
    @attr[node] = attrs
    attrs.each { |i| @attr[i] = node }
  end

  ## The attribute list registered for +node+, or nil if none.
  def get_attributes(node)
    @attr[node]
  end

  ## The parent registered for the attribute +node+, or nil if none.
  def get_attr_parent(node)
    @attr[node]
  end

end
|
249
|
+
|
250
|
+
|
251
|
+
|
252
|
+
## XPath evaluation context for DOM trees: identical to the generic
## context except that it always supplies a fresh DOM NodeVisitor.
class Context < XMLScan::XPath::Context

  ## node      - context node
  ## namespace - passed through to the parent context (default nil)
  ## variable  - passed through to the parent context (default nil)
  def initialize(node, namespace = nil, variable = nil)
    dom_visitor = NodeVisitor.new
    super(node, namespace, variable, dom_visitor)
  end

end
|
259
|
+
|
260
|
+
|
261
|
+
end
|
262
|
+
|
263
|
+
end ## module XPath
|
264
|
+
end ## module XMLScan
|
265
|
+
|
266
|
+
|
267
|
+
|
268
|
+
module XML
|
269
|
+
|
270
|
+
module DOM
|
271
|
+
|
272
|
+
## XPath support added to DOM nodes.
class Node

  ## Adds the prefix => namespace URI pair of every descendant element to
  ## +ns+.  A prefix already present in +ns+ is left untouched.
  def __collectDescendatNS(ns = {})
    childNodes.each do |node|
      next if node.nodeType != ELEMENT_NODE
      prefix = node.prefix
      uri = node.namespaceURI
      ns[prefix] = uri unless ns.has_key?(prefix)
      node.__collectDescendatNS(ns)
    end
  end

  ## Adds the prefix => namespace URI pair of this node and of each of
  ## its ancestors to +ns+.  Nearer nodes win: a prefix already present
  ## in +ns+ is left untouched.
  def __collectAncestorNS(ns = {})
    node = self
    while node
      prefix = node.prefix
      uri = node.namespaceURI
      ns[prefix] = uri unless ns.has_key?(prefix)
      node = node.parentNode
    end
  end

  ## Evaluates +xpath+ (a String, or an already compiled expression) with
  ## this node as context node and returns the resulting node set.
  ## When +ns+ is empty it is filled in place with the namespaces found
  ## on the ancestors and descendants of this node.
  ## Raises RuntimeError when the expression does not yield an Array.
  def getNodesByXPath(xpath, ns = {})
    xpath = XMLScan::XPath.compile(xpath) unless xpath.is_a? XMLScan::XPath
    if ns.length == 0
      ## collect namespaces
      __collectAncestorNS(ns)
      __collectDescendatNS(ns)
    end
    ret = xpath.call(XPath::DOM::Context.new(self, ns))
    raise "return value is not NodeSet" unless ret.is_a? Array
    ret
  end

  ## Location step selecting this node among the children of +parent+,
  ## using a 1-based position over all child nodes.  The child list is
  ## converted with to_a first, as the other _getMyLocationInXPath
  ## implementations in this file do, so that only to_a is required of it.
  def _getMyLocationInXPath(parent)
    n = parent.childNodes.to_a.index(self)
    "node()[#{n + 1}]"
  end

  ## Absolute XPath of this node, built from the location steps of the
  ## node and its ancestors.  A node without a parent yields "/".
  def makeXPath
    dst = []
    node = self
    while parent = node.parentNode
      dst.push node._getMyLocationInXPath(parent)
      node = parent
    end
    dst.reverse!
    '/' + dst.join('/')
  end

end
|
323
|
+
|
324
|
+
|
325
|
+
## XPath support added to Element.
class Element

  ## Location step selecting this element among the children of +parent+:
  ## the element name followed by its 1-based position, counted over the
  ## element children that have the same name.
  def _getMyLocationInXPath(parent)
    tag = nodeName
    same_named = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == ELEMENT_NODE && sibling.nodeName == tag
    end
    position = same_named.index(self) + 1
    "#{tag}[#{position}]"
  end

end
|
336
|
+
|
337
|
+
|
338
|
+
## XPath support added to Text.
class Text

  ## Location step selecting this node among the children of +parent+:
  ## "text()" followed by the 1-based position, counted over the text
  ## and CDATA section children.
  def _getMyLocationInXPath(parent)
    textual = parent.childNodes.to_a.select do |sibling|
      kind = sibling.nodeType
      kind == TEXT_NODE || kind == CDATA_SECTION_NODE
    end
    position = textual.index(self) + 1
    "text()[#{position}]"
  end

end
|
348
|
+
|
349
|
+
|
350
|
+
## XPath support added to CDATASection.
class CDATASection

  ## Location step selecting this node among the children of +parent+:
  ## "text()" followed by the 1-based position, counted over the text
  ## and CDATA section children.
  def _getMyLocationInXPath(parent)
    textual = parent.childNodes.to_a.select do |sibling|
      kind = sibling.nodeType
      kind == TEXT_NODE || kind == CDATA_SECTION_NODE
    end
    position = textual.index(self) + 1
    "text()[#{position}]"
  end

end
|
360
|
+
|
361
|
+
|
362
|
+
## XPath support added to Comment.
class Comment

  ## Location step selecting this node among the children of +parent+:
  ## "comment()" followed by the 1-based position, counted over the
  ## comment children.
  def _getMyLocationInXPath(parent)
    comments = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == COMMENT_NODE
    end
    position = comments.index(self) + 1
    "comment()[#{position}]"
  end

end
|
372
|
+
|
373
|
+
|
374
|
+
## XPath support added to ProcessingInstruction.
class ProcessingInstruction

  ## Location step selecting this node among the children of +parent+:
  ## "processing-instruction()" followed by the 1-based position, counted
  ## over the processing instruction children.
  def _getMyLocationInXPath(parent)
    instructions = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == PROCESSING_INSTRUCTION_NODE
    end
    position = instructions.index(self) + 1
    "processing-instruction()[#{position}]"
  end

end
|
384
|
+
|
385
|
+
|
386
|
+
## XPath support added to Attr.
class Attr

  ## XPath step for this attribute: "@" followed by the attribute name.
  def makeXPath
    ['@', nodeName].inject { |step, part| step + part }
  end

end
|
393
|
+
|
394
|
+
|
395
|
+
end
|
396
|
+
|
397
|
+
end
|
398
|
+
|