xmlparser 0.6.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MANIFEST +112 -0
- data/README +697 -0
- data/README.ja +789 -0
- data/Rakefile +34 -0
- data/ext/encoding.h +91 -0
- data/ext/xmlparser/mkrf_conf.rb +28 -0
- data/ext/xmlparser/xmlparser.c +2226 -0
- data/lib/sax.rb +1 -0
- data/lib/saxdriver.rb +1 -0
- data/lib/wget.rb +47 -0
- data/lib/xml/dom/builder-ja.rb +58 -0
- data/lib/xml/dom/builder.rb +310 -0
- data/lib/xml/dom/core.rb +3276 -0
- data/lib/xml/dom/digest.rb +94 -0
- data/lib/xml/dom/visitor.rb +182 -0
- data/lib/xml/dom2/attr.rb +213 -0
- data/lib/xml/dom2/cdatasection.rb +76 -0
- data/lib/xml/dom2/characterdata.rb +177 -0
- data/lib/xml/dom2/comment.rb +81 -0
- data/lib/xml/dom2/core.rb +19 -0
- data/lib/xml/dom2/document.rb +317 -0
- data/lib/xml/dom2/documentfragment.rb +82 -0
- data/lib/xml/dom2/documenttype.rb +102 -0
- data/lib/xml/dom2/dombuilder.rb +277 -0
- data/lib/xml/dom2/dombuilderfilter.rb +12 -0
- data/lib/xml/dom2/domentityresolver.rb +13 -0
- data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
- data/lib/xml/dom2/domexception.rb +95 -0
- data/lib/xml/dom2/domimplementation.rb +61 -0
- data/lib/xml/dom2/dominputsource.rb +29 -0
- data/lib/xml/dom2/element.rb +533 -0
- data/lib/xml/dom2/entity.rb +110 -0
- data/lib/xml/dom2/entityreference.rb +107 -0
- data/lib/xml/dom2/namednodemap.rb +138 -0
- data/lib/xml/dom2/node.rb +587 -0
- data/lib/xml/dom2/nodelist.rb +231 -0
- data/lib/xml/dom2/notation.rb +86 -0
- data/lib/xml/dom2/processinginstruction.rb +155 -0
- data/lib/xml/dom2/text.rb +128 -0
- data/lib/xml/dom2/xpath.rb +398 -0
- data/lib/xml/encoding-ja.rb +42 -0
- data/lib/xml/parser.rb +13 -0
- data/lib/xml/parserns.rb +236 -0
- data/lib/xml/sax.rb +353 -0
- data/lib/xml/saxdriver.rb +370 -0
- data/lib/xml/xpath.rb +3284 -0
- data/lib/xml/xpath.ry +2352 -0
- data/lib/xmldigest.rb +1 -0
- data/lib/xmltree.rb +1 -0
- data/lib/xmltreebuilder.rb +1 -0
- data/lib/xmltreevisitor.rb +1 -0
- metadata +111 -0
@@ -0,0 +1,128 @@
|
|
1
|
+
## -*- Ruby -*-
|
2
|
+
## XML::DOM
|
3
|
+
## 1998-2001 by yoshidam
|
4
|
+
##
|
5
|
+
|
6
|
+
require 'xml/dom2/characterdata'
|
7
|
+
require 'xml/dom2/domexception'
|
8
|
+
|
9
|
+
module XML
|
10
|
+
module DOM
|
11
|
+
|
12
|
+
=begin
|
13
|
+
== Class XML::DOM::Text
|
14
|
+
|
15
|
+
=== superclass
|
16
|
+
CharacterData
|
17
|
+
|
18
|
+
=end
|
19
|
+
class Text<CharacterData
|
20
|
+
|
21
|
+
=begin
|
22
|
+
=== Class Methods
|
23
|
+
|
24
|
+
--- Text.new(text)
|
25
|
+
|
26
|
+
creates a new Text.
|
27
|
+
=end
|
28
|
+
## new(text)
|
29
|
+
## text: String
|
30
|
+
## Creates a Text node.
## text: String (optional, defaults to nil); handed unchanged to the
## superclass constructor.
def initialize(text = nil)
  super(text)
end
|
33
|
+
|
34
|
+
=begin
|
35
|
+
=== Methods
|
36
|
+
|
37
|
+
--- Text#nodeType
|
38
|
+
|
39
|
+
[DOM]
|
40
|
+
returns the nodeType.
|
41
|
+
=end
|
42
|
+
## [DOM]
|
43
|
+
## [DOM]
## Node type code of this node: always the TEXT_NODE constant.
def nodeType
  TEXT_NODE
end
|
46
|
+
|
47
|
+
=begin
|
48
|
+
--- Text#nodeName
|
49
|
+
|
50
|
+
[DOM]
|
51
|
+
returns the nodeName.
|
52
|
+
=end
|
53
|
+
## [DOM]
|
54
|
+
## [DOM]
## Node name of a text node: the fixed string "#text".
def nodeName
  "#text"
end
|
57
|
+
|
58
|
+
=begin
|
59
|
+
--- Text#to_s
|
60
|
+
|
61
|
+
returns the string representation of the Text.
|
62
|
+
=end
|
63
|
+
## Returns a new String holding @value with the characters that cannot
## appear literally in XML character data replaced by references:
##   "&"  => "&amp;"
##   "<"  => "&lt;"
##   ">"  => "&gt;"
##   CR   => "&#xD;"  (a literal carriage return would be normalized
##                     away by an XML parser on re-reading)
## Every other character is copied through unchanged and @value itself
## is not modified.
def to_s
  @value.gsub(/[&<>\r]/) do |c|
    case c
    when "&" then "&amp;"
    when "<" then "&lt;"
    when ">" then "&gt;"
    else
      ## only "\r" can reach this branch; emit it as a hex char reference
      sprintf("&#x%X;", c.unpack("U")[0])
    end
  end
end
|
82
|
+
|
83
|
+
=begin
|
84
|
+
--- Text#dump(depth = 0)
|
85
|
+
|
86
|
+
dumps the Text.
|
87
|
+
=end
|
88
|
+
## Prints the inspected text value on its own line, indented by two
## spaces per +depth+ level.
## depth: Integer nesting level (defaults to 0)
def dump(depth = 0)
  indent = ' ' * depth * 2
  print indent
  line = "#{@value.inspect}\n"
  print line
end
|
92
|
+
|
93
|
+
=begin
|
94
|
+
--- Text#splitText(offset)
|
95
|
+
|
96
|
+
[DOM]
|
97
|
+
breaks this Text node into two Text nodes at the specified offset.
|
98
|
+
=end
|
99
|
+
## [DOM]
|
100
|
+
## [DOM]
## Splits this node at +offset+: this node keeps the characters before
## the offset, and a new Text node holding the remainder is returned.
## When this node has a parent, the new node is inserted right after it.
## Raises DOMException (INDEX_SIZE_ERR) if +offset+ is negative or
## greater than the length of the text.
def splitText(offset)
  raise DOMException.new(DOMException::INDEX_SIZE_ERR) if offset > @value.length || offset < 0

  span = @value.length
  sibling = Text.new(@value[offset, span])
  container = self.parentNode
  container.insertAfter(sibling, self) unless container.nil?
  ## drop the tail that now lives in the new node
  @value[offset, span] = ""
  sibling
end
|
112
|
+
|
113
|
+
=begin
|
114
|
+
--- Text#trim(preserve = false)
|
115
|
+
|
116
|
+
trims leading and trailing whitespace from the text unless preserve is true.
|
117
|
+
=end
|
118
|
+
## Strips leading and trailing whitespace from the text in place.
## preserve: when true the text is left untouched and nil is returned;
##           otherwise the (possibly shortened) text is returned.
def trim(preserve = false)
  return nil if preserve

  @value.sub!(/\A\s*([\s\S]*?)\s*\Z/, "\\1")
  @value
end
|
125
|
+
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
@@ -0,0 +1,398 @@
|
|
1
|
+
#
|
2
|
+
# xpath-dom.rb
|
3
|
+
#
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2000
|
5
|
+
# DOM2 support by yoshidam
|
6
|
+
#
|
7
|
+
# $Id: xpath.rb,v 1.2 2003/03/12 06:38:28 yoshidam Exp $
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'xml/dom2/core'
|
11
|
+
require 'xml/xpath'
|
12
|
+
|
13
|
+
module XMLScan
|
14
|
+
## Expose the top-level ::XPath as XMLScan::XPath, unless XMLScan
## already has an XPath constant of its own.
unless defined?(::XMLScan::XPath)
  XPath = ::XPath
end
|
16
|
+
|
17
|
+
module XPath
|
18
|
+
|
19
|
+
module DOM
|
20
|
+
|
21
|
+
## Base adapter exposing a DOM node through the accessors the XPath
## evaluator calls (root, parent, children, sibling iteration, index,
## lang). An adapter instance is re-targeted with #wrap rather than
## created per node.
class AbstractNodeAdapter < NullNodeAdapter

  ## Points this adapter at +node+ and returns the adapter itself.
  ## +visitor+ is accepted but not used by this base implementation.
  def wrap(node, visitor)
    @node = node
    self
  end

  ## The DOM node currently wrapped.
  attr_reader :node

  ## The owner document of the wrapped node.
  def root
    @node.ownerDocument
  end

  ## The parent DOM node of the wrapped node.
  def parent
    @node.parentNode
  end

  ## The child nodes of the wrapped node as an Array.
  def children
    @node.childNodes.to_a
  end

  ## Yields each sibling after the wrapped node, nearest first.
  def each_following_siblings
    node = @node
    yield node while node = node.nextSibling
  end

  ## Yields each sibling before the wrapped node, nearest first.
  def each_preceding_siblings
    node = @node
    yield node while node = node.previousSibling
  end

  ## Position of the wrapped node among its parent's children.
  def index
    @node.parentNode.childNodes.to_a.index(@node)
  end

  ## Value of the nearest xml:lang attribute found on the wrapped node
  ## or one of its ancestors, or nil when none of them declares one.
  def lang
    node = @node
    ## Stop when the walk runs off the top of the tree; without this
    ## check the lookup would call #attributes on nil.
    while node
      attrs = node.attributes
      found = attrs && attrs.getNamedItem('xml:lang')
      return found.nodeValue if found
      node = node.parentNode
    end
    nil
  end

end
|
66
|
+
|
67
|
+
|
68
|
+
## Adapter presenting a wrapped DOM node as an XPath text node.
class TextNodeAdapter < AbstractNodeAdapter

  ## XPath node type of the wrapped node.
  def node_type
    :text
  end

  ## XPath string-value: the nodeValue of the wrapped node.
  def string_value
    @node.nodeValue
  end

end
|
79
|
+
|
80
|
+
|
81
|
+
## Adapter presenting a wrapped DOM node as an XPath comment node.
## Everything except the node type is inherited from TextNodeAdapter.
class CommentNodeAdapter < TextNodeAdapter

  ## XPath node type of the wrapped node.
  def node_type
    :comment
  end

end
|
88
|
+
|
89
|
+
|
90
|
+
## Adapter presenting a wrapped DOM node as an XPath
## processing-instruction node.
class PINodeAdapter < AbstractNodeAdapter

  ## XPath node type of the wrapped node.
  def node_type
    :processing_instruction
  end

  ## Name used for name tests: the nodeName of the wrapped node.
  def name_localpart
    @node.nodeName
  end

  ## XPath string-value: the nodeValue of the wrapped node.
  def string_value
    @node.nodeValue
  end

end
|
105
|
+
|
106
|
+
|
107
|
+
## Adapter base for nodes that can have children.
class ParentNodeAdapter < AbstractNodeAdapter

  ## XPath string-value: the concatenated values of all descendant
  ## text nodes (Text and CDATASection) in document order.
  ## Comment and processing-instruction content is skipped, as XPath 1.0
  ## builds the string-value from text node descendants only.
  def string_value
    dst = ''
    ## Explicit stack instead of recursion; children are pushed in
    ## reverse so that pop visits them in document order.
    stack = @node.childNodes.to_a.reverse
    while node = stack.pop
      type = node.nodeType
      if type == XML::DOM::Node::TEXT_NODE ||
         type == XML::DOM::Node::CDATA_SECTION_NODE
        s = node.nodeValue
        dst << s if s
      end
      stack.concat node.childNodes.to_a.reverse
    end
    dst
  end

end
|
121
|
+
|
122
|
+
|
123
|
+
## Adapter presenting a wrapped DOM node as the XPath root node.
class RootNodeAdapter < ParentNodeAdapter

  ## XPath node type of the wrapped node.
  def node_type
    :root
  end

  ## The root of a root node is the wrapped node itself.
  alias_method :root, :node

  ## The root node always reports position 0.
  def index
    0
  end

end
|
136
|
+
|
137
|
+
|
138
|
+
## Adapter presenting a wrapped DOM node as an XPath element node.
class ElementNodeAdapter < ParentNodeAdapter

  ## Points this adapter at +node+ and remembers +visitor+, which holds
  ## the attribute cache used by #attributes. Returns the adapter.
  def wrap(node, visitor)
    @node = node
    @visitor = visitor
    self
  end

  ## XPath node type of the wrapped node.
  def node_type
    :element
  end

  ## Name used for name tests: the nodeName of the wrapped node.
  def name_localpart
    @node.nodeName
  end

  ## Namespace URI of the wrapped node.
  def namespace_uri
    @node.namespaceURI
  end

  ## Qualified name: the nodeName of the wrapped node.
  def qualified_name
    @node.nodeName
  end

  ## Attribute nodes of the wrapped element as an Array.
  ## The list is built once per element and registered with the visitor,
  ## so later calls for the same element return the registered Array.
  def attributes
    attr_map = @node.attributes
    cached = @visitor.get_attributes(@node)
    return cached if cached

    collected = []
    0.upto(attr_map.length - 1) { |pos| collected << attr_map.item(pos) }
    @visitor.regist_attributes(@node, collected)
    collected
  end

end
|
174
|
+
|
175
|
+
|
176
|
+
## Adapter presenting a wrapped DOM node as an XPath attribute node.
class AttrNodeAdapter < AbstractNodeAdapter

  ## Points this adapter at +node+ and remembers +visitor+, which is
  ## consulted for the owning element. Returns the adapter.
  def wrap(node, visitor)
    @node = node
    @visitor = visitor
    self
  end

  ## XPath node type of the wrapped node.
  def node_type
    :attribute
  end

  ## Name used for name tests: the nodeName of the wrapped node.
  def name_localpart
    @node.nodeName
  end

  ## Namespace URI of the wrapped node.
  def namespace_uri
    @node.namespaceURI
  end

  ## Qualified name: the nodeName of the wrapped node.
  def qualified_name
    @node.nodeName
  end

  ## The node the visitor registered as owner of this attribute.
  def parent
    @visitor.get_attr_parent(@node)
  end

  ## Negated position of the attribute within the owner's registered
  ## attribute list.
  def index
    siblings = @visitor.get_attributes(parent)
    -siblings.index(@node)
  end

  ## XPath string-value: the nodeValue of the wrapped node.
  def string_value
    @node.nodeValue
  end

end
|
213
|
+
|
214
|
+
|
215
|
+
|
216
|
+
## Maps DOM nodes to XPath node adapters and keeps the attribute
## bookkeeping (element => attribute list, attribute => owner element).
class NodeVisitor

  ## Builds the nodeType => adapter table. Node types without a
  ## dedicated adapter share a single NullNodeAdapter.
  def initialize
    @null_adapter = NullNodeAdapter.new
    @adapters = Array.new(12, @null_adapter)
    @adapters[XML::DOM::Node::ELEMENT_NODE] = ElementNodeAdapter.new
    @adapters[XML::DOM::Node::ATTRIBUTE_NODE] = AttrNodeAdapter.new
    ## Text and CDATA sections deliberately share one adapter instance.
    @adapters[XML::DOM::Node::TEXT_NODE] =
      @adapters[XML::DOM::Node::CDATA_SECTION_NODE] = TextNodeAdapter.new
    @adapters[XML::DOM::Node::PROCESSING_INSTRUCTION_NODE] =
      PINodeAdapter.new
    @adapters[XML::DOM::Node::COMMENT_NODE] = CommentNodeAdapter.new
    @adapters[XML::DOM::Node::DOCUMENT_NODE] = RootNodeAdapter.new
    @attr = {}
  end

  ## Returns the adapter for +node+'s type, wrapped around +node+.
  ## One adapter object is kept per node type and re-targeted on each
  ## call.
  def visit(node)
    ## The table only has slots 0..11; a larger type code (DOM defines
    ## NOTATION_NODE as 12 -- presumably XML::DOM::Node does too) would
    ## otherwise yield nil and fail, so fall back to the null adapter.
    adapter = @adapters[node.nodeType] || @null_adapter
    adapter.wrap(node, self)
  end

  ## Records +attrs+ as the attribute list of +node+ and +node+ as the
  ## owner of every attribute in +attrs+.
  def regist_attributes(node, attrs)
    @attr[node] = attrs
    attrs.each { |i| @attr[i] = node }
  end

  ## Attribute list registered for +node+, or nil if none was registered.
  def get_attributes(node)
    @attr[node]
  end

  ## Owner registered for the attribute +node+, or nil if unknown.
  def get_attr_parent(node)
    @attr[node]
  end

end
|
249
|
+
|
250
|
+
|
251
|
+
|
252
|
+
## XPath evaluation context for DOM trees.
class Context < XMLScan::XPath::Context

  ## node: the context node
  ## namespace, variable: passed through to the superclass (default nil)
  ## A fresh NodeVisitor is supplied as the fourth superclass argument.
  def initialize(node, namespace = nil, variable = nil)
    dom_visitor = NodeVisitor.new
    super(node, namespace, variable, dom_visitor)
  end

end
|
259
|
+
|
260
|
+
|
261
|
+
end
|
262
|
+
|
263
|
+
end ## module XPath
|
264
|
+
end ## module XMLScan
|
265
|
+
|
266
|
+
|
267
|
+
|
268
|
+
module XML
|
269
|
+
|
270
|
+
module DOM
|
271
|
+
|
272
|
+
## XPath helpers added to every DOM node.
class Node

  ## Adds the prefix => namespaceURI pair of every descendant element
  ## to +ns+. A prefix already present in +ns+ is left untouched, so
  ## the first binding encountered wins.
  def __collectDescendatNS(ns = {})
    childNodes.each do |node|
      next if node.nodeType != ELEMENT_NODE
      prefix = node.prefix
      uri = node.namespaceURI
      ns[prefix] = uri unless ns.has_key?(prefix)
      node.__collectDescendatNS(ns)
    end
  end

  ## Adds the prefix => namespaceURI pair of this node and of each of
  ## its ancestors to +ns+, nearest first. Existing keys are kept.
  def __collectAncestorNS(ns = {})
    node = self
    while node
      prefix = node.prefix
      uri = node.namespaceURI
      ns[prefix] = uri unless ns.has_key?(prefix)
      node = node.parentNode
    end
  end

  ## Evaluates +xpath+ with this node as context node and returns the
  ## resulting node set as an Array.
  ## xpath: a compiled XMLScan::XPath, or anything else, which is
  ##        compiled with XMLScan::XPath.compile first
  ## ns:    prefix => URI Hash; when empty it is filled in place from
  ##        this node's ancestors and descendants
  ## Raises RuntimeError when the expression does not yield an Array.
  def getNodesByXPath(xpath, ns = {})
    xpath = XMLScan::XPath.compile(xpath) unless xpath.is_a? XMLScan::XPath
    if ns.empty?
      ## collect namespaces
      __collectAncestorNS(ns)
      __collectDescendatNS(ns)
    end
    ret = xpath.call(XPath::DOM::Context.new(self, ns))
    raise "return value is not NodeSet" unless ret.is_a? Array
    ret
  end

  ## Location step selecting this node among the children of +parent+,
  ## e.g. "node()[3]" (XPath positions are 1-based).
  def _getMyLocationInXPath(parent)
    ## Convert to an Array first, as every other child lookup in this
    ## file does; the child list itself is not assumed to offer #index.
    n = parent.childNodes.to_a.index(self)
    "node()[#{n + 1}]"
  end

  ## Absolute XPath to this node, built from one location step per
  ## ancestor level. A node without a parent yields "/".
  def makeXPath
    dst = []
    node = self
    while parent = node.parentNode
      dst.push node._getMyLocationInXPath(parent)
      node = parent
    end
    dst.reverse!
    '/' + dst.join('/')
  end

end
|
323
|
+
|
324
|
+
|
325
|
+
class Element

  ## Location step selecting this element among the same-named element
  ## children of +parent+, e.g. "item[2]" (1-based position).
  def _getMyLocationInXPath(parent)
    tag = nodeName
    same_named = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == ELEMENT_NODE && sibling.nodeName == tag
    end
    position = same_named.index(self)
    "#{tag}[#{position + 1}]"
  end

end
|
336
|
+
|
337
|
+
|
338
|
+
class Text

  ## Location step selecting this node among the text-like children
  ## (Text and CDATASection) of +parent+, e.g. "text()[1]".
  def _getMyLocationInXPath(parent)
    text_like = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == TEXT_NODE || sibling.nodeType == CDATA_SECTION_NODE
    end
    position = text_like.index(self)
    "text()[#{position + 1}]"
  end

end
|
348
|
+
|
349
|
+
|
350
|
+
class CDATASection

  ## Location step selecting this node among the text-like children
  ## (Text and CDATASection) of +parent+, e.g. "text()[1]".
  def _getMyLocationInXPath(parent)
    text_like = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == TEXT_NODE || sibling.nodeType == CDATA_SECTION_NODE
    end
    position = text_like.index(self)
    "text()[#{position + 1}]"
  end

end
|
360
|
+
|
361
|
+
|
362
|
+
class Comment

  ## Location step selecting this node among the comment children of
  ## +parent+, e.g. "comment()[1]".
  def _getMyLocationInXPath(parent)
    comments = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == COMMENT_NODE
    end
    position = comments.index(self)
    "comment()[#{position + 1}]"
  end

end
|
372
|
+
|
373
|
+
|
374
|
+
class ProcessingInstruction

  ## Location step selecting this node among the processing-instruction
  ## children of +parent+, e.g. "processing-instruction()[1]".
  def _getMyLocationInXPath(parent)
    instructions = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == PROCESSING_INSTRUCTION_NODE
    end
    position = instructions.index(self)
    "processing-instruction()[#{position + 1}]"
  end

end
|
384
|
+
|
385
|
+
|
386
|
+
class Attr

  ## XPath step for this attribute: "@" followed by its nodeName.
  ## The path to the owning element is not included.
  def makeXPath
    attr_name = nodeName
    '@' + attr_name
  end

end
|
393
|
+
|
394
|
+
|
395
|
+
end
|
396
|
+
|
397
|
+
end
|
398
|
+
|