xmlparser 0.6.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. data/MANIFEST +112 -0
  2. data/README +697 -0
  3. data/README.ja +789 -0
  4. data/Rakefile +34 -0
  5. data/ext/encoding.h +91 -0
  6. data/ext/xmlparser/mkrf_conf.rb +28 -0
  7. data/ext/xmlparser/xmlparser.c +2226 -0
  8. data/lib/sax.rb +1 -0
  9. data/lib/saxdriver.rb +1 -0
  10. data/lib/wget.rb +47 -0
  11. data/lib/xml/dom/builder-ja.rb +58 -0
  12. data/lib/xml/dom/builder.rb +310 -0
  13. data/lib/xml/dom/core.rb +3276 -0
  14. data/lib/xml/dom/digest.rb +94 -0
  15. data/lib/xml/dom/visitor.rb +182 -0
  16. data/lib/xml/dom2/attr.rb +213 -0
  17. data/lib/xml/dom2/cdatasection.rb +76 -0
  18. data/lib/xml/dom2/characterdata.rb +177 -0
  19. data/lib/xml/dom2/comment.rb +81 -0
  20. data/lib/xml/dom2/core.rb +19 -0
  21. data/lib/xml/dom2/document.rb +317 -0
  22. data/lib/xml/dom2/documentfragment.rb +82 -0
  23. data/lib/xml/dom2/documenttype.rb +102 -0
  24. data/lib/xml/dom2/dombuilder.rb +277 -0
  25. data/lib/xml/dom2/dombuilderfilter.rb +12 -0
  26. data/lib/xml/dom2/domentityresolver.rb +13 -0
  27. data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
  28. data/lib/xml/dom2/domexception.rb +95 -0
  29. data/lib/xml/dom2/domimplementation.rb +61 -0
  30. data/lib/xml/dom2/dominputsource.rb +29 -0
  31. data/lib/xml/dom2/element.rb +533 -0
  32. data/lib/xml/dom2/entity.rb +110 -0
  33. data/lib/xml/dom2/entityreference.rb +107 -0
  34. data/lib/xml/dom2/namednodemap.rb +138 -0
  35. data/lib/xml/dom2/node.rb +587 -0
  36. data/lib/xml/dom2/nodelist.rb +231 -0
  37. data/lib/xml/dom2/notation.rb +86 -0
  38. data/lib/xml/dom2/processinginstruction.rb +155 -0
  39. data/lib/xml/dom2/text.rb +128 -0
  40. data/lib/xml/dom2/xpath.rb +398 -0
  41. data/lib/xml/encoding-ja.rb +42 -0
  42. data/lib/xml/parser.rb +13 -0
  43. data/lib/xml/parserns.rb +236 -0
  44. data/lib/xml/sax.rb +353 -0
  45. data/lib/xml/saxdriver.rb +370 -0
  46. data/lib/xml/xpath.rb +3284 -0
  47. data/lib/xml/xpath.ry +2352 -0
  48. data/lib/xmldigest.rb +1 -0
  49. data/lib/xmltree.rb +1 -0
  50. data/lib/xmltreebuilder.rb +1 -0
  51. data/lib/xmltreevisitor.rb +1 -0
  52. metadata +111 -0
@@ -0,0 +1,128 @@
1
+ ## -*- Ruby -*-
2
+ ## XML::DOM
3
+ ## 1998-2001 by yoshidam
4
+ ##
5
+
6
+ require 'xml/dom2/characterdata'
7
+ require 'xml/dom2/domexception'
8
+
9
+ module XML
10
+ module DOM
11
+
12
+ =begin
13
+ == Class XML::DOM::Text
14
+
15
+ === superclass
16
+ CharacterData
17
+
18
+ =end
19
+ class Text<CharacterData
20
+
21
+ =begin
22
+ === Class Methods
23
+
24
+ --- Text.new(text)
25
+
26
+ creates a new Text.
27
+ =end
28
+ ## new(text)
29
+ ## text: String
30
## Creates a Text node. The optional +text+ (a String) is passed straight
## through to the superclass constructor.
def initialize(text = nil)
  super
end
33
+
34
+ =begin
35
+ === Methods
36
+
37
+ --- Text#nodeType
38
+
39
+ [DOM]
40
+ returns the nodeType.
41
+ =end
42
+ ## [DOM]
43
## [DOM]
## Node type code reported by every Text node.
def nodeType
  TEXT_NODE
end
46
+
47
+ =begin
48
+ --- Text#nodeName
49
+
50
+ [DOM]
51
+ returns the nodeName.
52
+ =end
53
+ ## [DOM]
54
## [DOM]
## Text nodes all share the fixed node name "#text".
def nodeName
  "#text"
end
57
+
58
+ =begin
59
+ --- Text#to_s
60
+
61
+ returns the string representation of the Text.
62
+ =end
63
## Returns the character data escaped for XML output: "&", "<" and ">"
## become "&amp;", "&lt;" and "&gt;", a carriage return (code point 13)
## becomes the hexadecimal character reference "&#xD;", and every other
## character is copied unchanged. @value itself is not modified.
def to_s
  @value.scan(/./um).each_with_object("") do |ch, out|
    out << case ch
           when "&" then "&amp;"
           when "<" then "&lt;"
           when ">" then "&gt;"
           else
             code = ch.unpack("U")[0]
             code == 13 ? sprintf("&#x%X;", code) : ch
           end
  end
end
82
+
83
+ =begin
84
+ --- Text#dump(depth = 0)
85
+
86
+ dumps the Text.
87
+ =end
88
## Debugging aid: prints the inspected value on its own line, indented by
## two spaces per +depth+ level.
def dump(depth = 0)
  print(' ' * depth * 2)
  print("#{@value.inspect}\n")
end
92
+
93
+ =begin
94
+ --- Text#splitText(offset)
95
+
96
+ [DOM]
97
+ breaks this Text node into two Text nodes at the specified offset.
98
+ =end
99
+ ## [DOM]
100
## [DOM]
## Splits this node at +offset+. The characters from +offset+ onwards move
## into a new Text node, which is inserted after this node when this node has
## a parent; this node keeps the characters before +offset+.
##
## offset: Integer in 0..@value.length
## returns: the new Text node
## raises: DOMException (INDEX_SIZE_ERR) when +offset+ is out of range
def splitText(offset)
  unless offset <= @value.length && offset >= 0
    raise DOMException.new(DOMException::INDEX_SIZE_ERR)
  end
  tail = Text.new(@value[offset, @value.length])
  parent = self.parentNode
  parent.insertAfter(tail, self) unless parent.nil?
  # Truncate in place only after the tail has been copied out.
  @value[offset, @value.length] = ""
  tail
end
112
+
113
+ =begin
114
+ --- Text#trim(preserve = false)
115
+
116
+ trims leading and trailing whitespace unless preserve is true.
117
+ =end
118
## Strips leading and trailing whitespace from the value in place.
##
## preserve: when true nothing is changed and nil is returned
## returns: the (mutated) value String, or nil when +preserve+ is set
def trim(preserve = false)
  return nil if preserve

  @value.sub!(/\A\s*([\s\S]*?)\s*\Z/, "\\1")
  @value
end
125
+
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,398 @@
1
+ #
2
+ # xpath-dom.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2000
5
+ # DOM2 support by yoshidam
6
+ #
7
+ # $Id: xpath.rb,v 1.2 2003/03/12 06:38:28 yoshidam Exp $
8
+ #
9
+
10
+ require 'xml/dom2/core'
11
+ require 'xml/xpath'
12
+
13
+ module XMLScan
14
+ XPath = ::XPath unless
15
+ defined?(::XMLScan::XPath)
16
+
17
+ module XPath
18
+
19
+ module DOM
20
+
21
+ class AbstractNodeAdapter < NullNodeAdapter
22
+
23
# Points this adapter at +node+ and hands the adapter back so calls can be
# chained. +visitor+ is accepted but not stored here.
def wrap(node, visitor)
  @node = node
  return self
end
27
+
28
+ attr_reader :node
29
+
30
# The owner document of the wrapped node.
def root
  @node.ownerDocument
end
33
+
34
# The DOM parent of the wrapped node.
def parent
  @node.parentNode
end
37
+
38
# The wrapped node's child nodes, converted to an Array with #to_a.
def children
  @node.childNodes.to_a
end
41
+
42
# Yields every sibling after the wrapped node, nearest first, by following
# nextSibling until it returns nil. The wrapped node itself is not yielded.
def each_following_siblings
  sibling = @node.nextSibling
  while sibling
    yield sibling
    sibling = sibling.nextSibling
  end
end
46
+
47
# Yields every sibling before the wrapped node, nearest first, by following
# previousSibling until it returns nil. The wrapped node itself is not yielded.
def each_preceding_siblings
  sibling = @node.previousSibling
  while sibling
    yield sibling
    sibling = sibling.previousSibling
  end
end
51
+
52
# Zero-based position of the wrapped node among its parent's child nodes.
def index
  siblings = @node.parentNode.childNodes.to_a
  siblings.index(@node)
end
55
+
56
# Returns the xml:lang value in effect for the wrapped node: the nodeValue
# of the nearest 'xml:lang' attribute found on the node itself or on one of
# its ancestors (reached through parentNode).
#
# Returns nil when no node on that chain carries the attribute. The walk
# stops as soon as parentNode returns nil, so it never calls #attributes
# on nil.
def lang
  node = @node
  while node
    attrs = node.attributes
    found = attrs && attrs.getNamedItem('xml:lang')
    return found.nodeValue if found
    node = node.parentNode
  end
  nil
end
64
+
65
+ end
66
+
67
+
68
# XPath view of a character-data node; its string-value is the node's
# nodeValue.
class TextNodeAdapter < AbstractNodeAdapter

  # XPath node type tag.
  def node_type
    :text
  end

  # The wrapped node's nodeValue.
  def string_value
    @node.nodeValue
  end

end
79
+
80
+
81
# XPath view of a comment node. Inherits string_value from TextNodeAdapter
# and only changes the reported node type.
class CommentNodeAdapter < TextNodeAdapter

  # XPath node type tag.
  def node_type
    :comment
  end

end
88
+
89
+
90
# XPath view of a processing-instruction node.
class PINodeAdapter < AbstractNodeAdapter

  # XPath node type tag.
  def node_type
    :processing_instruction
  end

  # The wrapped node's nodeName.
  def name_localpart
    @node.nodeName
  end

  # The wrapped node's nodeValue.
  def string_value
    @node.nodeValue
  end

end
105
+
106
+
107
+ class ParentNodeAdapter < AbstractNodeAdapter
108
+
109
# XPath string-value of a container node: the character data of all
# descendant text nodes (Text and CDATASection), concatenated in document
# order.
#
# Comment and processing-instruction descendants also have a nodeValue in
# the DOM, but XPath 1.0 excludes them from an element's or root's
# string-value, so they are skipped here.
#
# The traversal is an explicit depth-first walk: children are pushed in
# reverse so that pop visits them left to right.
def string_value
  dst = ''
  stack = @node.childNodes.to_a.reverse
  while (node = stack.pop)
    case node.nodeType
    when XML::DOM::Node::TEXT_NODE, XML::DOM::Node::CDATA_SECTION_NODE
      s = node.nodeValue
      dst << s if s
    else
      stack.concat node.childNodes.to_a.reverse
    end
  end
  dst
end
119
+
120
+ end
121
+
122
+
123
# XPath view of the document node, which serves as the XPath root.
class RootNodeAdapter < ParentNodeAdapter

  # XPath node type tag.
  def node_type
    :root
  end

  # The root of a root node is the wrapped node itself.
  alias root node

  # The root is always at position 0.
  def index
    0
  end

end
136
+
137
+
138
# XPath view of an Element node.
class ElementNodeAdapter < ParentNodeAdapter

  # Binds the adapter to +node+ and keeps +visitor+, whose attribute cache
  # is used by #attributes. Returns self.
  def wrap(node, visitor)
    @visitor = visitor
    @node = node
    self
  end

  # XPath node type tag.
  def node_type
    :element
  end

  # NOTE(review): this is the DOM nodeName, the same value #qualified_name
  # returns -- confirm whether a prefix-less local name was intended.
  def name_localpart
    @node.nodeName
  end

  # The element's namespaceURI as reported by the DOM node.
  def namespace_uri
    @node.namespaceURI
  end

  # The element's nodeName.
  def qualified_name
    @node.nodeName
  end

  # Attribute nodes of the element as an Array. The list is built once per
  # element and registered with the visitor; later calls return the
  # registered list.
  def attributes
    map = @node.attributes
    cached = @visitor.get_attributes(@node)
    return cached if cached

    list = []
    0.upto(map.length - 1) { |pos| list.push(map.item(pos)) }
    @visitor.regist_attributes(@node, list)
    list
  end

end
174
+
175
+
176
# XPath view of an attribute node. Owner lookups go through the visitor's
# registry, which ElementNodeAdapter#attributes fills in.
class AttrNodeAdapter < AbstractNodeAdapter

  # Binds the adapter to +node+ and keeps +visitor+ for owner lookups.
  # Returns self.
  def wrap(node, visitor)
    @visitor = visitor
    @node = node
    self
  end

  # XPath node type tag.
  def node_type
    :attribute
  end

  # NOTE(review): this is the DOM nodeName, the same value #qualified_name
  # returns -- confirm whether a prefix-less local name was intended.
  def name_localpart
    @node.nodeName
  end

  # The attribute's namespaceURI as reported by the DOM node.
  def namespace_uri
    @node.namespaceURI
  end

  # The attribute's nodeName.
  def qualified_name
    @node.nodeName
  end

  # The element this attribute was registered under in the visitor (nil if
  # it has not been registered).
  def parent
    @visitor.get_attr_parent @node
  end

  # Negated position of the attribute in its owner's registered attribute
  # list. NOTE(review): the negative sign presumably orders attributes
  # ahead of child nodes -- confirm against the XPath engine.
  def index
    position = @visitor.get_attributes(parent).index(@node)
    -position
  end

  # The attribute's nodeValue.
  def string_value
    @node.nodeValue
  end

end
213
+
214
+
215
+
216
# Maps DOM nodes onto XPath node adapters and remembers which attribute
# nodes belong to which element.
#
# One adapter instance exists per node kind; #visit re-points that instance
# at the requested node, so the returned adapter is only valid until the
# next #visit for the same kind.
class NodeVisitor

  # Builds the nodeType => adapter table. DOM nodeType codes run from 1
  # (ELEMENT_NODE) to 12 (NOTATION_NODE), so 13 slots are needed for every
  # code to be a valid index; kinds without a dedicated adapter share one
  # NullNodeAdapter.
  def initialize
    types = XML::DOM::Node
    text_adapter = TextNodeAdapter.new
    @adapters = Array.new(13, NullNodeAdapter.new)
    @adapters[types::ELEMENT_NODE] = ElementNodeAdapter.new
    @adapters[types::ATTRIBUTE_NODE] = AttrNodeAdapter.new
    # Text and CDATA sections share a single adapter instance.
    @adapters[types::TEXT_NODE] = text_adapter
    @adapters[types::CDATA_SECTION_NODE] = text_adapter
    @adapters[types::PROCESSING_INSTRUCTION_NODE] = PINodeAdapter.new
    @adapters[types::COMMENT_NODE] = CommentNodeAdapter.new
    @adapters[types::DOCUMENT_NODE] = RootNodeAdapter.new
    # Holds both element => [attrs] and attr => element entries.
    @attr = {}
  end

  # Returns the adapter for +node+'s nodeType, wrapped around +node+.
  def visit(node)
    @adapters[node.nodeType].wrap(node, self)
  end

  # Records +attrs+ as the attribute list of +node+ and +node+ as the
  # owner of each attribute. Returns +attrs+.
  def regist_attributes(node, attrs)
    @attr[node] = attrs
    attrs.each { |i| @attr[i] = node }
  end

  # The attribute list registered for element +node+, or nil.
  def get_attributes(node)
    @attr[node]
  end

  # The element registered as owner of attribute +node+, or nil.
  def get_attr_parent(node)
    @attr[node]
  end

end
249
+
250
+
251
+
252
# XPath evaluation context for DOM trees: the generic context with a fresh
# NodeVisitor supplied as its visitor.
class Context < XMLScan::XPath::Context

  # node: context node; namespace and variable are forwarded unchanged to
  # the superclass.
  def initialize(node, namespace = nil, variable = nil)
    super(node, namespace, variable, NodeVisitor.new)
  end

end
259
+
260
+
261
+ end
262
+
263
+ end ## module XPath
264
+ end ## module XMLScan
265
+
266
+
267
+
268
+ module XML
269
+
270
+ module DOM
271
+
272
# XPath support added to DOM nodes.
class Node

  # Visits every descendant element and stores its prefix => namespaceURI
  # pair in +ns+. A prefix that is already present is left untouched.
  # (The method name's spelling is kept for compatibility.)
  def __collectDescendatNS(ns = {})
    childNodes.each do |child|
      next unless child.nodeType == ELEMENT_NODE
      prefix = child.prefix
      ns[prefix] = child.namespaceURI unless ns.key?(prefix)
      child.__collectDescendatNS(ns)
    end
  end

  # Walks from this node up through parentNode and stores each node's
  # prefix => namespaceURI pair in +ns+. The entry from the nearest node
  # wins because existing prefixes are not overwritten.
  def __collectAncestorNS(ns = {})
    node = self
    while node
      prefix = node.prefix
      ns[prefix] = node.namespaceURI unless ns.key?(prefix)
      node = node.parentNode
    end
  end

  # Evaluates +xpath+ with this node as context node.
  #
  # xpath: an XPath source String, or an already compiled expression
  # ns: prefix => URI Hash; when empty it is filled in place from this
  #     node's ancestors and descendants
  # returns: the resulting node set (an Array)
  # raises: RuntimeError when the expression does not produce an Array
  def getNodesByXPath(xpath, ns = {})
    xpath = XMLScan::XPath.compile(xpath) unless xpath.is_a? XMLScan::XPath
    if ns.empty?
      ## collect namespaces
      __collectAncestorNS(ns)
      __collectDescendatNS(ns)
    end
    ret = xpath.call(XPath::DOM::Context.new(self, ns))
    raise "return value is not NodeSet" unless ret.is_a? Array
    ret
  end

  # Location step selecting this node among +parent+'s children, e.g.
  # "node()[3]". The child list is converted with to_a before searching,
  # as the other lookups in this file do.
  def _getMyLocationInXPath(parent)
    n = parent.childNodes.to_a.index(self)
    "node()[#{n + 1}]"
  end

  # Absolute XPath built from the location steps of this node and its
  # ancestors. A node without a parent yields "/".
  def makeXPath
    steps = []
    node = self
    while (parent = node.parentNode)
      steps.unshift node._getMyLocationInXPath(parent)
      node = parent
    end
    '/' + steps.join('/')
  end

end
323
+
324
+
325
class Element

  # Location step for this element: its nodeName plus its 1-based position
  # among the sibling elements that share that nodeName, e.g. "item[2]".
  def _getMyLocationInXPath(parent)
    name = nodeName
    same_named = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == ELEMENT_NODE && sibling.nodeName == name
    end
    "#{name}[#{same_named.index(self) + 1}]"
  end

end
336
+
337
+
338
class Text

  # Location step for this node: "text()[n]", where n is its 1-based
  # position among the parent's Text and CDATASection children.
  def _getMyLocationInXPath(parent)
    text_kinds = [TEXT_NODE, CDATA_SECTION_NODE]
    texts = parent.childNodes.to_a.select do |sibling|
      text_kinds.include?(sibling.nodeType)
    end
    "text()[#{texts.index(self) + 1}]"
  end

end
348
+
349
+
350
class CDATASection

  # Location step for this node: "text()[n]", where n is its 1-based
  # position among the parent's Text and CDATASection children.
  def _getMyLocationInXPath(parent)
    text_kinds = [TEXT_NODE, CDATA_SECTION_NODE]
    texts = parent.childNodes.to_a.select do |sibling|
      text_kinds.include?(sibling.nodeType)
    end
    "text()[#{texts.index(self) + 1}]"
  end

end
360
+
361
+
362
class Comment

  # Location step for this node: "comment()[n]", where n is its 1-based
  # position among the parent's comment children.
  def _getMyLocationInXPath(parent)
    comments = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == COMMENT_NODE
    end
    "comment()[#{comments.index(self) + 1}]"
  end

end
372
+
373
+
374
class ProcessingInstruction

  # Location step for this node: "processing-instruction()[n]", where n is
  # its 1-based position among the parent's processing-instruction children.
  def _getMyLocationInXPath(parent)
    instructions = parent.childNodes.to_a.select do |sibling|
      sibling.nodeType == PROCESSING_INSTRUCTION_NODE
    end
    "processing-instruction()[#{instructions.index(self) + 1}]"
  end

end
384
+
385
+
386
class Attr

  # XPath step for this attribute: "@" followed by its nodeName. The path
  # of the owning element is not included.
  def makeXPath
    marker = '@'
    marker + nodeName
  end

end
393
+
394
+
395
+ end
396
+
397
+ end
398
+