xmlparser 0.6.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. data/MANIFEST +112 -0
  2. data/README +697 -0
  3. data/README.ja +789 -0
  4. data/Rakefile +34 -0
  5. data/ext/encoding.h +91 -0
  6. data/ext/xmlparser/mkrf_conf.rb +28 -0
  7. data/ext/xmlparser/xmlparser.c +2226 -0
  8. data/lib/sax.rb +1 -0
  9. data/lib/saxdriver.rb +1 -0
  10. data/lib/wget.rb +47 -0
  11. data/lib/xml/dom/builder-ja.rb +58 -0
  12. data/lib/xml/dom/builder.rb +310 -0
  13. data/lib/xml/dom/core.rb +3276 -0
  14. data/lib/xml/dom/digest.rb +94 -0
  15. data/lib/xml/dom/visitor.rb +182 -0
  16. data/lib/xml/dom2/attr.rb +213 -0
  17. data/lib/xml/dom2/cdatasection.rb +76 -0
  18. data/lib/xml/dom2/characterdata.rb +177 -0
  19. data/lib/xml/dom2/comment.rb +81 -0
  20. data/lib/xml/dom2/core.rb +19 -0
  21. data/lib/xml/dom2/document.rb +317 -0
  22. data/lib/xml/dom2/documentfragment.rb +82 -0
  23. data/lib/xml/dom2/documenttype.rb +102 -0
  24. data/lib/xml/dom2/dombuilder.rb +277 -0
  25. data/lib/xml/dom2/dombuilderfilter.rb +12 -0
  26. data/lib/xml/dom2/domentityresolver.rb +13 -0
  27. data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
  28. data/lib/xml/dom2/domexception.rb +95 -0
  29. data/lib/xml/dom2/domimplementation.rb +61 -0
  30. data/lib/xml/dom2/dominputsource.rb +29 -0
  31. data/lib/xml/dom2/element.rb +533 -0
  32. data/lib/xml/dom2/entity.rb +110 -0
  33. data/lib/xml/dom2/entityreference.rb +107 -0
  34. data/lib/xml/dom2/namednodemap.rb +138 -0
  35. data/lib/xml/dom2/node.rb +587 -0
  36. data/lib/xml/dom2/nodelist.rb +231 -0
  37. data/lib/xml/dom2/notation.rb +86 -0
  38. data/lib/xml/dom2/processinginstruction.rb +155 -0
  39. data/lib/xml/dom2/text.rb +128 -0
  40. data/lib/xml/dom2/xpath.rb +398 -0
  41. data/lib/xml/encoding-ja.rb +42 -0
  42. data/lib/xml/parser.rb +13 -0
  43. data/lib/xml/parserns.rb +236 -0
  44. data/lib/xml/sax.rb +353 -0
  45. data/lib/xml/saxdriver.rb +370 -0
  46. data/lib/xml/xpath.rb +3284 -0
  47. data/lib/xml/xpath.ry +2352 -0
  48. data/lib/xmldigest.rb +1 -0
  49. data/lib/xmltree.rb +1 -0
  50. data/lib/xmltreebuilder.rb +1 -0
  51. data/lib/xmltreevisitor.rb +1 -0
  52. metadata +111 -0
@@ -0,0 +1,42 @@
1
+ ## -*- Ruby -*-
2
+ ## Sample XMLEncoding class for Japanese (EUC-JP, Shift_JIS)
3
+ ## 1998 by yoshidam
4
+ ##
5
+ ## Usage:
6
+ ## require 'xml/encoding-ja'
7
+ ## include XML::Encoding_ja
8
+
9
+ module XML
10
+ module Encoding_ja
11
+ require 'xml/parser'
12
+ require 'uconv'
13
+
14
# Encoding handler used for documents declared as EUC-JP.
# NOTE(review): the negative return values of #map presumably follow the
# expat unknown-encoding map convention (-1 = invalid byte, -2 = lead byte
# of a two-byte sequence) -- confirm against XML::Encoding.
class EUCHandler < XML::Encoding
  # Classifies the byte value +i+: values below 128 map to themselves,
  # 128..159 and 255 yield -1, every other value yields -2.
  def map(i)
    if i < 128
      i
    elsif i < 160 || i == 255
      -1
    else
      -2
    end
  end

  # Converts the byte sequence +s+ by delegating to Uconv.euctou2.
  def convert(s)
    Uconv.euctou2(s)
  end
end
24
+
25
# Encoding handler used for documents declared as Shift_JIS.
# NOTE(review): -2 presumably marks the lead byte of a two-byte sequence
# in the expat unknown-encoding map -- confirm against XML::Encoding.
class SJISHandler < XML::Encoding
  # Byte values below 128 map to themselves; everything else yields -2.
  def map(i)
    i < 128 ? i : -2
  end

  # Converts the byte sequence +s+ by delegating to Uconv.sjistou2.
  def convert(s)
    Uconv.sjistou2(s)
  end
end
34
+
35
# Returns a handler object for the encoding called +name+, or nil when the
# name is neither EUC-JP nor Shift_JIS (matched case-insensitively).
#
# The whole string must match: \A and \z are used instead of ^ and $ so a
# name containing an embedded newline cannot match on one of its lines.
def unknownEncoding(name)
  return EUCHandler.new if name =~ /\Aeuc-jp\z/i
  return SJISHandler.new if name =~ /\Ashift_jis\z/i
  nil
end
40
+
41
+ end
42
+ end
data/lib/xml/parser.rb ADDED
@@ -0,0 +1,13 @@
1
+ ## -*- Ruby -*-
2
+ ## XML::Parser (alias of XMLParser)
3
+ ## 1998 by yoshidam
4
+
5
+ require 'xmlparser.so'
6
+
7
+ #module XML
8
+ # Parser = XMLParser
9
+ # class Parser
10
+ # Error = XMLParserError
11
+ # end
12
+ # Encoding = XMLEncoding
13
+ #end
@@ -0,0 +1,236 @@
1
+ ## -*- Ruby -*-
2
+ ## XML::ParserNS
3
+ ## namespaces-aware version of XML::Parser (experimental)
4
+ ## 2002 by yoshidam
5
+
6
+ require 'xml/parser'
7
+
8
+ module XML
9
# Parser subclass that tracks namespace declarations while parsing.
#
# A stack of prefix => URI hashes (@ns) is pushed on every start tag and
# popped on every end tag. When a namespace separator (@nssep) was given,
# element and attribute names handed to the owning ParserNS object (or to
# the block passed to #parse) are rewritten as URI + separator + QName.
class InternalParserNS < Parser
  XMLNS = 'http://www.w3.org/XML/1998/namespace'
  attr_reader :ns

  # Builds the parser for the owner +parserNS+.
  #
  # When exactly two extra arguments are given and the first is not a
  # Parser, the second is taken as the namespace separator and only the
  # first is forwarded to Parser.new (nothing is forwarded when it is nil).
  def self.new(parserNS, *args)
    nssep = nil
    if args.length == 2 && !args[0].is_a?(Parser)
      nssep = args[1]
      # Keep +args+ an array; a nil first argument splats to no arguments.
      args = [*args[0]]
    end
    obj = super(*args)
    obj.__init__(parserNS, nssep)
    obj
  end

  # Second-stage initializer called from .new.
  def __init__(parserNS, nssep)
    @ns = []
    @parserNS = parserNS
    @nssep = nssep
  end

  # Parses with the same arguments as the superclass.
  #
  # With a block, start/end element events are intercepted so the namespace
  # stack stays current and, when a separator is set, names are qualified
  # and START_NAMESPACE_DECL / END_NAMESPACE_DECL events are yielded (only
  # if the owner responds to the matching handler). All other events are
  # yielded unchanged. Without a block the call goes straight to super.
  def parse(*args)
    if block_given?
      super do |nodetype, name, attrs, parser|
        case nodetype
        when START_ELEM
          decls, attrs = getNSAttrs(attrs)
          # Push first so declarations on this tag apply to the tag itself.
          @ns.push(decls)
          if @nssep
            if @parserNS.respond_to?(:startNamespaceDecl)
              decls.each do |prefix, uri|
                yield(START_NAMESPACE_DECL, prefix, uri, parser)
              end
            end
            name = qualifyElementName(name)
            attrs = qualifyAttributeNames(attrs)
          end
          yield(nodetype, name, attrs, parser)
        when END_ELEM
          # Resolve before popping: the end tag is still in its own scope.
          name = qualifyElementName(name) if @nssep
          yield(nodetype, name, attrs, parser)
          decls = @ns.pop
          if @nssep && @parserNS.respond_to?(:endNamespaceDecl)
            decls.to_a.reverse.each do |prefix, _uri|
              yield(END_NAMESPACE_DECL, prefix, nil, parser)
            end
          end
        else
          yield(nodetype, name, attrs, parser)
        end
      end
    else
      super
    end
  end

  # Returns the namespace declarations of the innermost open element.
  def getNamespaces
    @ns[-1]
  end

  # Looks +prefix+ up from the innermost scope outwards; 'xml' always maps
  # to XMLNS. A nil prefix looks up the default namespace. Returns nil when
  # the prefix is not declared.
  def getNSURI(prefix)
    return XMLNS if prefix == 'xml'
    @ns.reverse_each do |scope|
      return scope[prefix] if scope.include?(prefix)
    end
    nil
  end

  # Splits an element QName and returns [prefix, uri, localpart].
  # Unprefixed names are resolved against the default namespace.
  def resolveElementQName(qname)
    qname =~ /^((\S+):)?(\S+)$/u
    prefix, localpart = $2, $3
    [prefix, getNSURI(prefix), localpart]
  end

  # Splits an attribute QName and returns [prefix, uri, localpart].
  # Unprefixed attributes get a nil uri (no default-namespace lookup).
  def resolveAttributeQName(qname)
    qname =~ /^((\S+):)?(\S+)$/u
    prefix, localpart = $2, $3
    uri = prefix.nil? ? nil : getNSURI(prefix)
    [prefix, uri, localpart]
  end

  # Returns the superclass result with xmlns / xmlns:* entries removed.
  # NOTE(review): with a separator set, every remaining name is prefixed
  # with uri.to_s + separator even when the uri is nil, unlike the
  # start-element path which leaves such names untouched -- confirm intent.
  def getSpecifiedAttributes
    ret = super
    attrs = []
    ret.each do |k|
      next if k =~ /^xmlns:|^xmlns$/u
      if @nssep
        _prefix, uri, _localpart = resolveAttributeQName(k)
        k = uri.to_s + @nssep + k
      end
      attrs.push(k)
    end
    attrs
  end


  private

  # Scans the attribute pairs +args+ for namespace declarations.
  # Returns [declarations, attributes]; declarations are keyed by the
  # declared prefix (nil for a plain "xmlns" attribute). Declarations are
  # dropped from the returned attributes only when +eliminateNSDecl+ is true.
  def getNSAttrs(args, eliminateNSDecl = false)
    ns = {}
    newargs = {}
    args.each do |n, v|
      prefix, localpart = n.split(':')
      if prefix == 'xmlns'
        ns[localpart] = v
        next if eliminateNSDecl
      end
      newargs[n] = v
    end
    [ns, newargs]
  end

  # Returns uri + separator + name when the element name resolves to a
  # namespace URI, otherwise the name unchanged.
  def qualifyElementName(name)
    _prefix, uri, _localpart = resolveElementQName(name)
    uri ? uri + @nssep + name : name
  end

  # Returns a new hash whose keys are the attribute names of +args+,
  # qualified with uri + separator wherever the prefix resolves to a URI.
  def qualifyAttributeNames(args)
    attrs = {}
    args.each do |k, v|
      _prefix, uri, _localpart = resolveAttributeQName(k)
      k = uri + @nssep + k if uri
      attrs[k] = v
    end
    attrs
  end

  # Start-tag handler: records the declarations, reports them through
  # startNamespaceDecl and forwards the (possibly qualified) element to the
  # owner, each only if the owner responds to the corresponding method.
  def startElement(name, args)
    ns, args = getNSAttrs(args)
    @ns.push(ns)
    if @nssep && @parserNS.respond_to?(:startNamespaceDecl)
      ns.each do |prefix, uri|
        @parserNS.startNamespaceDecl(prefix, uri)
      end
    end
    if @parserNS.respond_to?(:startElement)
      if @nssep
        name = qualifyElementName(name)
        args = qualifyAttributeNames(args)
      end
      @parserNS.startElement(name, args)
    end
  end

  # End-tag handler: forwards the (possibly qualified) name to the owner,
  # then pops the scope and reports its prefixes in reverse order through
  # endNamespaceDecl.
  def endElement(name)
    if @parserNS.respond_to?(:endElement)
      name = qualifyElementName(name) if @nssep
      @parserNS.endElement(name)
    end
    ns = @ns.pop
    if @nssep && @parserNS.respond_to?(:endNamespaceDecl)
      ns.to_a.reverse.each do |prefix, _uri|
        @parserNS.endNamespaceDecl(prefix)
      end
    end
  end
end
185
+
186
+
187
# Front end for InternalParserNS. Subclasses define handler methods; every
# other call is forwarded to the wrapped parser object.
class ParserNS
  # Handler names that are bridged from the wrapped parser back to this
  # object when this object responds to them.
  EVENT_HANDLERS = [
    :character, :processingInstruction, :unparsedEntityDecl,
    :notationDecl, :externalEntityRef, :comment,
    :startCdata, :endCdata,
    :startNamespaceDecl, :endNamespaceDecl,
    :startDoctypeDecl, :endDoctypeDecl,
    :default, :defaultExpand,
    :unknownEncoding, :notStandalone,
    :elementDecl, :attlistDecl, :xmlDecl, :entityDecl,
    :externalParsedEntityDecl, :internalParsedEntityDecl]

  # Creates the wrapped parser, passing this object followed by +args+.
  def initialize(*args)
    @parser = InternalParserNS.new(self, *args)
  end

  # Defines a singleton forwarder on the wrapped parser for each handler
  # this object responds to, then delegates parsing to the wrapped parser.
  def parse(*args, &block)
    handled = EVENT_HANDLERS.select { |m| self.respond_to?(m) }
    handled.each do |m|
      eval "def @parser.#{m}(*args); @parserNS.#{m}(*args); end"
    end
    @parser.parse(*args, &block)
  end

  # Accepted and ignored.
  def setReturnNSTriplet(do_nst); end

  # Forwards unknown methods to the wrapped parser; raises NameError when
  # the wrapped parser does not respond to +name+ either.
  def method_missing(name, *args)
    unless @parser.respond_to?(name)
      raise NameError.new("undefined method `#{name.id2name}' " +
                          "for #{self.inspect}")
    end
    @parser.send(name, *args)
  end
end
236
+ end
data/lib/xml/sax.rb ADDED
@@ -0,0 +1,353 @@
1
+ ## -*- Ruby -*-
2
+ ## SAX (Simple API for XML) 1.0 for Ruby (experimental)
3
+ ## 1999 by yoshidam
4
+ ##
5
+ ## SAX information: http://www.megginson.com/SAX/
6
+ ##
7
+
8
+ module XML
9
+ module SAX
10
# Interface for an element's attribute list. Every method here raises
# RuntimeError("not implemented") until an including class overrides it.
module AttributeList
  # Takes no arguments.
  def getLength
    raise RuntimeError, "not implemented"
  end

  # Takes a position.
  def getName(pos)
    raise RuntimeError, "not implemented"
  end

  # Takes a position or a name.
  def getType(pos_or_name)
    raise RuntimeError, "not implemented"
  end

  # Takes a position or a name.
  def getValue(pos_or_name)
    raise RuntimeError, "not implemented"
  end
end
27
+
28
# Interface for DTD declaration callbacks. Both methods raise
# RuntimeError("not implemented") until an including class overrides them.
module DTDHandler
  # Callback taking a notation's name, public id and system id.
  def notationDecl(name, pubid, sysid)
    raise RuntimeError, "not implemented"
  end

  # Callback taking an unparsed entity's name, public id, system id and
  # notation.
  def unparsedEntityDecl(name, pubid, sysid, notation)
    raise RuntimeError, "not implemented"
  end
end
37
+
38
# Interface for document content callbacks. Every method raises
# RuntimeError("not implemented") until an including class overrides it.
module DocumentHandler
  # Takes a locator object.
  def setDocumentLocator(locator)
    raise RuntimeError, "not implemented"
  end

  # Takes no arguments.
  def startDocument
    raise RuntimeError, "not implemented"
  end

  # Takes no arguments.
  def endDocument
    raise RuntimeError, "not implemented"
  end

  # Takes an element name and its attributes.
  def startElement(name, atts)
    raise RuntimeError, "not implemented"
  end

  # Takes an element name.
  def endElement(name)
    raise RuntimeError, "not implemented"
  end

  # Takes character data plus a start offset and a length.
  def characters(ch, start, length)
    raise RuntimeError, "not implemented"
  end

  # Takes whitespace data plus a start offset and a length.
  def ignorableWhitespace(ch, start, length)
    raise RuntimeError, "not implemented"
  end

  # Takes a processing instruction's target and data.
  def processingInstruction(target, data)
    raise RuntimeError, "not implemented"
  end
end
71
+
72
# Interface for resolving external entities. The method raises
# RuntimeError("not implemented") until an including class overrides it.
module EntityResolver
  # Takes the entity's public id and system id.
  def resolveEntity(pubid, sysid)
    raise RuntimeError, "not implemented"
  end
end
77
+
78
# Interface for error reporting callbacks. Every method raises
# RuntimeError("not implemented") until an including class overrides it.
module ErrorHandler
  # Takes the exception describing a warning.
  def warning(e)
    raise RuntimeError, "not implemented"
  end

  # Takes the exception describing an error.
  def error(e)
    raise RuntimeError, "not implemented"
  end

  # Takes the exception describing a fatal error.
  def fatalError(e)
    raise RuntimeError, "not implemented"
  end
end
91
+
92
# Interface for reporting a position in a document. Every method raises
# RuntimeError("not implemented") until an including class overrides it.
module Locator
  # Takes no arguments.
  def getPublicId
    raise RuntimeError, "not implemented"
  end

  # Takes no arguments.
  def getSystemId
    raise RuntimeError, "not implemented"
  end

  # Takes no arguments.
  def getLineNumber
    raise RuntimeError, "not implemented"
  end

  # Takes no arguments.
  def getColumnNumber
    raise RuntimeError, "not implemented"
  end
end
109
+
110
# Interface for a SAX parser. Every method raises
# RuntimeError("not implemented") until an including class overrides it.
module Parser
  # Takes a locale.
  def setLocale(locale)
    raise "not implemented"
  end

  # Takes an entity resolver object.
  def setEntityResolver(resolver)
    raise "not implemented"
  end

  # Takes a DTD handler object.
  def setDTDHandler(handler)
    raise "not implemented"
  end

  # Takes a document handler object.
  def setDocumentHandler(handler)
    raise "not implemented"
  end

  # Takes an error handler object. The parameter is optional only so that
  # existing zero-argument calls keep working; like the other setters it is
  # meant to receive the handler.
  def setErrorHandler(handler = nil)
    raise "not implemented"
  end

  # Takes an input source or a system id.
  def parse(source_or_sysid)
    raise "not implemented"
  end
end
135
+
136
# Default handler: implements all four handler interfaces with methods that
# do nothing, except #resolveEntity (returns nil) and #fatalError (raises
# the exception it is given). Subclasses override only what they need.
class HandlerBase
  include EntityResolver
  include DTDHandler
  include DocumentHandler
  include ErrorHandler

  # Always returns nil.
  def resolveEntity(pubid, sysid)
    nil
  end

  # Ignores notation declarations.
  def notationDecl(name, pubid, sysid)
  end

  # Ignores unparsed entity declarations.
  def unparsedEntityDecl(name, pubid, sysid, notation)
  end

  # Ignores the locator.
  def setDocumentLocator(locator)
  end

  # Does nothing.
  def startDocument
  end

  # Does nothing.
  def endDocument
  end

  # Does nothing.
  def startElement(name, atts)
  end

  # Does nothing.
  def endElement(name)
  end

  # Ignores character data.
  def characters(ch, start, length)
  end

  # Ignores whitespace.
  def ignorableWhitespace(ch, start, length)
  end

  # Ignores processing instructions.
  def processingInstruction(target, data)
  end

  # Ignores warnings.
  def warning(e)
  end

  # Ignores errors.
  def error(e)
  end

  # Raises the exception +e+ it was given.
  def fatalError(e)
    raise e
  end
end
186
+
187
# Describes where a document comes from: a public id, a system id, a byte
# stream and an encoding, each with a setter/getter pair.
class InputSource
  # +sysid+ may be omitted or nil (nothing is set), a String (stored as the
  # system id) or any other object (stored as the byte stream).
  def initialize(sysid = nil)
    @publicId = nil
    @systemId = nil
    @stream = nil
    @encoding = nil

    if sysid.kind_of?(String)
      setSystemId(sysid)
    elsif !sysid.nil?
      setByteStream(sysid)
    end
  end

  # Stores the public id.
  def setPublicId(pubid)
    @publicId = pubid
  end

  # Returns the stored public id, or nil.
  def getPublicId
    @publicId
  end

  # Stores the system id.
  def setSystemId(sysid)
    @systemId = sysid
  end

  # Returns the stored system id, or nil.
  def getSystemId
    @systemId
  end

  # Stores the byte stream object.
  def setByteStream(stream)
    @stream = stream
  end

  # Returns the stored byte stream, or nil.
  def getByteStream
    @stream
  end

  # Stores the encoding.
  def setEncoding(encoding)
    @encoding = encoding
  end

  # Returns the stored encoding, or nil.
  def getEncoding
    @encoding
  end

  # Character streams are not supported; always raises RuntimeError.
  def setCharacterStream(stream)
    raise "not implemented"
  end

  # Character streams are not supported; always raises RuntimeError.
  def getCharacterStream
    raise "not implemented"
  end
end
241
+
242
# Exception carrying a message, a wrapped exception, or both.
class SAXException < Exception
  ## initialize(String)
  ## initialize(Exception)
  ## initialize(String, Exception)
  #
  # Raises TypeError("parameter error") for any other argument combination.
  def initialize(message, e = nil)
    @message = nil
    @exception = nil
    if message.kind_of?(String) && e.nil?
      @message = message
    elsif message.kind_of?(Exception) && e.nil?
      # Single-argument form: the first argument IS the wrapped exception.
      @exception = message
    elsif message.kind_of?(String) && e.kind_of?(Exception)
      @message = message
      @exception = e
    else
      raise TypeError.new("parameter error")
    end
  end

  # Returns the message, falling back to the wrapped exception's to_s when
  # no message was given.
  def getMessage
    if @message.nil? && !@exception.nil?
      return @exception.to_s
    end
    @message
  end

  # Returns the wrapped exception, or nil.
  def getException
    @exception
  end

  # Same as #getMessage; also installed as #to_s.
  def toString
    getMessage
  end
  alias to_s toString
end
277
+
278
# SAXException that also records a public id, system id, line number and
# column number, taken either from a Locator or from explicit arguments.
class SAXParseException < SAXException
  ## initialize(String, Locator)
  ## initialize(String, Locator, Exception)
  ## initialize(String, String, String, Integer, Integer)
  ## initialize(String, String, String, Integer, Integer, Exception)
  #
  # In the Locator forms the locator is passed in the +pubid+ position and
  # the optional exception in the +sysid+ position. Any other combination
  # raises TypeError("parameter error").
  def initialize(message, pubid = nil, sysid = nil,
                 line = nil, column = nil, e = nil)
    @publicId = nil
    @systemId = nil
    @lineNumber = nil
    @columnNumber = nil
    if message.kind_of?(String) && pubid.kind_of?(Locator) &&
        sysid.nil? && line.nil? && column.nil? && e.nil?
      super(message)
      @publicId = pubid.getPublicId
      @systemId = pubid.getSystemId
      @lineNumber = pubid.getLineNumber
      @columnNumber = pubid.getColumnNumber
    elsif message.kind_of?(String) && pubid.kind_of?(Locator) &&
        sysid.kind_of?(Exception) && line.nil? && column.nil? && e.nil?
      super(message, sysid)
      @publicId = pubid.getPublicId
      @systemId = pubid.getSystemId
      @lineNumber = pubid.getLineNumber
      @columnNumber = pubid.getColumnNumber
    # Integer rather than Fixnum: Fixnum is gone from current Ruby, and
    # Integer also matches on interpreters that still define Fixnum.
    elsif message.kind_of?(String) && pubid.kind_of?(String) &&
        sysid.kind_of?(String) && line.kind_of?(Integer) &&
        column.kind_of?(Integer) && e.nil?
      super(message)
      @publicId = pubid
      @systemId = sysid
      @lineNumber = line
      @columnNumber = column
    elsif message.kind_of?(String) && pubid.kind_of?(String) &&
        sysid.kind_of?(String) && line.kind_of?(Integer) &&
        column.kind_of?(Integer) && e.kind_of?(Exception)
      super(message, e)
      @publicId = pubid
      @systemId = sysid
      @lineNumber = line
      @columnNumber = column
    else
      raise TypeError.new("parameter error")
    end
  end

  # Returns the recorded public id.
  def getPublicId
    @publicId
  end

  # Returns the recorded system id.
  def getSystemId
    @systemId
  end

  # Returns the recorded line number.
  def getLineNumber
    @lineNumber
  end

  # Returns the recorded column number.
  def getColumnNumber
    @columnNumber
  end
end
340
+
341
+ module Helpers
342
# Factory helper that instantiates a parser from a class or a class name.
module ParserFactory
  # Returns +klass.new+ for a Class, the result of evaluating the string
  # and calling +new+ on it for a String, and nil for anything else.
  #
  # NOTE(review): the String form is passed to eval, so it executes
  # arbitrary Ruby; callers must never pass untrusted input here.
  def self.makeParser(klass)
    case klass
    when Class
      klass.new
    when String
      eval(klass).new
    end
  end
end
351
+ end
352
+ end
353
+ end