hutch-xamplr-pp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +504 -0
- data/README.rdoc +94 -0
- data/Rakefile +56 -0
- data/VERSION.yml +4 -0
- data/lib/xampl-pp-dtd.rb +126 -0
- data/lib/xampl-pp-wf.rb +1037 -0
- data/lib/xamplr-pp/ANNOUNCE.TXT +47 -0
- data/lib/xamplr-pp/LICENSE +504 -0
- data/lib/xamplr-pp/Makefile +122 -0
- data/lib/xamplr-pp/examples/parse-wf.rb +55 -0
- data/lib/xamplr-pp/examples/parse.rb +59 -0
- data/lib/xamplr-pp/license.inc +17 -0
- data/lib/xamplr-pp/saxdemo.rb +214 -0
- data/lib/xamplr-pp/saxish.rb +298 -0
- data/lib/xamplr-pp/saxishHandler.rb +58 -0
- data/lib/xamplr-pp/toys/chew.rb +62 -0
- data/lib/xamplr-pp/toys/chewMultibyte.rb +44 -0
- data/lib/xamplr-pp/toys/dump.rb +58 -0
- data/lib/xamplr-pp/xmlName.defn +67 -0
- data/lib/xamplr-pp/xpp.rb +908 -0
- data/lib/xamplr-pp/xppDeluxe.rb +49 -0
- data/lib/xamplr-pp/xppIter.rb +845 -0
- data/lib/xamplr-pp.rb +991 -0
- data/test/test_helper.rb +10 -0
- data/test/xamplr_pp_gem_test.rb +7 -0
- metadata +79 -0
@@ -0,0 +1,908 @@
|
|
1
|
+
# xampl-pp : XML pull parser
|
2
|
+
# Copyright (C) 2002-2009 Bob Hutchison
|
3
|
+
#
|
4
|
+
# This library is free software; you can redistribute it and/or
|
5
|
+
# modify it under the terms of the GNU Lesser General Public
|
6
|
+
# License as published by the Free Software Foundation; either
|
7
|
+
# version 2.1 of the License, or (at your option) any later version.
|
8
|
+
#
|
9
|
+
# This library is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
12
|
+
# Lesser General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU Lesser General Public
|
15
|
+
# License along with this library; if not, write to the Free Software
|
16
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
17
|
+
#
|
18
|
+
|
19
|
+
|
20
|
+
# Xpp is a small, dependency-free XML pull parser.
#
# Typical use:
#
#   parser = Xpp.new
#   parser.input = "<greeting kind='warm'>hello</greeting>"
#   until parser.endDocument? do
#     case parser.nextEvent
#     when Xpp::START_ELEMENT then puts parser.name
#     when Xpp::TEXT          then puts parser.text
#     end
#   end
#
# The input is either a String (parsed as one buffer) or any object that
# answers +gets+ (parsed one line at a time). After every call to nextEvent
# the readers below (+type+, +name+, +text+, +attributeValue+, ...) describe
# the event that was just read.
class Xpp
  # XML 'event' types
  START_DOCUMENT = 'START_DOCUMENT'
  END_DOCUMENT = 'END_DOCUMENT'
  START_ELEMENT = 'START_ELEMENT'
  END_ELEMENT = 'END_ELEMENT'
  TEXT = 'TEXT'
  CDATA_SECTION = 'CDATA_SECTION'
  ENTITY_REF = 'ENTITY_REF'
  IGNORABLE_WHITESPACE = 'IGNORABLE_WHITESPACE'
  PROCESSING_INSTRUCTION = 'PROCESSING_INSTRUCTION'
  COMMENT = 'COMMENT'
  DOCTYPE = 'DOCTYPE'
  UNDECIDED_TYPE = 'UNDECIDED_TYPE'

  # 'Features', actually just processing options
  attr_accessor :processNamespace
  attr_accessor :reportNamespaceAttributes
  attr_accessor :checkWellFormed
  attr_accessor :utf8encode

  # the entities that we will recognise
  attr_accessor :entityMap
  attr_reader :unresolvedEntity
  attr_accessor :resolver

  # some information about where we are
  attr_reader :line
  attr_reader :column

  # element information
  attr_reader :type
  attr_reader :emptyElement
  attr_reader :name
  attr_reader :qname
  attr_reader :namespace
  attr_reader :prefix
  attr_reader :attributeName
  attr_reader :attributeQName
  attr_reader :attributeNamespace
  attr_reader :attributePrefix
  attr_reader :attributeValue

  attr_reader :text

  # These are not intended for general use (they are not part of the api)

  # open element information
  attr_reader :elementName
  attr_reader :elementQName
  attr_reader :elementNamespace
  attr_reader :elementPrefix

  # some pre-compiled patterns
  attr_accessor :namePattern
  attr_accessor :skipWhitespacePattern

  attr_reader :elementNamespacePrefixStack
  attr_reader :elementNamespaceValueStack
  attr_reader :elementNamespaceDefaultStack

  public

  # The event predicates compare by identity: @type is only ever assigned
  # one of the constants above.

  # True until the first call to nextEvent after an input was assigned.
  def startDocument?
    @type.equal? START_DOCUMENT
  end

  # True once the input is exhausted (or when no input is set).
  def endDocument?
    @type.equal? END_DOCUMENT
  end

  # True when the current event is a start tag (including <empty/> tags).
  def startElement?
    @type.equal? START_ELEMENT
  end

  # True when the current event is an end tag. An empty element such as
  # <empty/> produces a START_ELEMENT followed by a synthesized END_ELEMENT.
  def endElement?
    @type.equal? END_ELEMENT
  end

  # True when the current event is character data.
  def text?
    @type.equal? TEXT
  end

  # True when the current event is a <![CDATA[ ... ]]> section.
  def cdata?
    @type.equal? CDATA_SECTION
  end

  # True when the current event is an entity or character reference.
  def entityRef?
    @type.equal? ENTITY_REF
  end

  # True when the current event is whitespace outside of the root element.
  def ignorableWhitespace?
    @type.equal? IGNORABLE_WHITESPACE
  end

  # True when the current text consists only of whitespace. Events that
  # carry no text (@text is nil) are not whitespace.
  def whitespace?
    return false if @text.nil?

    @text.index(@skipWhitespacePattern).nil?
  end

  # True when the current event is a <? ... ?> processing instruction.
  def processingInstruction?
    @type.equal? PROCESSING_INSTRUCTION
  end

  # True when the current event is a <!-- ... --> comment.
  def comment?
    @type.equal? COMMENT
  end

  # True when the current event is a <!DOCTYPE ... > declaration.
  def doctype?
    @type.equal? DOCTYPE
  end

  # Assigns the source to parse and resets all parser state.
  #
  # v:: nil (no input), a String (the whole document), or an object that
  #     answers +gets+ such as an IO or StringIO (read line by line).
  #
  # Raises "illegalInput" for anything else.
  def input=(v)
    if v.nil?
      @input = nil
      @inputBuffer = nil
      @inputBufferLength = 0
      @column = 0
      @line = 0
    elsif v.kind_of? String
      @input = nil
      @inputBuffer = v
      @inputBufferLength = v.length
      @line = 1
      @column = 0
    elsif v.kind_of?(IO) || v.respond_to?(:gets)
      # Only #gets is ever called on the stream, so anything line-readable
      # is accepted (Kernel#gets is private, so a bare object does not pass).
      @input = v
      @inputBuffer = nil
      @inputBufferLength = 0
      @column = 0
      @line = 0
    else
      raise "illegalInput"
    end
    @nextInputBuffer = nil
    @textBuffer = ''

    @elementNamespacePrefixStack = []
    @elementNamespaceValueStack = []
    @elementNamespaceDefaultStack = []

    @elementName = []
    @elementQName = []
    @elementNamespace = []
    @elementPrefix = []

    @type = START_DOCUMENT
    @unresolvedEntity = false

    @name = nil
    @namespace = nil

    @attributeName = []
    @attributeQName = []
    @attributeNamespace = []
    @attributePrefix = []
    @attributeValue = []

    @emptyElement = false

    @haveRoot = false

    initInput
  end

  # Hook called at the end of input=.
  def initInput
    # override this if needed
  end

  # Advances to the next event and returns its type (one of the constants
  # above). Returns END_DOCUMENT when there is no input.
  #
  # Any StandardError raised while parsing is re-raised as a RuntimeError
  # whose message starts with "parse error:" and names the source, line and
  # column; the backtrace of the original error is preserved. Exceptions
  # outside StandardError (Interrupt, SystemExit, ...) are not intercepted.
  def nextEvent
    @type = END_DOCUMENT
    return @type if @inputBuffer.nil? && @input.nil?

    @unresolvedEntity = false
    @text = nil

    parseNextEvent

    @type
  rescue StandardError => error
    raise RuntimeError, describeParseError(error), error.backtrace
  end

  private

  # Sets the default options, the predefined entities and the patterns.
  def initialize
    self.processNamespace = true
    self.reportNamespaceAttributes = false
    self.checkWellFormed = true
    self.utf8encode = true

    self.input = nil

    self.entityMap = {"amp"=>"&",
                      "apos"=>"'",
                      "gt"=>">",
                      "lt"=>"<",
                      "quot"=>"\""}
    # Regex literals with the /u flag: the three-argument form of
    # Regexp.compile is rejected by current Ruby versions.
    self.namePattern = /[^\x00-\x20=\/>\`\.\~\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?][^\x00-\x20=\/>\`\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?]*/u
    self.skipWhitespacePattern = /[^\s]+|\x00/u
  end

  # Builds the "parse error: ..." message used by nextEvent. The stream is
  # checked first because a stream being parsed also has a current line in
  # @inputBuffer.
  def describeParseError(error)
    if !@input.nil?
      if @input.kind_of? File
        sprintf("parse error: '%s' -- file '%s', line %d, column %d", error, @input.path, @line, @column)
      else
        sprintf("parse error: '%s' -- unnamed IO stream, line %d, column %d", error, @line, @column)
      end
    elsif !@inputBuffer.nil?
      sprintf("parse error: '%s' -- String input, line %d, column %d", error, @line, @column)
    else
      sprintf("parse error: '%s' -- unknown source, line %d, column %d", error, @line, @column)
    end
  end

  # Moves on to the next buffer (the next line of a stream). A String input
  # has a single buffer, so this marks it as exhausted. Returns nil at end
  # of input, leaving @inputBuffer nil and @inputBufferLength at -1.
  def getMoreInput
    @column = 0
    @line += 1
    if @input.nil?
      @inputBuffer = nil
      @inputBufferLength = -1
      return nil
    end
    @inputBuffer = @nextInputBuffer
    if @inputBuffer.nil?
      @inputBuffer = @input.gets
      @column = 0
      if @inputBuffer.nil?
        @inputBufferLength = -1
        return nil
      end
    end
    @inputBufferLength = @inputBuffer.length
    # keep one line of look-ahead for peekAt0/peekAt1
    @nextInputBuffer = @input.gets
  end

  # Reads one character and insists that it is +e+. Returns the character.
  # Raises "unexpectedEOF:: ..." at end of input and "unexpectedChar:: ..."
  # on a mismatch.
  def expect(e)
    c = read
    if c.nil?
      raise sprintf("unexpectedEOF:: expect '%s' in %s", ('' << e), caller[0])
    end
    if c != e
      raise sprintf("unexpectedChar:: expect '%s' got '%s' in %s", ('' << e), ('' << c), caller[0])
    end
    c
  end

  # Consumes and returns the next character, or nil at end of input.
  def read
    getMoreInput if @inputBuffer.nil? || (@inputBufferLength <= @column)

    return nil if @inputBuffer.nil?

    c = @inputBuffer[@column]
    @column += 1
    c
  end

  # Returns the next character without consuming it (nil at end of input).
  def peekAt0
    getMoreInput if @inputBuffer.nil?

    return @inputBuffer[@column] if @column < @inputBufferLength

    if !@nextInputBuffer.nil? && (0 < @nextInputBuffer.length)
      @nextInputBuffer[0]
    else
      nil
    end
  end

  # Returns the character after the next one without consuming anything
  # (nil when there is none).
  def peekAt1
    getMoreInput if @inputBuffer.nil?

    return @inputBuffer[@column + 1] if (@column + 1) < @inputBufferLength

    # one character left in this buffer => first of the next buffer,
    # none left => second of the next buffer
    wanted = (@column < @inputBufferLength) ? 0 : 1
    if !@nextInputBuffer.nil? && (wanted < @nextInputBuffer.length)
      @nextInputBuffer[wanted]
    else
      nil
    end
  end

  # Parses one event and stores its description in the instance variables.
  def parseNextEvent
    @attributeName.clear
    @attributeQName.clear
    @attributeNamespace.clear
    @attributePrefix.clear
    @attributeValue.clear

    if @emptyElement
      # the last event was an empty start element like <start/>
      @type = END_ELEMENT
      @emptyElement = false
      # The start tag pushed its namespace declarations; they go out of
      # scope here exactly as they do in parseEndElement.
      @elementNamespacePrefixStack.pop
      @elementNamespaceValueStack.pop
      @elementNamespaceDefaultStack.pop
      return
    end

    @prefix = nil
    @name = nil
    @qname = nil
    @namespace = nil
    @type = peekType

    @textBuffer = ''

    case @type
    when END_DOCUMENT
      # nothing to do
    when ENTITY_REF
      parseEntity
      @text = @textBuffer
      @textBuffer = ''
    when START_ELEMENT
      parseStartElement
    when END_ELEMENT
      parseEndElement
    when TEXT
      parseText(?<, false)
      @text = @textBuffer
      @textBuffer = ''
      # whitespace outside of the root element is reported as ignorable
      if (0 == @elementName.length) && @text.index(@skipWhitespacePattern).nil?
        @type = IGNORABLE_WHITESPACE
      end
    else
      @type = parseUndecided
    end
  end

  # Parses markup starting with "<?" or "<!" and returns the event type:
  # PROCESSING_INSTRUCTION, COMMENT, CDATA_SECTION or DOCTYPE. The content
  # (without the delimiters) is left in @text.
  def parseUndecided
    # this could be a comment, processing instruction, or CDATA section
    expect ?<

    demand = nil
    delimiter = nil # **first** (not last) character in delimiting string

    @text = @textBuffer
    @textBuffer = ''

    c = read
    if ?? == c
      result = PROCESSING_INSTRUCTION
      delimiter = ??
    elsif ?! == c
      cc = peekAt0
      if ?- == cc
        result = COMMENT
        demand = '--'
        delimiter = ?-
      elsif ?[ == cc
        result = CDATA_SECTION
        demand = '[CDATA['
        delimiter = ?]
      else
        result = DOCTYPE
        demand = 'DOCTYPE'
      end
    else
      # this should never happen because we'll get an illegal name exception
      # first
      raise "illegal <#{c}"
    end

    unless demand.nil?
      # demand[i] has the same type as the characters returned by read
      demand.length.times { |i| expect demand[i] }
    end

    if DOCTYPE == result
      parseDoctype
    else
      if ?? == delimiter
        terminator = "?>"
      else
        terminator = "" << delimiter << delimiter << '>'
      end
      found = findOneOfThese(/#{Regexp.escape(terminator)}/u)
      @text = @textBuffer
      # step over the terminator itself
      @column += terminator.length unless found.nil?
    end

    result
  end

  # Reads the remainder of a <!DOCTYPE ...> declaration into @text (without
  # the closing '>'). Nested markup declarations, quoted strings and
  # comments are skipped over; entity definitions are NOT processed.
  def parseDoctype
    depth = 1
    quoted = false
    delimiter = nil

    @text = ''

    while true do
      c = read
      case c
      when ?', ?" # for the sake of vim '
        if quoted
          if c == delimiter
            quoted = false
            delimiter = nil
          end
        else
          quoted = true
          delimiter = c
        end
      when ?<
        unless quoted
          if (?! == peekAt0) && (?- == peekAt1)
            # this is looking like a comment
            @text << c
            @text << (expect ?!)
            @text << (expect ?-)
            c = read
            if c.nil?
              raise sprintf("unexpected EOF in DOCTYPE (depth: %d, quoted: %s)", depth, quoted)
            end
            if ?- == c
              @text << ?-
              findOneOfThese(/-->/u)
              @text << @textBuffer
              @textBuffer = ''
              @text << (expect ?-)
              @text << (expect ?-)
              c = (expect ?>)
            else
              depth += 1
            end
          else
            depth += 1
          end
        end
      when ?>
        unless quoted
          depth -= 1
          return if 0 == depth
        end
      when nil
        raise sprintf("unexpected EOF in DOCTYPE (depth: %d, quoted: %s)", depth, quoted)
      end
      @text << c
    end
  end

  # Looks at the next two characters and decides what kind of event follows.
  def peekType
    c = peekAt0
    case c
    when nil, 0
      END_DOCUMENT
    when ?&
      ENTITY_REF
    when ?<
      case peekAt1
      when ?/
        END_ELEMENT
      when ?[
        CDATA_SECTION
      when ??, ?!
        UNDECIDED_TYPE
      else
        START_ELEMENT
      end
    else
      TEXT
    end
  end

  # Appends the character with code point +c+ to the text buffer. With
  # utf8encode set the character is appended as its UTF-8 encoding.
  def encode(c)
    if utf8encode
      @textBuffer << [c].pack('U')
    else
      @textBuffer << c
    end
  end

  # Parses an entity or character reference (&name; &#ddd; &#xhh;) and
  # appends its replacement to the text buffer. The reference name is left
  # in @name. Unknown entities are passed to the resolver (when there is
  # one); if that yields nothing @unresolvedEntity is set.
  def parseEntity
    expect ?&

    @name = ''
    while true do
      c = read
      break if ?; == c
      raise "unexpectedEOF" if c.nil?

      @name << c
    end

    if ?\# == @name[0]
      if ?x == @name[1]
        c = @name[2..-1].hex
      else
        c = @name[1..-1].to_i
      end
      encode(c)
      return
    end

    value = entityMap[@name]
    value = @resolver.resolve(@name) if value.nil? && !@resolver.nil?

    if value.nil?
      @unresolvedEntity = true
    else
      @textBuffer << value
    end
  end

  # Parses a start tag including its attributes, then resolves namespaces
  # (when processNamespace is set) and records the element as open unless
  # it is an empty element.
  def parseStartElement
    expect ?<
    skipWhitespace

    @qname = readName
    @textBuffer = ''

    while true do
      skipWhitespace
      c = peekAt0
      raise "unexpectedEOF" if c.nil?

      if ?/ == c
        @emptyElement = true

        read
        skipWhitespace
        expect ?>

        break
      end
      if ?> == c
        @emptyElement = false
        read
        break
      end

      aName = readName
      @textBuffer = ''
      if aName.nil? || (0 == aName.length)
        raise "name expected (start element)"
      end

      skipWhitespace
      expect ?=
      skipWhitespace

      delimiter = read
      unless (?' == delimiter) || (?" == delimiter) # for vim: '
        raise "invalidDelimiter"
      end
      value = parseText(delimiter, true)

      @textBuffer = ''

      # skip the end delimiter
      read

      if processNamespace
        # names and namespaces are filled in by handleNamespaces
        @attributeQName.push aName
        @attributeValue.push value
      else
        pushAttribute(aName, aName, nil, nil, value)
      end
    end

    if processNamespace
      handleNamespaces
    else
      @name = @qname
      @namespace = nil
      @prefix = nil
    end

    # NOTE(review): the single-root check below is skipped for empty
    # elements, so "<a/><b/>" is accepted while "<a></a><b></b>" is not.
    unless @emptyElement
      if @checkWellFormed && (0 == @elementName.length)
        raise "unexpected element" if @haveRoot

        @haveRoot = true
      end
      @elementName.push @name
      @elementQName.push @qname
      @elementNamespace.push @namespace
      @elementPrefix.push @prefix
    end
  end

  # Records one attribute in the five parallel attribute arrays.
  def pushAttribute(qname, name, namespace, prefix, value)
    @attributeQName.push qname
    @attributeName.push name
    @attributeNamespace.push namespace
    @attributePrefix.push prefix
    @attributeValue.push value
  end

  # Parses an end tag, checks that it closes the innermost open element and
  # takes that element's namespace declarations out of scope.
  def parseEndElement
    raise "elementStackEmpty" if 0 == @elementName.length

    expect ?<
    expect ?/
    skipWhitespace

    @qname = readName
    startQName = @elementQName.pop
    if @qname != startQName
      raise sprintf("unexpectedEndElement wanted '%s' found '%s'", startQName, @qname)
    end
    skipWhitespace
    expect ?>

    @name = @elementName.pop
    @prefix = @elementPrefix.pop
    @namespace = @elementNamespace.pop

    @elementNamespacePrefixStack.pop
    @elementNamespaceValueStack.pop
    @elementNamespaceDefaultStack.pop
  end

  # Reads a name at the current position and returns it. Raises
  # "name expected (readName)" when there is no name there.
  def readName
    @textBuffer = ''
    matchThis(@namePattern)
    raise "name expected (readName)" if 0 == @textBuffer.length

    @textBuffer
  end

  # This is called by parseStartElement to deal with namespaces. Updates the
  # known namespaces based on the attributes in this start element. Then sets
  # up the namespaces for this element itself (i.e. process the qname).
  def handleNamespaces
    defaultNamespace = @elementNamespaceDefaultStack.last

    qnames = @attributeQName
    @attributeQName = []
    values = @attributeValue
    @attributeValue = []

    prefixList = []
    valueList = []
    anyQualifiedAttributes = false

    qnames.each_with_index do |qname, i|
      value = values[i]

      if 'xmlns' == qname
        prefix = 'xmlns'
        name = nil
        namespace = lookupNamespace prefix
        defaultNamespace = value
      else
        pieces = qname.split(':', 2)
        if 2 != pieces.length
          # this is a un-prefixed non-xmlns attribute
          pushAttribute(qname, qname, nil, nil, value)
          next
        end

        namespace = value
        prefix = pieces[0]
        name = pieces[1]

        raise "illegalEmptyAtributePrefix" if 0 == prefix.length
        raise "illegalEmptyAttributeName" if 0 == name.length
      end

      # only prefixed attributes beyond here

      raise "illegalEmptyNamespace" if namespace.nil?

      if "xmlns" != prefix
        anyQualifiedAttributes = true
        # the namespace is corrected below, once every declaration of this
        # element is known
        pushAttribute(qname, name, namespace, prefix, value)
      else
        if !name.nil? && (namespace.nil? || (0 == namespace.length))
          raise "illegalNamespace"
        end

        prefixList.push name
        valueList.push value

        if @reportNamespaceAttributes
          pushAttribute(qname, name, namespace, prefix, value)
        end
      end
    end

    @elementNamespacePrefixStack.push prefixList
    @elementNamespaceValueStack.push valueList
    @elementNamespaceDefaultStack.push defaultNamespace

    if anyQualifiedAttributes
      # run over the attributes and make sure we have them qualified
      @attributePrefix.each_with_index do |attributePrefix, i|
        unless attributePrefix.nil?
          @attributeNamespace[i] = lookupNamespace attributePrefix
        end
      end
    end

    # handle namespaces for the element name
    pieces = @qname.split(':', 2)
    if 2 == pieces.length
      @name = pieces[1]
      @prefix = pieces[0]
      @namespace = lookupNamespace @prefix
    else
      @name = @qname
      @namespace = defaultNamespace
      @prefix = nil
    end
  end

  # Returns the namespace bound to +prefix+, searching the declarations of
  # the open elements from the innermost outwards. 'xml' and 'xmlns' are
  # predefined. Raises for a nil or unknown prefix.
  def lookupNamespace(prefix)
    raise "illegalPrefix" if prefix.nil?
    return 'http://www.w3.org/XML/1998/namespace' if 'xml' == prefix
    return 'http://www.w3.org/2000/xmlns/' if 'xmlns' == prefix

    (@elementNamespacePrefixStack.length - 1).downto(0) do |i|
      j = @elementNamespacePrefixStack[i].index(prefix)
      return @elementNamespaceValueStack[i][j] unless j.nil?
    end
    raise sprintf("unknown Namespace Prefix '%s' [%s]", prefix, caller[0])
  end

  # Accumulates text in the text buffer up to (not including) +delimiter+
  # and returns the buffer.
  #
  # resolve:: when true, entity references are replaced in line and an
  #           unknown entity raises "unresolvedEntity"; when false, parsing
  #           stops in front of the first '&'.
  def parseText(delimiter, resolve)
    stops = Regexp.escape("&" << delimiter)
    regex = /[#{stops}]/u
    c = findOneOfThese regex
    while !c.nil? && (delimiter != c) do
      if ?& == c
        break unless resolve

        parseEntity

        raise "unresolvedEntity" if @unresolvedEntity
      else
        # defensive: only possible when delimiter is more than one character
        c = read
        @textBuffer << c
      end

      c = findOneOfThese regex
    end

    @textBuffer
  end

  # Moves to the next non-whitespace character (reading more input when
  # needed) and returns it without consuming it; nil at end of input.
  def skipWhitespace
    until @inputBuffer.nil? do
      found = @inputBuffer.index(@skipWhitespacePattern, @column)

      unless found.nil?
        @column = found
        return @inputBuffer[found]
      end
      getMoreInput
    end
    nil
  end

  # Matches +regex+ exactly at the current position. On success the matched
  # text is appended to the text buffer and consumed. When nothing matches
  # at the current position the text buffer is left alone and the rest of
  # the current buffer is skipped.
  def matchThis(regex)
    return if @inputBuffer.nil?

    found = @inputBuffer.index(regex, @column)
    # a match further along the buffer is not a match *here*
    matched = (found == @column) ? $~[0] : nil

    if matched.nil?
      @column = @inputBufferLength
    else
      @textBuffer << matched
      @column += matched.length
    end
  end

  # Copies input to the text buffer until +regex+ matches, reading more
  # input as needed. Stops in front of the match and returns its first
  # character, or nil when the input ends without a match.
  def findOneOfThese(regex)
    until @inputBuffer.nil? do
      found = @inputBuffer.index(regex, @column)

      unless found.nil?
        if found != @column
          @textBuffer << @inputBuffer[@column..(found - 1)]
          @column = found
        end
        return @inputBuffer[found]
      end

      @textBuffer << @inputBuffer[@column..-1]
      getMoreInput
    end
    nil
  end

end
|