xamplr-pp 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/xamplr-pp.rb ADDED
@@ -0,0 +1,991 @@
1
+ # xampl-pp : XML pull parser
2
+ # Copyright (C) 2002-2009 Bob Hutchison
3
+ #
4
+ # This library is free software; you can redistribute it and/or
5
+ # modify it under the terms of the GNU Lesser General Public
6
+ # License as published by the Free Software Foundation; either
7
+ # version 2.1 of the License, or (at your option) any later version.
8
+ #
9
+ # This library is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ # Lesser General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU Lesser General Public
15
+ # License along with this library; if not, write to the Free Software
16
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
+ #
18
+
19
+
20
# Xampl_PP is a small, non-validating XML pull parser.
#
# The caller supplies input with #input= (a String, or any object that
# responds to +gets+, such as a File or StringIO) and then calls #nextEvent
# repeatedly. Each call returns one of the event-type constants below and
# updates the reader attributes (#name, #namespace, #text, #attributeName,
# ...) to describe the event that was just parsed.
#
#   parser = Xampl_PP.new
#   parser.input = '<greeting kind="warm">hello</greeting>'
#   until parser.endDocument?
#     case parser.nextEvent
#     when Xampl_PP::START_ELEMENT then puts parser.name
#     when Xampl_PP::TEXT          then puts parser.text
#     end
#   end
#
# Parse failures are raised as RuntimeError with a message of the form
# "parse error: '<cause>' -- <source>, line <n>, column <n>".
class Xampl_PP
  # XML 'event' types
  START_DOCUMENT = 'START_DOCUMENT'.freeze
  END_DOCUMENT = 'END_DOCUMENT'.freeze
  START_ELEMENT = 'START_ELEMENT'.freeze
  END_ELEMENT = 'END_ELEMENT'.freeze
  TEXT = 'TEXT'.freeze
  CDATA_SECTION = 'CDATA_SECTION'.freeze
  ENTITY_REF = 'ENTITY_REF'.freeze
  IGNORABLE_WHITESPACE = 'IGNORABLE_WHITESPACE'.freeze
  PROCESSING_INSTRUCTION = 'PROCESSING_INSTRUCTION'.freeze
  COMMENT = 'COMMENT'.freeze
  DOCTYPE = 'DOCTYPE'.freeze
  UNDECIDED_TYPE = 'UNDECIDED_TYPE'.freeze

  # 'Features', actually just processing options
  attr_accessor :processNamespace, :reportNamespaceAttributes, :utf8encode

  # the entities that we will recognise, and an optional object whose
  # resolve(name) method is asked about entities missing from entityMap
  attr_accessor :entityMap, :resolver
  attr_reader :unresolvedEntity

  # some information about where we are
  attr_reader :line, :column

  # element information; the attribute* readers are parallel arrays with one
  # entry per attribute reported for the current START_ELEMENT
  attr_reader :type, :emptyElement, :name, :qname, :namespace, :prefix
  attr_reader :attributeName, :attributeQName, :attributeNamespace,
              :attributePrefix, :attributeValue

  attr_reader :text

  # These are not intended for general use (they are not part of the api)

  # open element information (stacks, innermost element last)
  attr_reader :elementName, :elementQName, :elementNamespace, :elementPrefix

  # some pre-compiled patterns
  attr_accessor :namePattern, :skipWhitespacePattern

  # one entry per element for which handleNamespaces has run
  attr_reader :elementNamespacePrefixStack, :elementNamespaceValueStack,
              :elementNamespaceDefaultStack

  # never assigned by this class
  attr_reader :standalone

  # Sets the default options (namespace processing on, namespace attributes
  # not reported, UTF-8 encoding of character references on), the five
  # predefined XML entities, and the name / whitespace patterns. The parser
  # starts with no input.
  def initialize
    self.processNamespace = true
    self.reportNamespaceAttributes = false
    self.utf8encode = true

    self.input = nil

    self.entityMap = { "amp" => "&",
                       "apos" => "'",
                       "gt" => ">",
                       "lt" => "<",
                       "quot" => "\"" }
    self.namePattern = /[^0-9\x00-\x20=\/>\`\.\-\~\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?][^\x00-\x20=\/>\`\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?]*/u
    # matches the first run of NON-whitespace characters
    self.skipWhitespacePattern = /[^\n\r\t ]+/u
  end

  def startDocument?
    @type == START_DOCUMENT
  end

  def endDocument?
    @type == END_DOCUMENT
  end

  def startElement?
    @type == START_ELEMENT
  end

  def endElement?
    @type == END_ELEMENT
  end

  def text?
    @type == TEXT
  end

  def cdata?
    @type == CDATA_SECTION
  end

  def entityRef?
    @type == ENTITY_REF
  end

  def ignorableWhitespace?
    @type == IGNORABLE_WHITESPACE
  end

  # True when the current event has text and that text contains only
  # whitespace (or is empty). False when the current event has no text.
  def whitespace?
    !@text.nil? && @text.index(@skipWhitespacePattern).nil?
  end

  def processingInstruction?
    @type == PROCESSING_INSTRUCTION
  end

  def comment?
    @type == COMMENT
  end

  def doctype?
    @type == DOCTYPE
  end

  # Called for an entity that is not in entityMap when no resolver is set.
  # Redefine it to return the replacement text, or nil to flag the entity as
  # unresolved. The default implementation raises.
  def resolve(name)
    raise sprintf("unresolved entity '%s'", name)
  end

  def input=(v)
    setInput(v)
  end

  # Closes any previous IO-style input, installs +v+ as the new input and
  # resets all parser state.
  #
  # v:: nil (no input), a String holding the whole document, or an object
  #     responding to +gets+ (IO, File, StringIO, ...) that is read one line
  #     at a time.
  #
  # Raises "illegalInput" for anything else.
  def setInput(v)
    @input.close if defined?(@input) && !@input.nil?

    if v.nil?
      @input = nil
      @inputBuffer = nil
      @inputBufferLength = 0
      @column = 0
      @line = 0
    elsif v.kind_of?(String)
      @input = nil
      @inputBuffer = v
      @inputBufferLength = v.length
      @line = 1
      @column = 0
    elsif v.respond_to?(:gets)
      @input = v
      @inputBuffer = nil
      @inputBufferLength = 0
      @column = 0
      @line = 0
    else
      raise "illegalInput"
    end
    @nextInputBuffer = nil
    @textBuffer = ''.dup

    @elementNamespacePrefixStack = []
    @elementNamespaceValueStack = []
    @elementNamespaceDefaultStack = []

    @elementName = []
    @elementQName = []
    @elementNamespace = []
    @elementPrefix = []

    @type = START_DOCUMENT
    @unresolvedEntity = false

    @name = nil
    @namespace = nil

    @attributeName = []
    @attributeQName = []
    @attributeNamespace = []
    @attributePrefix = []
    @attributeValue = []

    @emptyElement = false

    @errorMessage = nil

    initInput
  end

  def initInput
    # redefine this method if you have some initialisation you need done
  end

  # Parses the next event and returns its type (one of the event constants).
  # Returns END_DOCUMENT when there is no input at all.
  #
  # Any StandardError raised while parsing is re-raised as a RuntimeError
  # whose message names the input source and the current line and column;
  # the backtrace of the underlying error is kept.
  def nextEvent
    @type = END_DOCUMENT
    return @type if @inputBuffer.nil? && @input.nil?

    @unresolvedEntity = false
    @text = nil

    parseNextEvent

    @type
  rescue StandardError => e
    # @input must be checked first: a stream source also has a line loaded in
    # @inputBuffer, so testing the buffer would mislabel it as String input.
    source =
      if @input.kind_of?(File)
        sprintf("file '%s'", @input.path)
      elsif !@input.nil?
        'unnamed IO stream'
      elsif !@inputBuffer.nil?
        'String input'
      else
        'unknown source'
      end
    @errorMessage = sprintf("parse error: '%s' -- %s, line %d, column %d",
                            e, source, @line.to_i, @column.to_i)
    raise RuntimeError, @errorMessage, e.backtrace
  end

  # Number of currently open (non-empty) elements.
  def depth
    @elementName.length
  end

  private

  # Makes the next line of input the current buffer. String input is a single
  # buffer, so asking for more simply marks the end of input. Stream input is
  # read with +gets+, keeping one line of lookahead in @nextInputBuffer so
  # that peekAt0/peekAt1 can look across a line boundary.
  # Returns nil at end of input.
  def getMoreInput
    @column = 0
    @line += 1
    if @input.nil?
      @inputBuffer = nil
      @inputBufferLength = -1
      return nil
    end
    @inputBuffer = @nextInputBuffer
    if @inputBuffer.nil?
      @inputBuffer = @input.gets
      @column = 0
      if @inputBuffer.nil?
        @inputBufferLength = -1
        return nil
      end
    end
    @inputBufferLength = @inputBuffer.length
    @nextInputBuffer = @input.gets
  end

  # Legacy name for #expect, kept for subclasses that may still call it.
  def expectold(e)
    expect(e)
  end

  # Consumes one character and returns it; raises "unexpectedChar:: ..." if
  # it is not +e+ or if the input is exhausted.
  def expect(e)
    c = read
    if c.nil?
      raise sprintf("unexpectedChar:: expect '%s' got EOF in %s", (''.dup << e), caller[0])
    end
    return c if c == e

    raise sprintf("unexpectedChar:: expect '%s' got '%s' in %s", (''.dup << e), (''.dup << c), caller[0])
  end

  # Consumes and returns the next character, or nil at end of input.
  def read
    if @inputBuffer.nil? || (@inputBufferLength <= @column)
      getMoreInput
      return nil if @inputBuffer.nil?
    end

    c = @inputBuffer[@column]
    @column += 1
    c
  end

  # Returns the next character without consuming it (nil at end of input).
  def peekAt0
    getMoreInput if @inputBuffer.nil?

    return @inputBuffer[@column] if @column < @inputBufferLength

    if !@nextInputBuffer.nil? && (0 < @nextInputBuffer.length)
      @nextInputBuffer[0]
    end
  end

  # Returns the character after the next one without consuming anything
  # (nil if there is none), looking into the lookahead line when needed.
  def peekAt1
    getMoreInput if @inputBuffer.nil?

    return @inputBuffer[@column + 1] if (@column + 1) < @inputBufferLength

    # exactly one character left in the current line => first character of
    # the next line; none left => second character of the next line
    wanted = (@column < @inputBufferLength) ? 0 : 1
    if !@nextInputBuffer.nil? && (wanted < @nextInputBuffer.length)
      @nextInputBuffer[wanted]
    end
  end

  # Drops the namespace declarations recorded for the element being closed.
  def popNamespaceScope
    @elementNamespacePrefixStack.pop
    @elementNamespaceValueStack.pop
    @elementNamespaceDefaultStack.pop
  end

  # Parses exactly one event, setting @type and the event's details.
  def parseNextEvent
    @attributeName.clear
    @attributeQName.clear
    @attributeNamespace.clear
    @attributePrefix.clear
    @attributeValue.clear

    if @emptyElement
      # the last event was an empty start element like <start/>; report its
      # end, keeping @name/@qname/@namespace/@prefix from the start event
      @type = END_ELEMENT
      @emptyElement = false
      # the start element pushed its namespace scope (see handleNamespaces);
      # it must be closed here or its declarations leak into later siblings
      popNamespaceScope
      return
    end

    @prefix = nil
    @name = nil
    @qname = nil
    @namespace = nil
    @type = peekType

    @textBuffer = ''.dup

    case @type
    when END_DOCUMENT
      # nothing to do
    when ENTITY_REF
      parseEntity
      @text = @textBuffer
      @textBuffer = ''.dup
    when START_ELEMENT
      parseStartElement
    when END_ELEMENT
      parseEndElement
    when TEXT
      parseText(?<, false)
      @text = @textBuffer
      @textBuffer = ''.dup
      # whitespace outside the root element is reported as ignorable
      if @elementName.empty? && @text.index(@skipWhitespacePattern).nil?
        @type = IGNORABLE_WHITESPACE
      end
    else
      @type = parseUndecided
    end
  end

  # Parses a construct starting with "<?" or "<!": a processing instruction,
  # comment, CDATA section or DOCTYPE. Leaves the construct's content in
  # @text and returns the event type.
  def parseUndecided
    expect ?<

    demand = nil
    delimiter = nil # **first** (not last) character in delimiting string

    @text = @textBuffer
    @textBuffer = ''.dup

    c = read
    if ?? == c
      result = PROCESSING_INSTRUCTION
      delimiter = ??
    elsif ?! == c
      cc = peekAt0
      if ?- == cc
        result = COMMENT
        demand = '--'
        delimiter = ?-
      elsif ?[ == cc
        result = CDATA_SECTION
        demand = '[CDATA['
        delimiter = ?]
      else
        result = DOCTYPE
        demand = 'DOCTYPE'
      end
    else
      # this should never happen because we'll get an illegal name exception
      # first
      raise "illegal <#{c}"
    end

    # compare character by character: buffer indexing yields characters, so
    # the expected values must be characters too (not bytes)
    demand.each_char { |d| expect d } unless demand.nil?

    if DOCTYPE == result
      parseDoctype
    else
      terminator = (?? == delimiter) ? '?>' : (''.dup << delimiter << delimiter << '>')
      regex = /#{Regexp.escape(terminator)}/u
      found = findOneOfThese(regex)
      @text = @textBuffer
      # step over the terminator; at EOF there is nothing to step over
      @column += terminator.length unless found.nil?
    end

    result
  end

  # True when the current text starts with "xml" (at the start of any line).
  def parseXMLDecl
    !@text.index(/^xml/u).nil?
  end

  # Consumes the rest of a DOCTYPE declaration (the "<!DOCTYPE" itself has
  # already been read), collecting its content in @text without the closing
  # '>'. Tracks quoting and '<' ... '>' nesting, skips over comments, and
  # hands every completed nested declaration to parseDefinition.
  def parseDoctype
    depth = 1
    quoted = false
    delimiter = nil
    entityDefinitionText = ''.dup
    havePiece = false
    internalSubset = false

    @text = ''.dup

    loop do
      c = read
      case c
      when ?', ?"
        if quoted
          if c == delimiter
            quoted = false
            delimiter = nil
          end
        else
          quoted = true
          delimiter = c
        end
      when ?<
        unless quoted
          if (?! == peekAt0) && (?- == peekAt1)
            # this is looking like a comment
            @text << c
            @text << (expect ?!)
            @text << (expect ?-)
            c = read
            if ?- == c
              @text << ?-
              findOneOfThese(/-->/u)
              @text << @textBuffer
              @textBuffer = ''.dup
              @text << (expect ?-)
              @text << (expect ?-)
              c = (expect ?>)
            else
              depth += 1
              entityDefinitionText = ''.dup
            end
          else
            depth += 1
            entityDefinitionText = ''.dup
          end
        end
      when ?>
        unless quoted
          depth -= 1
          havePiece = true
          return if depth.zero?
        end
      when ?[
        internalSubset = true unless quoted
      when nil
        raise sprintf("unexpected EOF in DOCTYPE (depth: %d, quoted: %s)", depth, quoted)
      end
      @text << c
      entityDefinitionText << c
      if havePiece
        parseDefinition(entityDefinitionText, internalSubset)
        entityDefinitionText = ''.dup
      end
      havePiece = false
    end
  end

  # Hook: receives the text of each declaration found inside a DOCTYPE and
  # whether a '[' (internal subset) has been seen. Does nothing by default.
  def parseDefinition(defn, internal)
  end

  # Looks at the next one or two characters, without consuming them, to
  # decide what kind of event comes next.
  def peekType
    c = peekAt0
    case c
    when nil, 0 # 0: presumably NUL on Rubies where indexing a String yields an Integer
      END_DOCUMENT
    when ?&
      ENTITY_REF
    when ?<
      case peekAt1
      when ?/
        END_ELEMENT
      when ?[
        CDATA_SECTION
      when ??, ?!
        UNDECIDED_TYPE
      else
        START_ELEMENT
      end
    else
      TEXT
    end
  end

  # Appends the character with code point +c+ to @textBuffer. With
  # utf8encode on, the character is packed as UTF-8; Array#pack produces the
  # bytes, since String#<< treats an Integer as a code point rather than as
  # a raw byte.
  def encode(c)
    if @utf8encode
      @textBuffer << [c].pack('U')
    else
      @textBuffer << c
    end
  end

  # Parses "&name;" and appends its replacement text to @textBuffer.
  # Handles decimal (&#N;) and hexadecimal (&#xN;) character references,
  # entityMap, and then @resolver or #resolve. Sets @unresolvedEntity when
  # the resolver answers nil. Note that this overwrites @name.
  def parseEntity
    expect ?&

    @name = ''.dup
    loop do
      c = read
      break if ?; == c
      raise "unexpectedEOF" if c.nil?

      @name << c
    end

    if ?\# == @name[0]
      code = (?x == @name[1]) ? @name[2..-1].hex : @name[1..-1].to_i
      encode(code)
      return
    end

    value = entityMap[@name]
    if value.nil?
      value = @resolver.nil? ? resolve(@name) : @resolver.resolve(@name)
    end

    if value.nil?
      @unresolvedEntity = true
    else
      @textBuffer << value
    end
  end

  # Parses a start tag including its attributes. Sets @emptyElement for
  # "<name/>", fills the attribute arrays, resolves namespaces when
  # processNamespace is on, and pushes non-empty elements onto the open
  # element stacks.
  def parseStartElement
    expect ?<

    @qname = readName
    @textBuffer = ''.dup

    multiple = false
    loop do
      hasWhitespace = skipWhitespace

      c = peekAt0
      raise "unexpectedEOF" if c.nil?

      if ?/ == c
        @emptyElement = true
        read
        expect ?>
        break
      end
      if ?> == c
        @emptyElement = false
        read
        break
      end

      if multiple && !hasWhitespace
        raise "whitespace is required between attributes"
      end
      multiple = true

      aName = readName
      @textBuffer = ''.dup
      if aName.nil? || aName.empty?
        raise "name expected (start element)"
      end

      skipWhitespace
      expect ?=
      skipWhitespace

      delimiter = read
      unless (?' == delimiter) || (?" == delimiter)
        raise "invalidDelimiter"
      end
      value = parseText(delimiter, true)

      @textBuffer = ''.dup

      # skip the end delimiter
      read

      if processNamespace
        # handleNamespaces fills in the remaining attribute arrays
        @attributeQName.push aName
      else
        @attributeName.push aName
        @attributeQName.push aName
        @attributeNamespace.push nil
        @attributePrefix.push nil
      end
      @attributeValue.push value
    end

    if processNamespace
      handleNamespaces
    else
      @name = @qname
      @namespace = nil
      @prefix = nil
    end

    unless @emptyElement
      @elementName.push @name
      @elementQName.push @qname
      @elementNamespace.push @namespace
      @elementPrefix.push @prefix
    end
  end

  # Parses an end tag, checks that it matches the innermost open element and
  # pops that element (and its namespace scope) off the stacks.
  def parseEndElement
    raise "elementStackEmpty" if @elementName.empty?

    expect ?<
    expect ?/

    @qname = readName
    startQName = @elementQName.pop
    if @qname != startQName
      raise sprintf("unexpectedEndElement wanted '%s' found '%s'", startQName, @qname)
    end
    skipWhitespace
    expect ?>

    @name = @elementName.pop
    @prefix = @elementPrefix.pop
    @namespace = @elementNamespace.pop

    popNamespaceScope
  end

  # Reads a name that must start at the current column of the current line,
  # consumes it and returns it. Raises "invalid name" if there is none.
  def readName
    @textBuffer = ''.dup
    if @inputBuffer.nil? || (@column != @inputBuffer.index(@namePattern, @column))
      raise "invalid name"
    end

    # String#index with a Regexp records the match it found
    @textBuffer = Regexp.last_match(0)
    @column += @textBuffer.length
    raise "name expected (readName)" if @textBuffer.empty?

    @textBuffer
  end

  # Appends one attribute to the five parallel attribute arrays.
  def pushAttribute(qname, name, namespace, prefix, value)
    @attributeQName.push qname
    @attributeName.push name
    @attributeNamespace.push namespace
    @attributePrefix.push prefix
    @attributeValue.push value
  end

  # This is called by parseStartElement to deal with namespaces. Updates the
  # known namespaces based on the attributes in this start element, then sets
  # up the namespace for the element itself (i.e. processes the qname).
  #
  # Always pushes one entry on each namespace stack; the matching pop happens
  # in parseEndElement, or in parseNextEvent for an empty element.
  def handleNamespaces
    defaultNamespace = @elementNamespaceDefaultStack.last

    qnames = @attributeQName
    values = @attributeValue
    @attributeQName = []
    @attributeValue = []

    prefixList = []
    valueList = []
    anyQualifiedAttributes = false

    qnames.each_with_index do |qname, i|
      value = values[i]

      if 'xmlns' == qname
        prefix = 'xmlns'
        name = nil
        namespace = lookupNamespace prefix
        defaultNamespace = value
      else
        prefix, name = qname.split(':', 2)
        if name.nil?
          # this is an un-prefixed non-xmlns attribute
          pushAttribute(qname, qname, nil, nil, value)
          next
        end

        # placeholder only: for a qualified attribute the real namespace is
        # looked up below, once this element's declarations are on the stack
        namespace = value

        raise "illegalEmptyAtributePrefix" if prefix.empty?
        raise "illegalEmptyAttributeName" if name.empty?
      end

      # only xmlns and prefixed attributes beyond here

      raise "illegalEmptyNamespace" if namespace.nil?

      if "xmlns" != prefix
        anyQualifiedAttributes = true
        pushAttribute(qname, name, namespace, prefix, value)
      else
        # xmlns:foo="" is not allowed (a bare xmlns="" is)
        raise "illegalNamespace" if !name.nil? && namespace.empty?

        prefixList.push name
        valueList.push value

        if @reportNamespaceAttributes
          pushAttribute(qname, name, namespace, prefix, value)
        end
      end
    end

    @elementNamespacePrefixStack.push prefixList
    @elementNamespaceValueStack.push valueList
    @elementNamespaceDefaultStack.push defaultNamespace

    if anyQualifiedAttributes
      # run over the attributes and make sure we have them qualified
      @attributePrefix.each_with_index do |attributePrefix, i|
        unless attributePrefix.nil?
          @attributeNamespace[i] = lookupNamespace attributePrefix
        end
      end
    end

    # handle namespaces for the element name
    pieces = @qname.split(':', 2)
    if 2 == pieces.length
      @name = pieces[1]
      @prefix = pieces[0]
      @namespace = lookupNamespace @prefix
    else
      @name = @qname
      @namespace = defaultNamespace
      @prefix = nil
    end
  end

  # Returns the namespace bound to +prefix+, searching from the innermost
  # open element outwards. 'xml' and 'xmlns' are predefined. Raises when the
  # prefix is nil or not declared.
  def lookupNamespace(prefix)
    raise "illegalPrefix" if prefix.nil?
    return 'http://www.w3.org/XML/1998/namespace' if 'xml' == prefix
    return 'http://www.w3.org/2000/xmlns/' if 'xmlns' == prefix

    (@elementNamespacePrefixStack.length - 1).downto(0) do |i|
      j = @elementNamespacePrefixStack[i].index(prefix)
      return @elementNamespaceValueStack[i][j] unless j.nil?
    end
    raise sprintf("unknown Namespace Prefix '%s' [%s]", prefix, caller[0])
  end

  # Accumulates text into @textBuffer up to (not including) +delimiter+ or
  # the end of input, and returns @textBuffer.
  #
  # delimiter:: the character that ends the text ('<' for element content,
  #             the quote character for an attribute value)
  # resolve::   true to expand entity references in place (attribute values);
  #             false to stop in front of the first '&' so that it becomes
  #             its own ENTITY_REF event
  #
  # Raises on a '<' that is not the delimiter, and on an unresolved entity.
  def parseText(delimiter, resolve)
    stoppers = Regexp.escape('&<'.dup << delimiter)
    regex = /[#{stoppers}]/u
    c = findOneOfThese regex
    while !c.nil? && (delimiter != c)
      raise "illegal character '<'" if ?< == c

      if ?& == c
        break unless resolve

        parseEntity

        raise "unresolvedEntity" if @unresolvedEntity
      else
        @textBuffer << read
      end

      c = findOneOfThese regex
    end

    @textBuffer
  end

  # Advances to the next non-whitespace character, crossing line boundaries.
  # Returns true if anything (including a line break) was skipped.
  def skipWhitespace
    foundSome = false
    until @inputBuffer.nil?
      p = @inputBuffer.index(@skipWhitespacePattern, @column)

      unless p.nil?
        foundSome ||= (@column != p)
        @column = p
        return foundSome
      end
      getMoreInput
      foundSome = true
    end
    foundSome
  end

  # Appends everything up to the next match of +regex+ to @textBuffer, leaves
  # @column on the match and returns the first matched character. Returns nil
  # (having appended all remaining input) when there is no match.
  def findOneOfThese(regex)
    return nil if @inputBuffer.nil?

    p = @inputBuffer.index(regex, @column)

    unless p.nil?
      if p != @column
        @textBuffer << @inputBuffer[@column...p]
        @column = p
      end
      return @inputBuffer[p]
    end
    @textBuffer << @inputBuffer[@column..-1]
    getMoreInput
    findOneOfTheseSecond(regex)
  end

  # Continuation of findOneOfThese for the following lines of input.
  def findOneOfTheseSecond(regex)
    # know we are at the first of a line
    until @inputBuffer.nil?
      @column = @inputBuffer.index(regex)

      unless @column.nil?
        @textBuffer << @inputBuffer[0, @column]
        return @inputBuffer[@column]
      end
      @textBuffer << @inputBuffer
      getMoreInput # also resets @column to 0
    end
    nil
  end
end