hutch-xamplr-pp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/xamplr-pp.rb ADDED
@@ -0,0 +1,991 @@
1
+ # xampl-pp : XML pull parser
2
+ # Copyright (C) 2002-2009 Bob Hutchison
3
+ #
4
+ # This library is free software; you can redistribute it and/or
5
+ # modify it under the terms of the GNU Lesser General Public
6
+ # License as published by the Free Software Foundation; either
7
+ # version 2.1 of the License, or (at your option) any later version.
8
+ #
9
+ # This library is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ # Lesser General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU Lesser General Public
15
+ # License along with this library; if not, write to the Free Software
16
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
+ #
18
+
19
+
20
# Xampl_PP is a small XML pull parser.
#
# Give it input with #input= (a String, an IO, or any object that responds
# to +gets+ and +close+, e.g. StringIO) and call #nextEvent repeatedly until
# it returns END_DOCUMENT. After each call the readers (#type, #name,
# #namespace, #text, #attributeName, ...) describe the event just parsed.
#
# The code is written so that it behaves the same whether String#[] returns
# an Integer (Ruby 1.8) or a one-character String (Ruby 1.9 and later).
class Xampl_PP
  # XML 'event' types
  START_DOCUMENT = 'START_DOCUMENT'
  END_DOCUMENT = 'END_DOCUMENT'
  START_ELEMENT = 'START_ELEMENT'
  END_ELEMENT = 'END_ELEMENT'
  TEXT = 'TEXT'
  CDATA_SECTION = 'CDATA_SECTION'
  ENTITY_REF = 'ENTITY_REF'
  IGNORABLE_WHITESPACE = 'IGNORABLE_WHITESPACE'
  PROCESSING_INSTRUCTION = 'PROCESSING_INSTRUCTION'
  COMMENT = 'COMMENT'
  DOCTYPE = 'DOCTYPE'
  UNDECIDED_TYPE = 'UNDECIDED_TYPE'

  # 'Features', actually just processing options
  attr_accessor :processNamespace
  attr_accessor :reportNamespaceAttributes
  attr_accessor :utf8encode

  # the entities that we will recognise
  attr_accessor :entityMap
  attr_reader :unresolvedEntity
  attr_accessor :resolver

  # some information about where we are
  attr_reader :line
  attr_reader :column

  # element information
  attr_reader :type
  attr_reader :emptyElement
  attr_reader :name
  attr_reader :qname
  attr_reader :namespace
  attr_reader :prefix
  attr_reader :attributeName
  attr_reader :attributeQName
  attr_reader :attributeNamespace
  attr_reader :attributePrefix
  attr_reader :attributeValue

  attr_reader :text

  # These are not intended for general use (they are not part of the api)

  # open element information
  attr_reader :elementName
  attr_reader :elementQName
  attr_reader :elementNamespace
  attr_reader :elementPrefix

  # some pre-compiled patterns
  attr_accessor :namePattern
  attr_accessor :skipWhitespacePattern

  attr_reader :elementNamespacePrefixStack
  attr_reader :elementNamespaceValueStack
  attr_reader :elementNamespaceDefaultStack

  attr_reader :standalone

  public

  # The predicates below compare by identity: @type is only ever assigned
  # one of the event-type constants defined above.

  def startDocument?
    @type.equal? START_DOCUMENT
  end

  def endDocument?
    @type.equal? END_DOCUMENT
  end

  def startElement?
    @type.equal? START_ELEMENT
  end

  def endElement?
    @type.equal? END_ELEMENT
  end

  def text?
    @type.equal? TEXT
  end

  def cdata?
    @type.equal? CDATA_SECTION
  end

  def entityRef?
    @type.equal? ENTITY_REF
  end

  def ignorableWhitespace?
    @type.equal? IGNORABLE_WHITESPACE
  end

  # True when the current text contains nothing matched by
  # skipWhitespacePattern, i.e. it is empty or whitespace only.
  def whitespace?
    nil == @text.index(@skipWhitespacePattern)
  end

  def processingInstruction?
    @type.equal? PROCESSING_INSTRUCTION
  end

  def comment?
    @type.equal? COMMENT
  end

  def doctype?
    @type.equal? DOCTYPE
  end

  # Default entity resolver, used when no #resolver object has been set and
  # the entity is not in #entityMap. Always raises; redefine it (or set a
  # resolver) to supply values for additional entities.
  def resolve(name)
    raise sprintf("unresolved entity '%s'", name)
  end

  # Same as #setInput.
  def input=(v)
    setInput(v)
  end

  # Selects the source to parse and resets all parser state.
  #
  # v may be nil (no input), a String holding the whole document, an IO, or
  # any object that responds to +gets+ and +close+ (StringIO, Tempfile, ...).
  # A previously set stream is closed first. Raises "illegalInput" for
  # anything else.
  def setInput(v)
    if (defined? @input) and (nil != @input) then
      @input.close
    end
    if nil == v then
      @input = nil
      @inputBuffer = nil
      @inputBufferLength = 0
      @column = 0
      @line = 0
    elsif v.kind_of? String then
      @input = nil
      @inputBuffer = v
      @inputBufferLength = v.length
      @line = 1
      @column = 0
    elsif v.kind_of?(IO) or (v.respond_to?(:gets) and v.respond_to?(:close)) then
      # only #gets and #close are ever called on the stream
      @input = v
      @inputBuffer = nil
      @inputBufferLength = 0
      @column = 0
      @line = 0
    else
      raise "illegalInput"
    end
    @nextInputBuffer = nil
    # dup: these buffers are appended to, so they must not be frozen literals
    @textBuffer = ''.dup

    @elementNamespacePrefixStack = []
    @elementNamespaceValueStack = []
    @elementNamespaceDefaultStack = []

    @elementName = []
    @elementQName = []
    @elementNamespace = []
    @elementPrefix = []

    @type = START_DOCUMENT
    @unresolvedEntity = false

    @name = nil
    @namespace = nil

    @attributeName = []
    @attributeQName = []
    @attributeNamespace = []
    @attributePrefix = []
    @attributeValue = []

    @emptyElement = false

    @errorMessage = nil

    initInput
  end

  def initInput
    # redefine this method if you have some initialisation you need done
  end

  # Parses the next event and returns its type (one of the event-type
  # constants). Returns END_DOCUMENT once the input is exhausted.
  #
  # Any StandardError raised while parsing is re-raised as a RuntimeError
  # whose message names the source and the current line/column; the original
  # backtrace is preserved on the new exception.
  def nextEvent
    begin
      @type = END_DOCUMENT
      if (nil == @inputBuffer) and (nil == @input) then
        return @type
      end

      @unresolvedEntity = false

      @text = nil

      parseNextEvent

      return @type
    rescue StandardError => message
      if nil != @inputBuffer then
        @errorMessage = sprintf("parse error: '%s' -- String input, line %d, column %d", message, @line, @column)
      elsif nil != @input then
        path = @input.respond_to?(:path) ? @input.path : nil
        if nil != path then
          @errorMessage = sprintf("parse error: '%s' -- file '%s', line %d, column %d", message, path, @line, @column)
        else
          @errorMessage = sprintf("parse error: '%s' -- unnamed IO stream, line %d, column %d", message, @line, @column)
        end
      else
        @errorMessage = sprintf("parse error: '%s' -- unknown source, line %d, column %d", message, @line, @column)
      end
      raise RuntimeError, @errorMessage, message.backtrace
    end
  end

  # Number of currently open (non-empty) elements.
  def depth
    return @elementName.length
  end

  private

  def initialize
    self.processNamespace = true
    self.reportNamespaceAttributes = false
    self.utf8encode = true

    self.input = nil

    self.entityMap = {"amp"=>"&",
                      "apos"=>"'",
                      "gt"=>">",
                      "lt"=>"<",
                      "quot"=>"\""}
    # first character of a name, followed by any number of name characters
    self.namePattern = /[^0-9\x00-\x20=\/>\`\.\-\~\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?][^\x00-\x20=\/>\`\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?]*/u
    # matches the first run of NON-whitespace; used to skip over whitespace
    self.skipWhitespacePattern = /[^\n\r\t ]+/u
  end

  # Advances to the next input buffer (one line for streams). For String
  # input there is only one buffer, so this marks the input as exhausted.
  # Leaves @inputBuffer nil and @inputBufferLength -1 at end of input.
  def getMoreInput
    @column = 0
    @line += 1
    if nil == @input then
      @inputBuffer = nil
      @inputBufferLength = -1
      return nil
    end
    @inputBuffer = @nextInputBuffer
    if nil == @inputBuffer then
      @inputBuffer = @input.gets
      @column = 0
      if nil == @inputBuffer then
        @inputBufferLength = -1
        return nil
      end
    end
    @inputBufferLength = @inputBuffer.length
    # keep one line of look-ahead for peekAt0/peekAt1
    @nextInputBuffer = @input.gets
  end

  # Legacy variant of #expect built on #read; no longer called by the parser.
  def expectold(e)
    c = read
    if c != e then
      msg = sprintf("unexpectedChar:: expect '%s' got '%s' in %s", (''.dup << e), (''.dup << c), caller[0])
      raise msg
    end
    return c
  end

  # Consumes one character and returns it; raises unless it equals e.
  def expect(e)
    if (nil == @inputBuffer) or (@inputBufferLength <= @column) then
      getMoreInput
      if nil == @inputBuffer then
        msg = sprintf("unexpectedChar:: expect '%s' got EOF in %s", (''.dup << e), caller[0])
        raise msg
      end
    end

    c = @inputBuffer[@column]
    @column += 1
    if c == e then
      return c
    end

    msg = sprintf("unexpectedChar:: expect '%s' got '%s' in %s", (''.dup << e), (''.dup << c), caller[0])
    raise msg
  end

  # Consumes and returns the next character, or nil at end of input.
  def read
    if (nil == @inputBuffer) or (@inputBufferLength <= @column) then
      getMoreInput
      if nil == @inputBuffer then
        return nil
      end
    end

    c = @inputBuffer[@column]
    @column += 1
    return c
  end

  # Returns the next character without consuming it (nil at end of input).
  def peekAt0
    if nil == @inputBuffer then
      getMoreInput
    end
    if @column < @inputBufferLength then
      return @inputBuffer[@column]
    else
      if (nil != @nextInputBuffer) and (0 < @nextInputBuffer.length) then
        return @nextInputBuffer[0]
      else
        return nil
      end
    end
  end

  # Returns the character after the next one without consuming anything
  # (nil if there is none), looking into the look-ahead buffer when needed.
  def peekAt1
    if nil == @inputBuffer then
      getMoreInput
    end
    if (@column + 1) < @inputBufferLength then
      return @inputBuffer[@column + 1]
    else
      if @column < @inputBufferLength then
        if (nil != @nextInputBuffer) and (0 < @nextInputBuffer.length) then
          return @nextInputBuffer[0]
        else
          return nil
        end
      else
        if (nil != @nextInputBuffer) and (1 < @nextInputBuffer.length) then
          return @nextInputBuffer[1]
        else
          return nil
        end
      end
    end
  end

  # Parses one event, setting @type and the element/attribute/text state.
  def parseNextEvent
    @attributeName.clear
    @attributeQName.clear
    @attributeNamespace.clear
    @attributePrefix.clear
    @attributeValue.clear

    if @emptyElement then
      # the last event was an empty start element like <start/>
      @type = END_ELEMENT
      @emptyElement = false

      # Close the namespace scope that handleNamespaces opened for the empty
      # element, exactly as parseEndElement does for a regular end tag;
      # otherwise its xmlns declarations would leak into following siblings.
      @elementNamespacePrefixStack.pop
      @elementNamespaceValueStack.pop
      @elementNamespaceDefaultStack.pop
      return
    end

    @prefix = nil
    @name = nil
    @qname = nil
    @namespace = nil
    @type = peekType

    @textBuffer = ''.dup

    case @type
      when END_DOCUMENT
        # nothing to do
      when ENTITY_REF
        parseEntity
        @text = @textBuffer
        @textBuffer = ''.dup
      when START_ELEMENT
        parseStartElement
      when END_ELEMENT
        parseEndElement
      when TEXT
        parseText(?<, false)
        @text = @textBuffer
        @textBuffer = ''.dup
        if 0 == @elementName.length then
          # whitespace outside the root element is reported as ignorable
          if nil == @text.index(@skipWhitespacePattern) then
            @type = IGNORABLE_WHITESPACE
          end
        end
      else
        @type = parseUndecided
    end
  end

  # Parses a construct starting with "<?" or "<!" and returns its event type.
  def parseUndecided
    # this could be a comment, processing instruction, CDATA section or DOCTYPE
    expect(?<)

    demand = nil
    delimiter = nil # **first** (not last) character in delimiting string

    @text = @textBuffer
    @textBuffer = ''.dup

    c = read
    if ?? == c then
      result = PROCESSING_INSTRUCTION
      demand = nil
      delimiter = ??
    elsif ?! == c then
      cc = peekAt0
      if ?- == cc then
        result = COMMENT
        demand = '--'
        delimiter = ?-
      elsif ?[ == cc then
        result = CDATA_SECTION
        demand = '[CDATA['
        delimiter = ?]
      else
        result = DOCTYPE
        demand = 'DOCTYPE'
        delimiter = nil
      end
    else
      # reached for input such as "<[" (peekType classifies it as CDATA)
      shown = (nil == c) ? 'EOF' : (''.dup << c)
      raise "illegal <#{shown}"
    end

    if nil != demand then
      # Index the string rather than using each_byte, so the value handed to
      # expect has the same type as what String#[] yields for the input.
      i = 0
      while i < demand.length do
        expect demand[i]
        i += 1
      end
    end

    if DOCTYPE == result then
      parseDoctype
    else
      if ?? == delimiter then
        s = Regexp.escape "?>"
        inc = 2
      else
        s = Regexp.escape(''.dup << delimiter << delimiter << '>')
        inc = 3
      end
      regex = /#{s}/u
      found = findOneOfThese(regex)
      @text = @textBuffer
      if nil != found then
        # step over the closing delimiter
        @column += inc
      end
    end

    return result
  end

  # True when the current text starts a line with "xml".
  def parseXMLDecl
    return nil != @text.index(/^xml/u)
  end

  # Consumes the remainder of a DOCTYPE declaration (after the keyword),
  # collecting its text in @text and handing each completed "<...>" piece to
  # parseDefinition. Raises on end of input.
  def parseDoctype
    depth = 1
    quoted = false
    delimiter = nil
    entityDefinitionText = ''.dup
    havePiece = false
    internalSubset = false

    @text = ''.dup

    while true do
      c = read
      case c
        when ?', ?" # for the sake of vim '
          if quoted then
            if c == delimiter then
              quoted = false
              delimiter = nil
            end
          else
            quoted = true
            delimiter = c
          end
        when ?<
          if not quoted then
            if (?! == peekAt0) and (?- == peekAt1) then
              # this is looking like a comment
              @text << c
              @text << expect(?!)
              @text << expect(?-)
              c = read
              if ?- == c then
                @text << ?-
                regex = /-->/u
                findOneOfThese(regex)
                @text << @textBuffer
                @textBuffer = ''.dup
                @text << expect(?-)
                @text << expect(?-)
                c = expect(?>)
              else
                depth += 1
                entityDefinitionText = ''.dup
              end
            else
              depth += 1
              entityDefinitionText = ''.dup
            end
          end
        when ?>
          if not quoted then
            depth -= 1
            # check right here for an entity definition!!!
            havePiece = true
            if 0 == depth then
              return
            end
          end
        when ?[
          if not quoted then
            internalSubset = true
          end
        when nil
          raise sprintf("unexpected EOF in DOCTYPE (depth: %d, quoted: %s)", depth, quoted)
      end
      @text << c
      entityDefinitionText << c
      if havePiece then
        parseDefinition(entityDefinitionText, internalSubset)
        entityDefinitionText = ''.dup
      end
      havePiece = false
    end
  end

  # Hook called by parseDoctype for each piece of a DOCTYPE; does nothing by
  # default.
  def parseDefinition(defn, internal)
  end

  # Looks at the next one or two characters and returns the event type that
  # starts there, without consuming anything.
  def peekType
    c = peekAt0
    case c
      when nil, 0
        return END_DOCUMENT
      when ?&
        return ENTITY_REF
      when ?<
        case peekAt1
          when ?/
            return END_ELEMENT
          when ?[
            return CDATA_SECTION
          when ??, ?!
            return UNDECIDED_TYPE
          else
            return START_ELEMENT
        end
      else
        return TEXT
    end
  end

  # Appends the character with code point c to @textBuffer, UTF-8 encoded
  # when utf8encode is set.
  def encode(c)
    if @utf8encode then
      # Array#pack('U') produces the UTF-8 byte sequence on every Ruby
      # version, whereas String#<< with an Integer appends a byte on 1.8 but
      # a whole code point on 1.9 and later.
      utf8 = [c].pack('U')
      if utf8.respond_to?(:force_encoding) and (nil == Encoding.compatible?(@textBuffer, utf8)) then
        # buffer holds bytes in an incompatible encoding: append raw bytes
        utf8.force_encoding(@textBuffer.encoding)
      end
      @textBuffer << utf8
    else
      @textBuffer << c
    end
  end

  # Parses "&name;" and appends its replacement to @textBuffer. Numeric
  # references go through #encode; named ones are looked up in entityMap and
  # then via the resolver (or #resolve). Sets @unresolvedEntity when the
  # resolver returns nil.
  def parseEntity
    expect(?&)

    @name = ''.dup
    while true do
      c = read
      if ?; == c then
        break
      end
      if nil == c then
        raise "unexpectedEOF"
      end
      @name << c
    end

    if ?\# == @name[0] then
      if ?x == @name[1] then
        c = @name[2..@name.length].hex
      else
        c = @name[1..@name.length].to_i
      end
      encode(c)
    else
      value = entityMap[@name]
      if nil != value then
        @textBuffer << value
      else
        if nil != @resolver then
          value = @resolver.resolve(@name)
        else
          value = resolve(@name)
        end

        if nil != value then
          @textBuffer << value
        else
          @unresolvedEntity = true
        end
      end
    end
  end

  # Parses a start tag including its attributes, then resolves namespaces
  # (when processNamespace is set) and records the element as open unless it
  # is an empty element.
  def parseStartElement
    expect(?<)

    @qname = readName
    @textBuffer = ''.dup

    multiple = false
    while true do
      hasWhitespace = skipWhitespace

      c = peekAt0
      if nil == c then
        raise "unexpectedEOF"
      end
      if ?/ == c then
        @emptyElement = true

        read
        expect(?>)

        break
      end
      if ?> == c then
        @emptyElement = false
        read
        break
      end

      if multiple and !hasWhitespace then
        raise "whitespace is required between attributes"
      end
      multiple = true

      aName = readName
      @textBuffer = ''.dup
      if (nil == aName) or (0 == aName.length) then
        raise "name expected (start element)"
      end

      skipWhitespace
      expect(?=)
      skipWhitespace

      delimiter = read
      if ?' == delimiter then # for vim: '
        value = parseText(?', true) # for vim: '
      elsif ?" == delimiter then # for vim: "
        value = parseText(?", true) # for vim: "
      else
        raise "invalidDelimiter"
      end

      @textBuffer = ''.dup

      # skip the end delimiter
      read

      if processNamespace then
        # names, prefixes and namespaces are filled in by handleNamespaces
        @attributeQName.push aName
      else
        @attributeName.push aName
        @attributeQName.push aName
        @attributeNamespace.push nil
        @attributePrefix.push nil
      end
      @attributeValue.push value
    end

    if processNamespace then
      handleNamespaces
    else
      @name = @qname
      @namespace = nil
      @prefix = nil
    end

    if not @emptyElement then
      @elementName.push @name
      @elementQName.push @qname
      @elementNamespace.push @namespace
      @elementPrefix.push @prefix
    end
  end

  # Parses an end tag, checks it against the innermost open element and
  # closes that element together with its namespace scope.
  def parseEndElement
    if 0 == @elementName.length then
      raise "elementStackEmpty"
    end

    expect(?<)
    expect(?/)

    @qname = readName
    startQName = @elementQName.pop
    if @qname != startQName then
      raise sprintf("unexpectedEndElement wanted '%s' found '%s'", startQName, @qname)
    end
    skipWhitespace
    expect(?>)

    @name = @elementName.pop
    @prefix = @elementPrefix.pop
    @namespace = @elementNamespace.pop

    @elementNamespacePrefixStack.pop
    @elementNamespaceValueStack.pop
    @elementNamespaceDefaultStack.pop
  end

  # Reads a name that must start exactly at the current column and returns
  # it; raises "invalid name" otherwise.
  def readName
    @textBuffer = ''.dup
    if @column != @inputBuffer.index(@namePattern, @column) then
      raise "invalid name"
    end
    if nil != $& then
      @textBuffer = $&
      @column += $&.length
    else
      @column = @inputBufferLength
    end
    if 0 == @textBuffer.length then
      raise "name expected (readName)"
    end
    return @textBuffer
  end

  # This is called by parseStartElement to deal with namespaces. Updates the
  # known namespaces based on the attributes in this start element. Then sets
  # up the namespaces for this element itself (i.e. processes the qname).
  def handleNamespaces
    i = 0

    defaultNamespace = @elementNamespaceDefaultStack.last
    anyQualifiedAttributes = false

    qnames = @attributeQName
    @attributeQName = []
    values = @attributeValue
    @attributeValue = []

    prefixList = []
    valueList = []

    while i < qnames.length do
      qname = qnames[i]
      value = values[i]
      i += 1

      if 'xmlns' == qname then
        prefix = 'xmlns'
        name = nil
        namespace = lookupNamespace prefix
        defaultNamespace = value
      else
        pieces = qname.split(':', 2)
        if 2 == pieces.length then
          namespace = value
          prefix = pieces[0]
          name = pieces[1]

          if 0 == prefix.length then
            raise "illegalEmptyAtributePrefix"
          end
          if 0 == name.length then
            raise "illegalEmptyAttributeName"
          end
        else
          # this is an un-prefixed non-xmlns attribute
          @attributeQName.push qname
          @attributeName.push qname
          @attributeNamespace.push nil
          @attributePrefix.push nil
          @attributeValue.push value

          next
        end
      end

      # only prefixed attributes beyond here

      if nil == namespace then
        raise "illegalEmptyNamespace"
      end

      if "xmlns" != prefix then
        anyQualifiedAttributes = true

        @attributeQName.push qname
        @attributeName.push name
        @attributeNamespace.push namespace
        @attributePrefix.push prefix
        @attributeValue.push value
      else
        if (nil != name) and ((nil == namespace) or (0 == namespace.length)) then
          raise "illegalNamespace"
        end

        # a namespace declaration: remember prefix -> value for this element
        prefixList.push name
        valueList.push value

        if @reportNamespaceAttributes then
          @attributeQName.push qname
          @attributeName.push name
          @attributeNamespace.push namespace
          @attributePrefix.push prefix
          @attributeValue.push value
        end
      end
    end

    @elementNamespacePrefixStack.push prefixList
    @elementNamespaceValueStack.push valueList
    @elementNamespaceDefaultStack.push defaultNamespace

    if anyQualifiedAttributes then
      # run over the attributes and make sure we have them qualified
      for i in 0..(@attributeName.length-1) do
        prefix = @attributePrefix[i]

        if nil != prefix then
          @attributeNamespace[i] = lookupNamespace prefix
        end
      end
    end

    # handle namespaces for the element name
    pieces = @qname.split(':', 2)
    if 2 == pieces.length then
      @name = pieces[1]
      @prefix = pieces[0]
      @namespace = lookupNamespace @prefix
    else
      @name = @qname
      @namespace = defaultNamespace
      @prefix = nil
    end
  end

  # Returns the namespace bound to prefix, searching from the innermost open
  # scope outwards. 'xml' and 'xmlns' are predefined. Raises for nil or for
  # an undeclared prefix.
  def lookupNamespace(prefix)
    if nil == prefix then
      raise "illegalPrefix"
    end
    if 'xml' == prefix then
      return 'http://www.w3.org/XML/1998/namespace'
    end
    if 'xmlns' == prefix then
      return 'http://www.w3.org/2000/xmlns/'
    end

    i = @elementNamespacePrefixStack.length - 1
    while 0 <= i do
      j = @elementNamespacePrefixStack[i].index(prefix)
      if nil != j then
        return @elementNamespaceValueStack[i][j]
      end

      i -= 1
    end
    raise sprintf("unknown Namespace Prefix '%s' [%s]", prefix, caller[0])
  end

  # Accumulates text into @textBuffer up to (not including) delimiter and
  # returns the buffer. With resolve set (attribute values) entity
  # references are expanded and '<' is rejected; without it (character data)
  # parsing stops at the first '&'.
  def parseText(delimiter, resolve)
    s = '&'.dup << delimiter
    regex = /[#{s}|<]/u
    c = findOneOfThese regex
    while (nil != c) and (delimiter != c) do
      if ?< == c then
        raise "illegal character '<'"
      end
      if ?& == c then
        if !resolve then
          break
        end

        parseEntity

        if @unresolvedEntity then
          raise "unresolvedEntity"
        end
      else
        # any other stop character is ordinary text
        c = read
        @textBuffer << c
      end

      c = findOneOfThese regex
    end

    return @textBuffer
  end

  # Moves to the next non-whitespace character, fetching more input as
  # needed. Returns true if any whitespace (or a buffer boundary) was skipped.
  def skipWhitespace
    foundSome = false
    while nil != @inputBuffer do
      p = @inputBuffer.index(@skipWhitespacePattern, @column)

      if nil != p then
        foundSome = (foundSome or (@column != p))
        @column = p
        return foundSome
      end
      getMoreInput
      foundSome = true
    end
    return foundSome
  end

  # Copies input into @textBuffer up to the next match of regex, leaves
  # @column on the match and returns its first character (nil if the input
  # ends first).
  def findOneOfThese(regex)
    p = @inputBuffer.index(regex, @column)

    if nil != p then
      if p != @column then
        @textBuffer << @inputBuffer[@column..(p - 1)]
        @column = p
      end
      return @inputBuffer[p]
    end
    @textBuffer << @inputBuffer[@column..-1]
    getMoreInput
    return findOneOfTheseSecond(regex)
  end

  # Continuation of findOneOfThese once the search has moved on to a fresh
  # buffer.
  def findOneOfTheseSecond(regex)
    # know we are at the first of a line
    while nil != @inputBuffer do
      @column = @inputBuffer.index(regex)

      if nil != @column then
        @textBuffer << @inputBuffer[0, @column]
        return @inputBuffer[@column]
      end
      @textBuffer << @inputBuffer
      getMoreInput
    end
    return nil
  end

end