hutch-xamplr-pp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,845 @@
1
+
2
# Xpp is a small, non-validating XML pull parser.
#
# Usage: create a parser, assign a String or a line-oriented stream (anything
# responding to +gets+) to +input=+, then call +nextEvent+ repeatedly until it
# returns END_DOCUMENT.  After each call the readers (+name+, +text+,
# +attributeName+, ...) describe the event that was just parsed.
#
# All parse failures are raised as RuntimeError with a message of the form
# "parse error: '<reason>' -- <source>, line L, column C".
class Xpp
  # XML 'event' types
  START_DOCUMENT = 'START_DOCUMENT'.freeze
  END_DOCUMENT = 'END_DOCUMENT'.freeze
  START_ELEMENT = 'START_ELEMENT'.freeze
  END_ELEMENT = 'END_ELEMENT'.freeze
  TEXT = 'TEXT'.freeze
  CDATA_SECTION = 'CDATA_SECTION'.freeze
  ENTITY_REF = 'ENTITY_REF'.freeze
  IGNORABLE_WHITESPACE = 'IGNORABLE_WHITESPACE'.freeze
  PROCESSING_INSTRUCTION = 'PROCESSING_INSTRUCTION'.freeze
  COMMENT = 'COMMENT'.freeze
  DOCTYPE = 'DOCTYPE'.freeze
  LOOK_FURTHER = 'LOOK_FURTHER'.freeze

  # 'Features', actually just processing options
  attr_accessor :processNamespace
  attr_accessor :reportNamespaceAttributes

  # the entities that we will recognise (entity name => replacement text)
  attr_accessor :entityMap
  # true when the last entity reference was not found in entityMap
  attr_reader :unresolvedEntity

  # some information about where we are
  attr_reader :line
  attr_reader :column

  # is the current text whitespace?
  attr_reader :whitespace

  # element information
  attr_reader :type
  attr_reader :emptyElement
  attr_reader :name
  attr_reader :qname
  attr_reader :namespace
  attr_reader :prefix
  # attribute information: parallel arrays, one slot per reported attribute
  attr_reader :attributeName
  attr_reader :attributeQName
  attr_reader :attributeNamespace
  attr_reader :attributePrefix
  attr_reader :attributeValue

  attr_reader :text

  # namespace declarations, one entry per open element
  attr_reader :elementNamespacePrefixStack
  attr_reader :elementNamespaceValueStack
  attr_reader :elementNamespaceDefaultStack

  # open element information (stacks, innermost element last)
  attr_reader :elementName
  attr_reader :elementQName
  attr_reader :elementNamespace
  attr_reader :elementPrefix

  # Builds a parser with namespace processing on, namespace attributes not
  # reported, no input, and the five predefined XML entities.
  def initialize
    self.processNamespace = true
    self.reportNamespaceAttributes = false

    self.input = nil

    self.entityMap = { 'amp' => '&',
                       'apos' => "'",
                       'gt' => '>',
                       'lt' => '<',
                       'quot' => '"' }
  end

  # Predicates on the type of the event most recently returned by nextEvent.

  def startDocument?
    @type == START_DOCUMENT
  end

  def endDocument?
    @type == END_DOCUMENT
  end

  def startElement?
    @type == START_ELEMENT
  end

  def endElement?
    @type == END_ELEMENT
  end

  def text?
    @type == TEXT
  end

  def cdata?
    @type == CDATA_SECTION
  end

  def entityRef?
    @type == ENTITY_REF
  end

  def ignorableWhitespace?
    @type == IGNORABLE_WHITESPACE
  end

  def processingInstruction?
    @type == PROCESSING_INSTRUCTION
  end

  def comment?
    @type == COMMENT
  end

  def doctype?
    @type == DOCTYPE
  end

  def lookFurther?
    @type == LOOK_FURTHER
  end

  # FIXME: a type-checked whitespace? query was sketched here but never
  # enabled; the plain +whitespace+ reader is what is available.
  #
  #def whitespace?
  #  if text? or ignorableWhitespace? or cdata? then
  #    return @whitespace
  #  end
  #  raise "illegal type for whitespace query"
  #end

  # Sets the source to parse and resets all parser state.
  #
  # v may be nil (no input), a String holding the whole document, or any
  # object responding to +gets+ (IO, File, StringIO, ...), which is read one
  # line at a time.  Anything else raises RuntimeError "illegalInput".
  def input=(v)
    if v.nil?
      @input = nil
      @inputBuffer = nil
      @inputBufferLength = 0
    elsif v.kind_of?(String)
      @input = nil
      @inputBuffer = v
      @inputBufferLength = v.length
    elsif v.respond_to?(:gets)
      @input = v
      @inputBuffer = nil
      @inputBufferLength = 0
    else
      raise "illegalInput"
    end
    @nextInputBuffer = nil
    @inputBufferPosition = 0
    @textBuffer = +''

    @line = 1
    @column = 0
    @compactNewLine = false

    @elementNamespacePrefixStack = []
    @elementNamespaceValueStack = []
    @elementNamespaceDefaultStack = []

    @elementName = []
    @elementQName = []
    @elementNamespace = []
    @elementPrefix = []

    @whitespace = true
    @type = START_DOCUMENT
    @unresolvedEntity = false

    @name = nil
    @qname = nil
    @namespace = nil
    @prefix = nil
    @text = nil

    @attributeName = []
    @attributeQName = []
    @attributeNamespace = []
    @attributePrefix = []
    @attributeValue = []

    @emptyElement = false
  end

  # Parses the next event and returns its type (one of the event constants).
  #
  # Raises RuntimeError, with the failure reason plus source, line and column,
  # when no input is defined or the input is not well formed.
  def nextEvent
    if @inputBuffer.nil? && @input.nil?
      raise "no input defined"
    end

    @whitespace = true
    @unresolvedEntity = false
    @text = nil

    parseNextEvent

    @type
  rescue RuntimeError => e
    raise sprintf("parse error: '%s' -- %s, line %d, column %d", e, describeSource, @line, @column)
  end

  private

  # Names the current input source for error messages.  The stream is checked
  # first because @inputBuffer is also populated while reading from a stream.
  def describeSource
    if !@input.nil?
      if @input.kind_of?(File)
        sprintf("file '%s'", @input.path)
      else
        'unnamed IO stream'
      end
    elsif !@inputBuffer.nil?
      'String input'
    else
      'unknown source'
    end
  end

  # Consumes one character and returns it; raises unless it equals e.
  # End of input is reported as 'EOF' in the message.
  def expect(e)
    c = read
    if c != e
      raise sprintf("unexpectedChar:: expect '%s' got '%s'\n", e, (c.nil? ? 'EOF' : c))
    end
    c
  end

  # Consumes and returns the next character (nil at end of input), keeping
  # the line/column counters up to date.
  def read
    result = readFromInput

    if result == ?\n
      # counting newlines for line count... not great for the mac
      @line += 1
      @column = 1
    else
      @column += 1
    end

    result
  end

  # Returns the character at the current buffer position and advances,
  # refilling the buffer from the stream when it is exhausted.
  def readFromInput
    if @inputBuffer.nil? || (@inputBufferLength <= @inputBufferPosition)
      getMoreInput
    end

    return nil if @inputBuffer.nil?

    c = @inputBuffer[@inputBufferPosition]
    @inputBufferPosition += 1
    c
  end

  # Promotes the look-ahead line to be the current buffer and reads a new
  # look-ahead line from the stream.  Does nothing for String input.
  def getMoreInput
    return nil if @input.nil?

    @inputBuffer = @nextInputBuffer
    @inputBufferPosition = 0
    if @inputBuffer.nil?
      @inputBuffer = @input.gets
      if @inputBuffer.nil?
        @inputBufferLength = 0
        return nil
      end
    end
    @inputBufferLength = @inputBuffer.length
    @nextInputBuffer = @input.gets
  end

  # Returns the next character without consuming it (nil at end of input).
  def peekAt0
    getMoreInput if @inputBuffer.nil?

    if @inputBufferPosition < @inputBufferLength
      @inputBuffer[@inputBufferPosition]
    else
      charOfNextBuffer(0)
    end
  end

  # Returns the character after the next one without consuming anything.
  # Look-ahead reaches at most into the single pre-read line.
  def peekAt1
    getMoreInput if @inputBuffer.nil?

    if (@inputBufferPosition + 1) < @inputBufferLength
      @inputBuffer[@inputBufferPosition + 1]
    elsif @inputBufferPosition < @inputBufferLength
      # exactly one character left in the current buffer
      charOfNextBuffer(0)
    else
      charOfNextBuffer(1)
    end
  end

  # Character at offset in the look-ahead line, or nil if there is none.
  def charOfNextBuffer(offset)
    @nextInputBuffer.nil? ? nil : @nextInputBuffer[offset]
  end

  # Drops the namespace declarations of the element that is being closed.
  def popNamespaceScope
    @elementNamespacePrefixStack.pop
    @elementNamespaceValueStack.pop
    @elementNamespaceDefaultStack.pop
  end

  # Works out what kind of event comes next and dispatches to its parser.
  def parseNextEvent
    @attributeName.clear
    @attributeQName.clear
    @attributeNamespace.clear
    @attributePrefix.clear
    @attributeValue.clear

    if @emptyElement
      # the last event was an empty start element like <start/>; its name
      # information is deliberately left in place for this synthetic end event
      @type = END_ELEMENT
      @emptyElement = false
      # the start tag opened a namespace scope that no real end tag will close
      popNamespaceScope
      return
    end

    @prefix = nil
    @name = nil
    @qname = nil
    @namespace = nil
    @type = peekType

    case @type
    when END_DOCUMENT
      # nothing to do
    when ENTITY_REF
      parseEntity
      @text = @textBuffer
      @textBuffer = +''
    when START_ELEMENT
      parseStartElement
    when END_ELEMENT
      parseEndElement
    when TEXT
      parseText(?<, false)
      @text = @textBuffer
      @textBuffer = +''
      # whitespace outside the root element is not real content
      if @elementName.empty? && @whitespace
        @type = IGNORABLE_WHITESPACE
      end
    else
      # LOOK_FURTHER, or the '<[' case peekType labels CDATA_SECTION (which
      # parseLookFurther rejects as illegal markup)
      @type = parseLookFurther
    end
  end

  # Parses markup starting with '<?' or '<!': a processing instruction,
  # comment, CDATA section or DOCTYPE.  Leaves the content in @text and
  # returns the resulting event type.
  def parseLookFurther
    expect(?<)

    demand = nil
    delimiter = nil # **first** (not last) character in delimiting string

    @text = +''
    @textBuffer = +''

    c = read
    if c == ??
      result = PROCESSING_INSTRUCTION
      delimiter = ??
    elsif c == ?!
      cc = peekAt0
      if cc == ?-
        result = COMMENT
        demand = '--'
        delimiter = ?-
      elsif cc == ?[
        result = CDATA_SECTION
        demand = '[CDATA['
        delimiter = ?]
      else
        result = DOCTYPE
        demand = 'DOCTYPE'
      end
    else
      # this should never happen because we'll get an illegal name exception
      # first
      raise "illegal <#{c}"
    end

    # the opening keyword must be present character for character
    demand.each_char { |d| expect(d) } unless demand.nil?

    if result == DOCTYPE
      parseDoctype
    else
      loop do
        c = read
        raise "unexpectedEOF" if c.nil?

        # '?>' ends a processing instruction after any character; '-->' and
        # ']]>' need c itself to be the first delimiter character
        if ((delimiter == ??) || (c == delimiter)) &&
           (peekAt0 == delimiter) &&
           (peekAt1 == ?>)
          @text << c if delimiter == ??
          break
        end
        @text << c
      end

      # consume the two closing characters we have only peeked at
      read
      read
    end

    result
  end

  # Reads the remainder of a DOCTYPE declaration into @text, up to but not
  # including the '>' that closes it.  Angle brackets inside quoted strings
  # do not count towards nesting.
  def parseDoctype
    depth = 1
    quote = nil # the quote character that opened the current string, if any

    @text = +''

    loop do
      c = read

      case c
      when nil
        raise "unexpectedEOF"
      when ?', ?"
        if quote.nil?
          quote = c
        elsif quote == c
          quote = nil
        end
      when ?<
        depth += 1 if quote.nil?
      when ?>
        if quote.nil?
          depth -= 1
          return if depth.zero?
        end
      end
      @text << c
    end
  end

  # Decides the type of the next event from at most two characters of
  # look-ahead, consuming nothing.
  def peekType
    case peekAt0
    when nil, "\0"
      END_DOCUMENT
    when ?&
      ENTITY_REF
    when ?<
      case peekAt1
      when ?/
        END_ELEMENT
      when ?[
        CDATA_SECTION
      when ??, ?!
        LOOK_FURTHER
      else
        START_ELEMENT
      end
    else
      TEXT
    end
  end

  # Parses '&name;'.  The entity name is left in @name.  Character references
  # (&#NN; / &#xHH;) and entities found in entityMap append their value to the
  # text buffer; anything else sets @unresolvedEntity.
  def parseEntity
    @compactNewLine = false

    expect(?&)

    @name = +''
    loop do
      c = read
      break if c == ?;
      raise "unexpectedEOF" if c.nil?
      @name << c
    end

    if @name.start_with?('#')
      begin
        code = if @name[1] == ?x
                 Integer(@name[2..-1], 16)
               else
                 Integer(@name[1..-1], 10)
               end
        # appending an Integer appends that code point
        @textBuffer << code
      rescue ArgumentError, RangeError
        raise "illegalCharacterReference"
      end
      @whitespace &&= (code <= 32)
    else
      value = entityMap[@name]
      if value.nil?
        @unresolvedEntity = true
      else
        @textBuffer << value
        @whitespace = false
      end
    end
  end

  # Parses a start tag including its attributes, resolves namespaces when
  # enabled, and records the element as open unless it is empty (<x/>).
  def parseStartElement
    #read the "<" that got us here
    expect(?<)

    @qname = readName
    @textBuffer = +''

    loop do
      skipWhitespace
      c = peekAt0
      raise "unexpectedEOF" if c.nil?

      if c == ?/
        @emptyElement = true

        read
        skipWhitespace
        expect(?>)

        break
      end
      if c == ?>
        @emptyElement = false
        read
        break
      end

      aName = readName
      @textBuffer = +''
      raise "nameExpected" if aName.nil? || aName.empty?

      skipWhitespace
      expect(?=)
      skipWhitespace

      delimiter = read
      if (delimiter != ?') && (delimiter != ?")
        raise "invalidDelimiter"
      end

      value = parseText(delimiter, true)
      @textBuffer = +''

      # skip the end delimiter
      read

      if processNamespace
        # handleNamespaces fills in the remaining attribute arrays later
        @attributeQName.push aName
      else
        @attributeName.push aName
        @attributeQName.push aName
        @attributeNamespace.push nil
        @attributePrefix.push nil
      end
      @attributeValue.push value
    end

    if processNamespace
      handleNamespaces
    else
      @name = @qname
      @namespace = nil
      @prefix = nil
    end

    unless @emptyElement
      @elementName.push @name
      @elementQName.push @qname
      @elementNamespace.push @namespace
      @elementPrefix.push @prefix
    end
  end

  # Parses an end tag, checks it matches the innermost open element and
  # closes that element's namespace scope.
  def parseEndElement
    raise "elementStackEmpty" if @elementName.empty?

    # read the "</" that we've only had a peek at
    expect(?<)
    expect(?/)

    @qname = readName
    # readName accumulates in the shared text buffer; start a fresh one so the
    # tag name cannot leak into whatever is parsed next
    @textBuffer = +''

    startQName = @elementQName.pop
    if @qname != startQName
      raise sprintf("unexpectedEndElement wanted '%s' found '%s'", startQName, @qname)
    end
    skipWhitespace
    expect(?>)

    @name = @elementName.pop
    @prefix = @elementPrefix.pop
    @namespace = @elementNamespace.pop

    popNamespaceScope
  end

  # Reads an XML name into the text buffer and returns that buffer.  Callers
  # must replace @textBuffer afterwards if they keep the returned string.
  def readName
    raise "nameExpected" unless nameStartChar(peekAt0)

    loop do
      appendToTextBuffer(read)
      break unless nameChar(peekAt0)
    end
    @textBuffer
  end

  # True if c may start a name: an ASCII letter, '_' or ':'.
  # FIX ME: XML also allows many non-ASCII characters here.
  def nameStartChar(c)
    return false if c.nil?
    return true if (?A <= c) && (c <= ?Z)
    return true if (?a <= c) && (c <= ?z)

    (c == ?_) || (c == ?:)
  end

  # True if c may appear inside a name: ASCII letter or digit, '_', '-', '.'
  # or ':'.  FIX ME: XML also allows many non-ASCII characters here.
  def nameChar(c)
    return false if c.nil?
    return true if (?A <= c) && (c <= ?Z)
    return true if (?a <= c) && (c <= ?z)
    return true if (?0 <= c) && (c <= ?9)

    (c == ?_) || (c == ?-) || (c == ?.) || (c == ?:)
  end

  # Called by parseStartElement to deal with namespaces.  Updates the known
  # namespaces from the xmlns attributes of this start element, rebuilds the
  # attribute arrays, then resolves the namespace of the element itself.
  def handleNamespaces
    defaultNamespace = @elementNamespaceDefaultStack.last

    qnames = @attributeQName
    values = @attributeValue
    @attributeQName = []
    @attributeValue = []

    prefixList = []
    valueList = []
    anyQualifiedAttributes = false

    qnames.each_with_index do |qname, index|
      value = values[index]

      if qname == 'xmlns'
        prefix = 'xmlns'
        name = nil
        namespace = lookupNamespace(prefix)
        defaultNamespace = value
      else
        pieces = qname.split(':', 2)
        if pieces.length == 2
          namespace = value
          prefix = pieces[0]
          name = pieces[1]

          raise "illegalEmptyAtributePrefix" if prefix.empty?
          raise "illegalEmptyAttributeName" if name.empty?
        else
          # this is a un-prefixed non-xmlns attribute
          @attributeQName.push qname
          @attributeName.push qname
          @attributeNamespace.push nil
          @attributePrefix.push nil
          @attributeValue.push value

          next
        end
      end

      # only prefixed attributes beyond here

      raise "illegalEmptyNamespace" if namespace.nil?

      if prefix != 'xmlns'
        anyQualifiedAttributes = true

        # the namespace pushed here is a placeholder; it is replaced by the
        # real lookup once every declaration on this element is known
        @attributeQName.push qname
        @attributeName.push name
        @attributeNamespace.push namespace
        @attributePrefix.push prefix
        @attributeValue.push value
      else
        if !name.nil? && (namespace.nil? || namespace.empty?)
          raise "illegalNamespace"
        end

        # name is nil for the default-namespace declaration
        prefixList.push name
        valueList.push value

        if @reportNamespaceAttributes
          @attributeQName.push qname
          @attributeName.push name
          @attributeNamespace.push namespace
          @attributePrefix.push prefix
          @attributeValue.push value
        end
      end
    end

    @elementNamespacePrefixStack.push prefixList
    @elementNamespaceValueStack.push valueList
    @elementNamespaceDefaultStack.push defaultNamespace

    if anyQualifiedAttributes
      # run over the attributes and make sure we have them qualified
      @attributePrefix.each_with_index do |attrPrefix, index|
        next if attrPrefix.nil?
        @attributeNamespace[index] = lookupNamespace(attrPrefix)
      end
    end

    # handle namespaces for the element name
    pieces = @qname.split(':', 2)
    if pieces.length == 2
      @name = pieces[1]
      @prefix = pieces[0]
      @namespace = lookupNamespace(@prefix)
    else
      @name = @qname
      @namespace = defaultNamespace
      @prefix = nil
    end
  end

  # Returns the namespace bound to prefix, searching from the innermost open
  # element outwards.  'xml' and 'xmlns' are always bound.  Raises for a nil
  # or unknown prefix.
  def lookupNamespace(prefix)
    raise "illegalPrefix" if prefix.nil?
    return 'http://www.w3.org/XML/1998/namespace' if prefix == 'xml'
    return 'http://www.w3.org/2000/xmlns/' if prefix == 'xmlns'

    (@elementNamespacePrefixStack.length - 1).downto(0) do |level|
      slot = @elementNamespacePrefixStack[level].index(prefix)
      return @elementNamespaceValueStack[level][slot] unless slot.nil?
    end
    raise "unknownNamespacePrefix"
  end

  # Accumulates characters into the text buffer until delimiter or end of
  # input; the delimiter is not consumed.  With resolve true, entity
  # references are expanded in place (an unknown entity raises); with resolve
  # false, parsing stops in front of the first '&'.  Returns the text buffer.
  def parseText(delimiter, resolve)
    c = peekAt0
    while !c.nil? && (c != delimiter)
      if c == ?&
        break unless resolve

        parseEntity

        raise "unresolvedEntity" if @unresolvedEntity
      else
        appendToTextBuffer(read)
      end

      c = peekAt0
    end

    @textBuffer
  end

  # Appends c to the text buffer and updates @whitespace.  While a start tag
  # is being parsed, a line break becomes a single space and "\r\n" counts as
  # one line break; elsewhere characters are appended unchanged.
  def appendToTextBuffer(c)
    if ((c == ?\r) || (c == ?\n)) && startElement?
      if (c == ?\n) && @compactNewLine
        # second half of a "\r\n" pair that was already turned into a space
        @compactNewLine = false
        return
      end
      @compactNewLine = (c == ?\r)
      c = ?\s
    else
      @compactNewLine = false
    end

    @textBuffer << c
    @whitespace &&= (c <= ?\s)
    @textBuffer
  end

  # Consumes characters up to the first one greater than a space.
  def skipWhitespace
    loop do
      c = peekAt0
      return if c.nil? || (?\s < c)
      read
    end
  end
end