hutch-xamplr-pp 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +2 -2
- data/lib/xamplr-pp.rb +154 -123
- metadata +1 -1
data/VERSION.yml
CHANGED
data/lib/xamplr-pp.rb
CHANGED
@@ -32,54 +32,59 @@ class Xampl_PP
|
|
32
32
|
DOCTYPE = 'DOCTYPE'
|
33
33
|
UNDECIDED_TYPE = 'UNDECIDED_TYPE'
|
34
34
|
|
35
|
+
# Returns the integer value of the first byte of +str+, or nil when +str+ is
# empty. The parser uses it to compare string literals against the byte
# codes returned by #read.
def first_byte(str)
  # getbyte fetches one byte directly; bytes.first would build the complete
  # byte list of the string only to discard everything after index 0.
  str.getbyte(0)
end
|
38
|
+
|
35
39
|
# 'Features', actually just processing options
|
36
|
-
|
37
|
-
|
38
|
-
|
40
|
+
attr_accessor :processNamespace #1.9.1 , true
|
41
|
+
attr_accessor :reportNamespaceAttributes #1.9.1 , true
|
42
|
+
attr_accessor :utf8encode #1.9.1 , true
|
39
43
|
|
40
44
|
# the entities that we will recognise
|
41
|
-
|
42
|
-
|
43
|
-
|
45
|
+
attr_accessor :entityMap #1.9.1 , true
|
46
|
+
attr_accessor :unresolvedEntity
|
47
|
+
attr_accessor :resolver #1.9.1 , true
|
44
48
|
|
45
49
|
# some information about where we are
|
46
|
-
|
47
|
-
|
50
|
+
attr_accessor :line
|
51
|
+
attr_accessor :column
|
48
52
|
|
49
53
|
# element information
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
54
|
+
attr_accessor :type
|
55
|
+
attr_accessor :emptyElement
|
56
|
+
attr_accessor :name
|
57
|
+
attr_accessor :qname
|
58
|
+
attr_accessor :namespace
|
59
|
+
attr_accessor :prefix
|
60
|
+
attr_accessor :attributeName
|
61
|
+
attr_accessor :attributeQName
|
62
|
+
attr_accessor :attributeNamespace
|
63
|
+
attr_accessor :attributePrefix
|
64
|
+
attr_accessor :attributeValue
|
65
|
+
|
66
|
+
attr_accessor :text
|
63
67
|
|
64
68
|
# These are not intended for general use (they are not part of the api)
|
65
69
|
|
66
70
|
# open element information
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
+
attr_accessor :elementName
|
72
|
+
attr_accessor :elementQName
|
73
|
+
attr_accessor :elementNamespace
|
74
|
+
attr_accessor :elementPrefix
|
71
75
|
|
72
76
|
# some pre-compiled patterns
|
73
|
-
|
74
|
-
|
77
|
+
attr_accessor :namePattern #1.9.1 , true
|
78
|
+
attr_accessor :skipWhitespacePattern #1.9.1 , true
|
79
|
+
|
80
|
+
attr_accessor :elementNamespacePrefixStack
|
81
|
+
attr_accessor :elementNamespaceValueStack
|
82
|
+
attr_accessor :elementNamespaceDefaultStack
|
75
83
|
|
76
|
-
|
77
|
-
attr :elementNamespaceValueStack
|
78
|
-
attr :elementNamespaceDefaultStack
|
84
|
+
attr_accessor :standalone
|
79
85
|
|
80
|
-
|
86
|
+
public
|
81
87
|
|
82
|
-
public
|
83
88
|
# True when @type is the START_DOCUMENT constant. The comparison is by object
# identity (equal?), so an equal-looking but distinct string does not count.
def startDocument?
  START_DOCUMENT.equal?(@type)
end
|
@@ -128,18 +133,18 @@ public
|
|
128
133
|
@type.equal? DOCTYPE
|
129
134
|
end
|
130
135
|
|
131
|
-
|
132
|
-
|
133
|
-
|
136
|
+
# Fallback entity resolver used when no external @resolver is installed.
# It resolves nothing: every call raises a RuntimeError that names the
# entity which could not be resolved.
def resolve(name)
  message = format("unresolved entity '%s'", name)
  raise RuntimeError, message
end
|
134
139
|
|
135
140
|
# Assignment form of #setInput: `parser.input = source` hands +v+ straight to
# setInput.
def input=(v)
  setInput v
end
|
138
143
|
|
139
144
|
def setInput(v)
|
140
|
-
|
141
|
-
|
142
|
-
|
145
|
+
if (defined? @input) and (nil != @input) then
|
146
|
+
@input.close
|
147
|
+
end
|
143
148
|
if nil == v then
|
144
149
|
@input = nil
|
145
150
|
@inputBuffer = nil
|
@@ -187,7 +192,7 @@ public
|
|
187
192
|
|
188
193
|
@emptyElement = false
|
189
194
|
|
190
|
-
|
195
|
+
@errorMessage = nil
|
191
196
|
|
192
197
|
initInput
|
193
198
|
end
|
@@ -198,18 +203,18 @@ public
|
|
198
203
|
|
199
204
|
def nextEvent
|
200
205
|
begin
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
206
|
+
@type = END_DOCUMENT
|
207
|
+
if (nil == @inputBuffer) and (nil == @input) then
|
208
|
+
return @type
|
209
|
+
end
|
205
210
|
|
206
|
-
|
211
|
+
@unresolvedEntity = false
|
207
212
|
|
208
|
-
|
213
|
+
@text = nil
|
209
214
|
|
210
|
-
|
215
|
+
parseNextEvent
|
211
216
|
|
212
|
-
|
217
|
+
return @type
|
213
218
|
rescue Exception => message
|
214
219
|
print message.backtrace.join("\n")
|
215
220
|
if nil != @inputBuffer then
|
@@ -227,11 +232,12 @@ public
|
|
227
232
|
end
|
228
233
|
end
|
229
234
|
|
230
|
-
|
231
|
-
|
232
|
-
|
235
|
+
# Number of entries currently held in @elementName.
# NOTE(review): presumably one entry per open element, making this the
# current nesting depth -- confirm against the code that pushes/pops it.
def depth
  @elementName.size
end
|
238
|
+
|
239
|
+
private
|
233
240
|
|
234
|
-
private
|
235
241
|
def initialize
|
236
242
|
self.processNamespace = true
|
237
243
|
self.reportNamespaceAttributes = false
|
@@ -247,7 +253,7 @@ private
|
|
247
253
|
self.namePattern = /[^0-9\x00-\x20=\/>\`\.\-\~\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?][^\x00-\x20=\/>\`\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?]*/u
|
248
254
|
self.skipWhitespacePattern = /[^\n\r\t ]+/u
|
249
255
|
|
250
|
-
|
256
|
+
#pre ruby 1.8 needs the Regexp.new syntax
|
251
257
|
#self.namePattern = Regexp.new(/[^0-9\x00-\x20=\/>\`\.\-\~\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?][^\x00-\x20=\/>\`\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?]*/, nil, 'u')
|
252
258
|
#old junk... self.skipWhitespacePattern = Regexp.new(/[^\n\r\t ]+|\x00/, nil, 'u')
|
253
259
|
#self.skipWhitespacePattern = Regexp.new(/[^\n\r\t ]+/, nil, 'u')
|
@@ -264,6 +270,7 @@ private
|
|
264
270
|
@inputBuffer = @nextInputBuffer
|
265
271
|
if nil == @inputBuffer then
|
266
272
|
@inputBuffer = @input.gets
|
273
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } MORE INPUT: #{ @inputBuffer }"
|
267
274
|
@column = 0
|
268
275
|
if nil == @inputBuffer then
|
269
276
|
@inputBufferLength = -1
|
@@ -272,6 +279,7 @@ private
|
|
272
279
|
end
|
273
280
|
@inputBufferLength = @inputBuffer.length
|
274
281
|
@nextInputBuffer = @input.gets
|
282
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } NEXT INPUT: #{ @nextInputBuffer }"
|
275
283
|
end
|
276
284
|
|
277
285
|
def expectold(e)
|
@@ -286,17 +294,27 @@ private
|
|
286
294
|
def expect(e)
|
287
295
|
if (nil == @inputBuffer) or (@inputBufferLength <= @column) then
|
288
296
|
getMoreInput
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
297
|
+
if nil == @inputBuffer then
|
298
|
+
msg = sprintf("unexpectedChar:: expect '%s' got EOF in %s", (''<<e), caller[0])
|
299
|
+
raise msg
|
300
|
+
end
|
293
301
|
end
|
294
302
|
|
295
303
|
c = @inputBuffer[@column]
|
304
|
+
if c.instance_of?(String) then
|
305
|
+
c = c.bytes.first
|
306
|
+
end
|
307
|
+
if e.instance_of?(String) then
|
308
|
+
e = e.bytes.first
|
309
|
+
end
|
296
310
|
@column += 1
|
297
|
-
|
298
|
-
|
299
|
-
|
311
|
+
if c == e then
|
312
|
+
return c
|
313
|
+
end
|
314
|
+
|
315
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } EXPECT CLASS: #{ e.class.name }"
|
316
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } GOT CLASS: #{ c.class.name }"
|
317
|
+
|
300
318
|
|
301
319
|
msg = sprintf("unexpectedChar:: expect '%s' got '%s' in %s", (''<<e), (''<<c), caller[0])
|
302
320
|
raise msg
|
@@ -305,12 +323,14 @@ private
|
|
305
323
|
# Consumes one character from the current input buffer and returns the
# integer value of its first byte, advancing @column by one. When the buffer
# is missing or fully consumed, getMoreInput is asked for more; if that
# leaves @inputBuffer nil, nil is returned to signal end of input.
def read
  exhausted = (nil == @inputBuffer) || (@inputBufferLength <= @column)
  if exhausted
    getMoreInput
    return nil if nil == @inputBuffer
  end

  # String#[] yields a one-character String, so take its first byte to get
  # an integer code.
  byte = @inputBuffer[@column].bytes.first
  @column += 1
  byte
end
|
@@ -412,11 +432,11 @@ private
|
|
412
432
|
@textBuffer = ''
|
413
433
|
|
414
434
|
c = read
|
415
|
-
if
|
435
|
+
if first_byte("?") == c then
|
416
436
|
result = PROCESSING_INSTRUCTION
|
417
437
|
demand = nil
|
418
438
|
delimiter = ??
|
419
|
-
elsif
|
439
|
+
elsif first_byte("!") == c then
|
420
440
|
cc = peekAt0
|
421
441
|
if ?- == cc then
|
422
442
|
result = COMMENT
|
@@ -438,8 +458,7 @@ private
|
|
438
458
|
end
|
439
459
|
|
440
460
|
if nil != demand then
|
441
|
-
demand.each_byte do
|
442
|
-
| d |
|
461
|
+
demand.each_byte do | d |
|
443
462
|
expect d
|
444
463
|
end
|
445
464
|
end
|
@@ -465,20 +484,20 @@ private
|
|
465
484
|
return result
|
466
485
|
end
|
467
486
|
|
468
|
-
|
469
|
-
|
470
|
-
|
487
|
+
# Returns true when @text starts with the literal "xml", false otherwise.
# NOTE(review): presumably @text holds the body of a processing instruction
# at this point -- confirm against the caller.
def parseXMLDecl
  # \A anchors at the very start of the string. The previous ^ anchor also
  # matched after any embedded newline, so multi-line text whose second or
  # later line began with "xml" was wrongly reported as a declaration.
  !@text.index(/\Axml/u).nil?
end
|
471
490
|
|
472
491
|
def parseDoctype
|
473
492
|
depth = 1
|
474
493
|
quoted = false
|
475
494
|
delimiter = nil
|
476
495
|
entityDefinitionText = ''
|
477
|
-
|
478
|
-
|
496
|
+
havePiece = false
|
497
|
+
internalSubset = false
|
479
498
|
|
480
499
|
@text = ''
|
481
|
-
|
500
|
+
|
482
501
|
while true do
|
483
502
|
c = read
|
484
503
|
case c
|
@@ -522,31 +541,31 @@ private
|
|
522
541
|
if not quoted then
|
523
542
|
depth -= 1
|
524
543
|
#check right here for an entity definition!!!
|
525
|
-
|
544
|
+
havePiece = true
|
526
545
|
#entityDefinitionText = ''
|
527
546
|
if 0 == depth then
|
528
547
|
return
|
529
548
|
end
|
530
549
|
end
|
531
|
-
|
550
|
+
when ?[
|
532
551
|
if not quoted then
|
533
|
-
|
534
|
-
|
552
|
+
internalSubset = true
|
553
|
+
end
|
535
554
|
when nil
|
536
555
|
raise sprintf("unexpected EOF in DOCTYPE (depth: %d, quoted: %s)", depth, quoted)
|
537
556
|
end
|
538
557
|
@text << c
|
539
558
|
entityDefinitionText << c
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
559
|
+
if havePiece then
|
560
|
+
parseDefinition(entityDefinitionText, internalSubset)
|
561
|
+
entityDefinitionText = ''
|
562
|
+
end
|
563
|
+
havePiece = false
|
545
564
|
end
|
546
565
|
end
|
547
566
|
|
548
|
-
|
549
|
-
|
567
|
+
# Hook called by parseDoctype with each completed piece of DOCTYPE text
# (+defn+) and a flag saying whether an internal subset has been entered
# (+internal+). This implementation deliberately does nothing and returns
# nil.
def parseDefinition(defn, internal)
  nil
end
|
550
569
|
|
551
570
|
def peekType
|
552
571
|
c = peekAt0
|
@@ -590,18 +609,18 @@ private
|
|
590
609
|
# end
|
591
610
|
if c < 0x80 then
|
592
611
|
@textBuffer << c
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
612
|
+
elsif c < 0x0800
|
613
|
+
@textBuffer << ((c >> 6) | 0xC0)
|
614
|
+
@textBuffer << ((c & 0x3F) | 0x80)
|
615
|
+
elsif c < 0x10000
|
616
|
+
@textBuffer << ((c >> 12) | 0xE0)
|
617
|
+
@textBuffer << (((c >> 6) & 0x3F) | 0x80)
|
618
|
+
@textBuffer << ((c & 0x3F) | 0x80)
|
619
|
+
else
|
620
|
+
@textBuffer << ((c >> 18) | 0xF0)
|
621
|
+
@textBuffer << (((c >> 12) & 0x3F) | 0x80)
|
622
|
+
@textBuffer << (((c >> 6) & 0x3F) | 0x80)
|
623
|
+
@textBuffer << ((c & 0x3F) | 0x80)
|
605
624
|
end
|
606
625
|
else
|
607
626
|
@textBuffer << c
|
@@ -614,7 +633,9 @@ private
|
|
614
633
|
@name = ''
|
615
634
|
while true do
|
616
635
|
c = read
|
617
|
-
|
636
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } CLASS OF C: #{ c.class.name }"
|
637
|
+
if first_byte(";") == c then
|
638
|
+
#if ?; == c then # 1.9.1
|
618
639
|
break
|
619
640
|
end
|
620
641
|
if nil == c then
|
@@ -637,7 +658,7 @@ private
|
|
637
658
|
else
|
638
659
|
if nil != @resolver then
|
639
660
|
value = @resolver.resolve(@name)
|
640
|
-
|
661
|
+
else
|
641
662
|
value = resolve(@name)
|
642
663
|
end
|
643
664
|
|
@@ -656,7 +677,7 @@ private
|
|
656
677
|
@qname = readName
|
657
678
|
@textBuffer = ''
|
658
679
|
|
659
|
-
|
680
|
+
multiple = false
|
660
681
|
while true do
|
661
682
|
hasWhitespace = skipWhitespace
|
662
683
|
|
@@ -678,10 +699,10 @@ private
|
|
678
699
|
break
|
679
700
|
end
|
680
701
|
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
702
|
+
if multiple and !hasWhitespace then
|
703
|
+
raise "whitespace is required between attributes"
|
704
|
+
end
|
705
|
+
multiple = true
|
685
706
|
|
686
707
|
aName = readName
|
687
708
|
@textBuffer = ''
|
@@ -689,19 +710,29 @@ private
|
|
689
710
|
raise "name expected (start element)"
|
690
711
|
end
|
691
712
|
|
692
|
-
|
713
|
+
skipWhitespace
|
693
714
|
expect ?=
|
694
|
-
|
715
|
+
skipWhitespace
|
695
716
|
|
696
717
|
delimiter = read
|
697
|
-
|
718
|
+
#TODO optimise this
|
719
|
+
if "'".bytes.first == delimiter then # for vim: '
|
698
720
|
value = parseText(?', true) # for vim: '
|
699
|
-
elsif
|
721
|
+
elsif '"'.bytes.first == delimiter then # for vim: "
|
700
722
|
value = parseText(?", true) # for vim: "
|
701
723
|
else
|
702
724
|
raise "invalidDelimiter"
|
703
725
|
end
|
704
726
|
|
727
|
+
# replaced with above for 1.9.1
|
728
|
+
#if ?' == delimiter then # for vim: '
|
729
|
+
# value = parseText(?', true) # for vim: '
|
730
|
+
#elsif ?" == delimiter then # for vim: "
|
731
|
+
# value = parseText(?", true) # for vim: "
|
732
|
+
#else
|
733
|
+
# raise "invalidDelimiter"
|
734
|
+
#end
|
735
|
+
|
705
736
|
@textBuffer = ''
|
706
737
|
|
707
738
|
# skip the end delimiter
|
@@ -763,8 +794,8 @@ private
|
|
763
794
|
def readName
|
764
795
|
@textBuffer = ''
|
765
796
|
if @column != @inputBuffer.index(@namePattern, @column) then
|
766
|
-
|
767
|
-
|
797
|
+
raise "invalid name"
|
798
|
+
end
|
768
799
|
if nil != $& then
|
769
800
|
@textBuffer = $&
|
770
801
|
@column += $&.length
|
@@ -865,7 +896,7 @@ private
|
|
865
896
|
@elementNamespacePrefixStack.push prefixList
|
866
897
|
@elementNamespaceValueStack.push valueList
|
867
898
|
@elementNamespaceDefaultStack.push defaultNamespace
|
868
|
-
|
899
|
+
|
869
900
|
if anyQualifiedAttributes then
|
870
901
|
# run over the attributes and make sure we have them qualified
|
871
902
|
for i in 0..(@attributeName.length-1) do
|
@@ -894,10 +925,10 @@ private
|
|
894
925
|
if nil == prefix then
|
895
926
|
raise "illegalPrefix"
|
896
927
|
end
|
897
|
-
if'xml' == prefix then
|
928
|
+
if 'xml' == prefix then
|
898
929
|
return 'http://www.w3.org/XML/1998/namespace'
|
899
930
|
end
|
900
|
-
if'xmlns' == prefix then
|
931
|
+
if 'xmlns' == prefix then
|
901
932
|
return 'http://www.w3.org/2000/xmlns/'
|
902
933
|
end
|
903
934
|
|
@@ -918,9 +949,9 @@ private
|
|
918
949
|
regex = /[#{s}|<]/u
|
919
950
|
c = findOneOfThese regex
|
920
951
|
while (nil != c) and (delimiter != c) do
|
921
|
-
|
922
|
-
|
923
|
-
|
952
|
+
if ?< == c then
|
953
|
+
raise "illegal character '<'"
|
954
|
+
end
|
924
955
|
if ?& == c then
|
925
956
|
if !resolve then
|
926
957
|
break
|
@@ -948,12 +979,12 @@ private
|
|
948
979
|
p = @inputBuffer.index(@skipWhitespacePattern, @column)
|
949
980
|
|
950
981
|
if nil != p then
|
951
|
-
|
952
|
-
|
953
|
-
|
982
|
+
foundSome = (foundSome or (@column != p))
|
983
|
+
@column = p
|
984
|
+
return foundSome
|
954
985
|
end
|
955
986
|
getMoreInput
|
956
|
-
|
987
|
+
foundSome = true
|
957
988
|
end
|
958
989
|
return foundSome
|
959
990
|
end
|
@@ -970,11 +1001,11 @@ private
|
|
970
1001
|
end
|
971
1002
|
@textBuffer << @inputBuffer[@column..-1]
|
972
1003
|
getMoreInput
|
973
|
-
|
1004
|
+
return findOneOfTheseSecond(regex)
|
974
1005
|
end
|
975
1006
|
|
976
1007
|
def findOneOfTheseSecond(regex)
|
977
|
-
|
1008
|
+
# we know we are at the start of a line
|
978
1009
|
while nil != @inputBuffer do
|
979
1010
|
@column = @inputBuffer.index(regex)
|
980
1011
|
|