hutch-xamplr-pp 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +2 -2
- data/lib/xamplr-pp.rb +154 -123
- metadata +1 -1
data/VERSION.yml
CHANGED
data/lib/xamplr-pp.rb
CHANGED
@@ -32,54 +32,59 @@ class Xampl_PP
|
|
32
32
|
DOCTYPE = 'DOCTYPE'
|
33
33
|
UNDECIDED_TYPE = 'UNDECIDED_TYPE'
|
34
34
|
|
35
|
+
def first_byte(str)
|
36
|
+
str.bytes.first
|
37
|
+
end
|
38
|
+
|
35
39
|
# 'Features', actually just processing options
|
36
|
-
|
37
|
-
|
38
|
-
|
40
|
+
attr_accessor :processNamespace #1.9.1 , true
|
41
|
+
attr_accessor :reportNamespaceAttributes #1.9.1 , true
|
42
|
+
attr_accessor :utf8encode #1.9.1 , true
|
39
43
|
|
40
44
|
# the entities that we will recognise
|
41
|
-
|
42
|
-
|
43
|
-
|
45
|
+
attr_accessor :entityMap #1.9.1 , true
|
46
|
+
attr_accessor :unresolvedEntity
|
47
|
+
attr_accessor :resolver #1.9.1 , true
|
44
48
|
|
45
49
|
# some information about where we are
|
46
|
-
|
47
|
-
|
50
|
+
attr_accessor :line
|
51
|
+
attr_accessor :column
|
48
52
|
|
49
53
|
# element information
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
54
|
+
attr_accessor :type
|
55
|
+
attr_accessor :emptyElement
|
56
|
+
attr_accessor :name
|
57
|
+
attr_accessor :qname
|
58
|
+
attr_accessor :namespace
|
59
|
+
attr_accessor :prefix
|
60
|
+
attr_accessor :attributeName
|
61
|
+
attr_accessor :attributeQName
|
62
|
+
attr_accessor :attributeNamespace
|
63
|
+
attr_accessor :attributePrefix
|
64
|
+
attr_accessor :attributeValue
|
65
|
+
|
66
|
+
attr_accessor :text
|
63
67
|
|
64
68
|
# These are not intended for general use (they are not part of the api)
|
65
69
|
|
66
70
|
# open element information
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
+
attr_accessor :elementName
|
72
|
+
attr_accessor :elementQName
|
73
|
+
attr_accessor :elementNamespace
|
74
|
+
attr_accessor :elementPrefix
|
71
75
|
|
72
76
|
# some pre-compiled patterns
|
73
|
-
|
74
|
-
|
77
|
+
attr_accessor :namePattern #1.9.1 , true
|
78
|
+
attr_accessor :skipWhitespacePattern #1.9.1 , true
|
79
|
+
|
80
|
+
attr_accessor :elementNamespacePrefixStack
|
81
|
+
attr_accessor :elementNamespaceValueStack
|
82
|
+
attr_accessor :elementNamespaceDefaultStack
|
75
83
|
|
76
|
-
|
77
|
-
attr :elementNamespaceValueStack
|
78
|
-
attr :elementNamespaceDefaultStack
|
84
|
+
attr_accessor :standalone
|
79
85
|
|
80
|
-
|
86
|
+
public
|
81
87
|
|
82
|
-
public
|
83
88
|
def startDocument?
|
84
89
|
@type.equal? START_DOCUMENT
|
85
90
|
end
|
@@ -128,18 +133,18 @@ public
|
|
128
133
|
@type.equal? DOCTYPE
|
129
134
|
end
|
130
135
|
|
131
|
-
|
132
|
-
|
133
|
-
|
136
|
+
def resolve(name)
|
137
|
+
raise sprintf("unresolved entity '%s'", name)
|
138
|
+
end
|
134
139
|
|
135
140
|
def input=(v)
|
136
|
-
|
137
|
-
|
141
|
+
setInput(v)
|
142
|
+
end
|
138
143
|
|
139
144
|
def setInput(v)
|
140
|
-
|
141
|
-
|
142
|
-
|
145
|
+
if (defined? @input) and (nil != @input) then
|
146
|
+
@input.close
|
147
|
+
end
|
143
148
|
if nil == v then
|
144
149
|
@input = nil
|
145
150
|
@inputBuffer = nil
|
@@ -187,7 +192,7 @@ public
|
|
187
192
|
|
188
193
|
@emptyElement = false
|
189
194
|
|
190
|
-
|
195
|
+
@errorMessage = nil
|
191
196
|
|
192
197
|
initInput
|
193
198
|
end
|
@@ -198,18 +203,18 @@ public
|
|
198
203
|
|
199
204
|
def nextEvent
|
200
205
|
begin
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
206
|
+
@type = END_DOCUMENT
|
207
|
+
if (nil == @inputBuffer) and (nil == @input) then
|
208
|
+
return @type
|
209
|
+
end
|
205
210
|
|
206
|
-
|
211
|
+
@unresolvedEntity = false
|
207
212
|
|
208
|
-
|
213
|
+
@text = nil
|
209
214
|
|
210
|
-
|
215
|
+
parseNextEvent
|
211
216
|
|
212
|
-
|
217
|
+
return @type
|
213
218
|
rescue Exception => message
|
214
219
|
print message.backtrace.join("\n")
|
215
220
|
if nil != @inputBuffer then
|
@@ -227,11 +232,12 @@ public
|
|
227
232
|
end
|
228
233
|
end
|
229
234
|
|
230
|
-
|
231
|
-
|
232
|
-
|
235
|
+
def depth
|
236
|
+
return @elementName.length
|
237
|
+
end
|
238
|
+
|
239
|
+
private
|
233
240
|
|
234
|
-
private
|
235
241
|
def initialize
|
236
242
|
self.processNamespace = true
|
237
243
|
self.reportNamespaceAttributes = false
|
@@ -247,7 +253,7 @@ private
|
|
247
253
|
self.namePattern = /[^0-9\x00-\x20=\/>\`\.\-\~\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?][^\x00-\x20=\/>\`\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?]*/u
|
248
254
|
self.skipWhitespacePattern = /[^\n\r\t ]+/u
|
249
255
|
|
250
|
-
|
256
|
+
#pre ruby 1.8 needs the Regexp.new syntax
|
251
257
|
#self.namePattern = Regexp.new(/[^0-9\x00-\x20=\/>\`\.\-\~\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?][^\x00-\x20=\/>\`\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?]*/, nil, 'u')
|
252
258
|
#old junk... self.skipWhitespacePattern = Regexp.new(/[^\n\r\t ]+|\x00/, nil, 'u')
|
253
259
|
#self.skipWhitespacePattern = Regexp.new(/[^\n\r\t ]+/, nil, 'u')
|
@@ -264,6 +270,7 @@ private
|
|
264
270
|
@inputBuffer = @nextInputBuffer
|
265
271
|
if nil == @inputBuffer then
|
266
272
|
@inputBuffer = @input.gets
|
273
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } MORE INPUT: #{ @inputBuffer }"
|
267
274
|
@column = 0
|
268
275
|
if nil == @inputBuffer then
|
269
276
|
@inputBufferLength = -1
|
@@ -272,6 +279,7 @@ private
|
|
272
279
|
end
|
273
280
|
@inputBufferLength = @inputBuffer.length
|
274
281
|
@nextInputBuffer = @input.gets
|
282
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } NEXT INPUT: #{ @nextInputBuffer }"
|
275
283
|
end
|
276
284
|
|
277
285
|
def expectold(e)
|
@@ -286,17 +294,27 @@ private
|
|
286
294
|
def expect(e)
|
287
295
|
if (nil == @inputBuffer) or (@inputBufferLength <= @column) then
|
288
296
|
getMoreInput
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
297
|
+
if nil == @inputBuffer then
|
298
|
+
msg = sprintf("unexpectedChar:: expect '%s' got EOF in %s", (''<<e), caller[0])
|
299
|
+
raise msg
|
300
|
+
end
|
293
301
|
end
|
294
302
|
|
295
303
|
c = @inputBuffer[@column]
|
304
|
+
if c.instance_of?(String) then
|
305
|
+
c = c.bytes.first
|
306
|
+
end
|
307
|
+
if e.instance_of?(String) then
|
308
|
+
e = e.bytes.first
|
309
|
+
end
|
296
310
|
@column += 1
|
297
|
-
|
298
|
-
|
299
|
-
|
311
|
+
if c == e then
|
312
|
+
return c
|
313
|
+
end
|
314
|
+
|
315
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } EXPECT CLASS: #{ e.class.name }"
|
316
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } GOT CLASS: #{ c.class.name }"
|
317
|
+
|
300
318
|
|
301
319
|
msg = sprintf("unexpectedChar:: expect '%s' got '%s' in %s", (''<<e), (''<<c), caller[0])
|
302
320
|
raise msg
|
@@ -305,12 +323,14 @@ private
|
|
305
323
|
def read
|
306
324
|
if (nil == @inputBuffer) or (@inputBufferLength <= @column) then
|
307
325
|
getMoreInput
|
308
|
-
|
309
|
-
|
310
|
-
|
326
|
+
if nil == @inputBuffer then
|
327
|
+
return nil
|
328
|
+
end
|
311
329
|
end
|
312
330
|
|
313
|
-
|
331
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } READ COLUMN #{ @column } FROM: #{ @inputBuffer }"
|
332
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } READ: #{ @inputBuffer[@column] }"
|
333
|
+
c = @inputBuffer[@column].bytes.first # 1.9.1 fixup
|
314
334
|
@column += 1
|
315
335
|
return c
|
316
336
|
end
|
@@ -412,11 +432,11 @@ private
|
|
412
432
|
@textBuffer = ''
|
413
433
|
|
414
434
|
c = read
|
415
|
-
if
|
435
|
+
if first_byte("?") == c then
|
416
436
|
result = PROCESSING_INSTRUCTION
|
417
437
|
demand = nil
|
418
438
|
delimiter = ??
|
419
|
-
elsif
|
439
|
+
elsif first_byte("!") == c then
|
420
440
|
cc = peekAt0
|
421
441
|
if ?- == cc then
|
422
442
|
result = COMMENT
|
@@ -438,8 +458,7 @@ private
|
|
438
458
|
end
|
439
459
|
|
440
460
|
if nil != demand then
|
441
|
-
demand.each_byte do
|
442
|
-
| d |
|
461
|
+
demand.each_byte do | d |
|
443
462
|
expect d
|
444
463
|
end
|
445
464
|
end
|
@@ -465,20 +484,20 @@ private
|
|
465
484
|
return result
|
466
485
|
end
|
467
486
|
|
468
|
-
|
469
|
-
|
470
|
-
|
487
|
+
def parseXMLDecl
|
488
|
+
return nil != @text.index(/^xml/u)
|
489
|
+
end
|
471
490
|
|
472
491
|
def parseDoctype
|
473
492
|
depth = 1
|
474
493
|
quoted = false
|
475
494
|
delimiter = nil
|
476
495
|
entityDefinitionText = ''
|
477
|
-
|
478
|
-
|
496
|
+
havePiece = false
|
497
|
+
internalSubset = false
|
479
498
|
|
480
499
|
@text = ''
|
481
|
-
|
500
|
+
|
482
501
|
while true do
|
483
502
|
c = read
|
484
503
|
case c
|
@@ -522,31 +541,31 @@ private
|
|
522
541
|
if not quoted then
|
523
542
|
depth -= 1
|
524
543
|
#check right here for an entity definition!!!
|
525
|
-
|
544
|
+
havePiece = true
|
526
545
|
#entityDefinitionText = ''
|
527
546
|
if 0 == depth then
|
528
547
|
return
|
529
548
|
end
|
530
549
|
end
|
531
|
-
|
550
|
+
when ?[
|
532
551
|
if not quoted then
|
533
|
-
|
534
|
-
|
552
|
+
internalSubset = true
|
553
|
+
end
|
535
554
|
when nil
|
536
555
|
raise sprintf("unexpected EOF in DOCTYPE (depth: %d, quoted: %s)", depth, quoted)
|
537
556
|
end
|
538
557
|
@text << c
|
539
558
|
entityDefinitionText << c
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
559
|
+
if havePiece then
|
560
|
+
parseDefinition(entityDefinitionText, internalSubset)
|
561
|
+
entityDefinitionText = ''
|
562
|
+
end
|
563
|
+
havePiece = false
|
545
564
|
end
|
546
565
|
end
|
547
566
|
|
548
|
-
|
549
|
-
|
567
|
+
def parseDefinition(defn, internal)
|
568
|
+
end
|
550
569
|
|
551
570
|
def peekType
|
552
571
|
c = peekAt0
|
@@ -590,18 +609,18 @@ private
|
|
590
609
|
# end
|
591
610
|
if c < 0x80 then
|
592
611
|
@textBuffer << c
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
612
|
+
elsif c < 0x0800
|
613
|
+
@textBuffer << ((c >> 6) | 0xC0)
|
614
|
+
@textBuffer << ((c & 0x3F) | 0x80)
|
615
|
+
elsif c < 0x10000
|
616
|
+
@textBuffer << ((c >> 12) | 0xE0)
|
617
|
+
@textBuffer << (((c >> 6) & 0x3F) | 0x80)
|
618
|
+
@textBuffer << ((c & 0x3F) | 0x80)
|
619
|
+
else
|
620
|
+
@textBuffer << ((c >> 18) | 0xF0)
|
621
|
+
@textBuffer << (((c >> 12) & 0x3F) | 0x80)
|
622
|
+
@textBuffer << (((c >> 6) & 0x3F) | 0x80)
|
623
|
+
@textBuffer << ((c & 0x3F) | 0x80)
|
605
624
|
end
|
606
625
|
else
|
607
626
|
@textBuffer << c
|
@@ -614,7 +633,9 @@ private
|
|
614
633
|
@name = ''
|
615
634
|
while true do
|
616
635
|
c = read
|
617
|
-
|
636
|
+
#puts "#{ __FILE__ }:#{ __LINE__ } CLASS OF C: #{ c.class.name }"
|
637
|
+
if first_byte(";") == c then
|
638
|
+
#if ?; == c then # 1.9.1
|
618
639
|
break
|
619
640
|
end
|
620
641
|
if nil == c then
|
@@ -637,7 +658,7 @@ private
|
|
637
658
|
else
|
638
659
|
if nil != @resolver then
|
639
660
|
value = @resolver.resolve(@name)
|
640
|
-
|
661
|
+
else
|
641
662
|
value = resolve(@name)
|
642
663
|
end
|
643
664
|
|
@@ -656,7 +677,7 @@ private
|
|
656
677
|
@qname = readName
|
657
678
|
@textBuffer = ''
|
658
679
|
|
659
|
-
|
680
|
+
multiple = false
|
660
681
|
while true do
|
661
682
|
hasWhitespace = skipWhitespace
|
662
683
|
|
@@ -678,10 +699,10 @@ private
|
|
678
699
|
break
|
679
700
|
end
|
680
701
|
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
702
|
+
if multiple and !hasWhitespace then
|
703
|
+
raise "whitespace is required between attributes"
|
704
|
+
end
|
705
|
+
multiple = true
|
685
706
|
|
686
707
|
aName = readName
|
687
708
|
@textBuffer = ''
|
@@ -689,19 +710,29 @@ private
|
|
689
710
|
raise "name expected (start element)"
|
690
711
|
end
|
691
712
|
|
692
|
-
|
713
|
+
skipWhitespace
|
693
714
|
expect ?=
|
694
|
-
|
715
|
+
skipWhitespace
|
695
716
|
|
696
717
|
delimiter = read
|
697
|
-
|
718
|
+
#TODO optimise this
|
719
|
+
if "'".bytes.first == delimiter then # for vim: '
|
698
720
|
value = parseText(?', true) # for vim: '
|
699
|
-
elsif
|
721
|
+
elsif '"'.bytes.first == delimiter then # for vim: "
|
700
722
|
value = parseText(?", true) # for vim: "
|
701
723
|
else
|
702
724
|
raise "invalidDelimiter"
|
703
725
|
end
|
704
726
|
|
727
|
+
# replaced with above for 1.9.1
|
728
|
+
#if ?' == delimiter then # for vim: '
|
729
|
+
# value = parseText(?', true) # for vim: '
|
730
|
+
#elsif ?" == delimiter then # for vim: "
|
731
|
+
# value = parseText(?", true) # for vim: "
|
732
|
+
#else
|
733
|
+
# raise "invalidDelimiter"
|
734
|
+
#end
|
735
|
+
|
705
736
|
@textBuffer = ''
|
706
737
|
|
707
738
|
# skip the end delimiter
|
@@ -763,8 +794,8 @@ private
|
|
763
794
|
def readName
|
764
795
|
@textBuffer = ''
|
765
796
|
if @column != @inputBuffer.index(@namePattern, @column) then
|
766
|
-
|
767
|
-
|
797
|
+
raise "invalid name"
|
798
|
+
end
|
768
799
|
if nil != $& then
|
769
800
|
@textBuffer = $&
|
770
801
|
@column += $&.length
|
@@ -865,7 +896,7 @@ private
|
|
865
896
|
@elementNamespacePrefixStack.push prefixList
|
866
897
|
@elementNamespaceValueStack.push valueList
|
867
898
|
@elementNamespaceDefaultStack.push defaultNamespace
|
868
|
-
|
899
|
+
|
869
900
|
if anyQualifiedAttributes then
|
870
901
|
# run over the attributes and make sure we have them qualified
|
871
902
|
for i in 0..(@attributeName.length-1) do
|
@@ -894,10 +925,10 @@ private
|
|
894
925
|
if nil == prefix then
|
895
926
|
raise "illegalPrefix"
|
896
927
|
end
|
897
|
-
if'xml' == prefix then
|
928
|
+
if 'xml' == prefix then
|
898
929
|
return 'http://www.w3.org/XML/1998/namespace'
|
899
930
|
end
|
900
|
-
if'xmlns' == prefix then
|
931
|
+
if 'xmlns' == prefix then
|
901
932
|
return 'http://www.w3.org/2000/xmlns/'
|
902
933
|
end
|
903
934
|
|
@@ -918,9 +949,9 @@ private
|
|
918
949
|
regex = /[#{s}|<]/u
|
919
950
|
c = findOneOfThese regex
|
920
951
|
while (nil != c) and (delimiter != c) do
|
921
|
-
|
922
|
-
|
923
|
-
|
952
|
+
if ?< == c then
|
953
|
+
raise "illegal character '<'"
|
954
|
+
end
|
924
955
|
if ?& == c then
|
925
956
|
if !resolve then
|
926
957
|
break
|
@@ -948,12 +979,12 @@ private
|
|
948
979
|
p = @inputBuffer.index(@skipWhitespacePattern, @column)
|
949
980
|
|
950
981
|
if nil != p then
|
951
|
-
|
952
|
-
|
953
|
-
|
982
|
+
foundSome = (foundSome or (@column != p))
|
983
|
+
@column = p
|
984
|
+
return foundSome
|
954
985
|
end
|
955
986
|
getMoreInput
|
956
|
-
|
987
|
+
foundSome = true
|
957
988
|
end
|
958
989
|
return foundSome
|
959
990
|
end
|
@@ -970,11 +1001,11 @@ private
|
|
970
1001
|
end
|
971
1002
|
@textBuffer << @inputBuffer[@column..-1]
|
972
1003
|
getMoreInput
|
973
|
-
|
1004
|
+
return findOneOfTheseSecond(regex)
|
974
1005
|
end
|
975
1006
|
|
976
1007
|
def findOneOfTheseSecond(regex)
|
977
|
-
|
1008
|
+
# we know we are at the start of a line
|
978
1009
|
while nil != @inputBuffer do
|
979
1010
|
@column = @inputBuffer.index(regex)
|
980
1011
|
|