xamplr-pp 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,58 @@
1
+ #!/usr/local/bin/ruby
2
+
3
# Debugging aid: for every file named on the command line, print each
# character of every line as its bytes in hex, followed by the character
# itself.

# Matches one character at a time ('.' never matches the line terminator).
# The three-argument Regexp.compile form used previously is rejected by
# recent Ruby versions; the /u literal flag is the portable spelling.
pattern = /./u

ARGV.each do |filename|
  printf("file: %s\n", filename)
  # The block form closes the file even if dumping a line raises.
  File.open(filename) do |file|
    file.each do |line|
      line.scan(pattern) do |char|
        char.each_byte { |byte| printf("%3x ", byte) }
        printf("-- '%s'\n", char)
      end
    end
  end
end
25
+
26
# Decode the first UTF-8 encoded character of +s+ and return its Unicode
# code point as an Integer, or nil when +s+ is empty.
#
# NOTE(review): the previous draft of this method was unfinished (dangling
# `elsif`, missing `end`) and read the `$&` global instead of its argument.
# It has been completed as the inverse of `encode`; confirm that this is the
# intended contract.
#
# Raises ArgumentError when +s+ does not start with well-formed UTF-8.
def decode(s)
  s.unpack('U').first
end
35
+
36
+
37
# Append the character whose Unicode code point is +c+ to @textBuffer.
#
# When `utf8encode` is true the code point is appended as its UTF-8 byte
# sequence; otherwise +c+ is appended unchanged.
#
# The hand-rolled encoder this replaces built continuation bytes with
# `c & (0x3F | 0x80)` instead of `(c & 0x3F) | 0x80` (so U+0100 became
# C4 00 rather than C4 80) and did not mask the lead bytes. Array#pack('U')
# produces the correct UTF-8 sequence for every code point.
def encode(c)
  if utf8encode
    @textBuffer << [c].pack('U')
  else
    @textBuffer << c
  end
end
58
+
@@ -0,0 +1,67 @@
1
+ Name ::= (Letter | '_' | ':') (NameChar)*
2
+ NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
3
+ Letter ::= BaseChar | Ideographic
4
+ Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
5
+ WhiteSpace ::= #x20 | #x9 | #xD | #xA
6
+ Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
7
+
8
+ Digit ::= [#x0030-#x0039] |[#x0660-#x0669] |[#x06F0-#x06F9]
9
+ |[#x0966-#x096F] |[#x09E6-#x09EF] |[#x0A66-#x0A6F] |[#x0AE6-#x0AEF]
10
+ |[#x0B66-#x0B6F] |[#x0BE7-#x0BEF] |[#x0C66-#x0C6F] |[#x0CE6-#x0CEF]
11
+ |[#x0D66-#x0D6F] |[#x0E50-#x0E59] |[#x0ED0-#x0ED9] |[#x0F20-#x0F29]
12
+
13
+ Extender ::= [#x0030-#x0039] |[#x0660-#x0669] |[#x06F0-#x06F9]
14
+ |[#x0966-#x096F] |[#x09E6-#x09EF] |[#x0A66-#x0A6F]
15
+ |[#x0AE6-#x0AEF] |[#x0B66-#x0B6F] |[#x0BE7-#x0BEF]
16
+ |[#x0C66-#x0C6F] |[#x0CE6-#x0CEF] |[#x0D66-#x0D6F]
17
+ |[#x0E50-#x0E59] |[#x0ED0-#x0ED9] |[#x0F20-#x0F29]
18
+ CombiningChar ::=[#x0300-#x0345] |[#x0360-#x0361] |[#x0483-#x0486]
19
+ |[#x0591-#x05A1] |[#x05A3-#x05B9] |[#x05BB-#x05BD]
20
+ |#x05BF |[#x05C1-#x05C2] |#x05C4 |[#x064B-#x0652]
21
+ |#x0670 |[#x06D6-#x06DC] |[#x06DD-#x06DF] |[#x06E0-#x06E4]
22
+ |[#x06E7-#x06E8] |[#x06EA-#x06ED] |[#x0901-#x0903] |#x093C
23
+ |[#x093E-#x094C] |#x094D |[#x0951-#x0954] |[#x0962-#x0963]
24
+ |[#x0981-#x0983] |#x09BC |#x09BE |#x09BF |[#x09C0-#x09C4]
25
+ |[#x09C7-#x09C8] |[#x09CB-#x09CD] |#x09D7 |[#x09E2-#x09E3]
26
+ |#x0A02 |#x0A3C |#x0A3E |#x0A3F |[#x0A40-#x0A42]
27
+ |[#x0A47-#x0A48] |[#x0A4B-#x0A4D] |[#x0A70-#x0A71]
28
+ |[#x0A81-#x0A83] |#x0ABC |[#x0ABE-#x0AC5] |[#x0AC7-#x0AC9]
29
+ |[#x0ACB-#x0ACD] |[#x0B01-#x0B03] |#x0B3C |[#x0B3E-#x0B43]
30
+ |[#x0B47-#x0B48] |[#x0B4B-#x0B4D] |[#x0B56-#x0B57]
31
+ |[#x0B82-#x0B83] |[#x0BBE-#x0BC2] |[#x0BC6-#x0BC8]
32
+ |[#x0BCA-#x0BCD] |#x0BD7 |[#x0C01-#x0C03] |[#x0C3E-#x0C44]
33
+ |[#x0C46-#x0C48] |[#x0C4A-#x0C4D] |[#x0C55-#x0C56]
34
+ |[#x0C82-#x0C83] |[#x0CBE-#x0CC4] |[#x0CC6-#x0CC8]
35
+ |[#x0CCA-#x0CCD] |[#x0CD5-#x0CD6] |[#x0D02-#x0D03]
36
+ |[#x0D3E-#x0D43] |[#x0D46-#x0D48] |[#x0D4A-#x0D4D]
37
+ |#x0D57 |#x0E31 |[#x0E34-#x0E3A] |[#x0E47-#x0E4E] |#x0EB1
38
+ |[#x0EB4-#x0EB9] |[#x0EBB-#x0EBC] |[#x0EC8-#x0ECD] |[#x0F18-#x0F19]
39
+ |#x0F35 |#x0F37 |#x0F39 |#x0F3E |#x0F3F |[#x0F71-#x0F84]
40
+ |[#x0F86-#x0F8B] |[#x0F90-#x0F95] |#x0F97 |[#x0F99-#x0FAD]
41
+ |[#x0FB1-#x0FB7] |#x0FB9 |[#x20D0-#x20DC] |#x20E1
42
+ |[#x302A-#x302F] |#x3099 |#x309A
43
+ BaseChar ::= [#x0300-#x0345] |[#x0360-#x0361] |[#x0483-#x0486] |[#x0591-#x05A1]
44
+ |[#x05A3-#x05B9] |[#x05BB-#x05BD] |#x05BF |[#x05C1-#x05C2]
45
+ |#x05C4 |[#x064B-#x0652] |#x0670 |[#x06D6-#x06DC]
46
+ |[#x06DD-#x06DF] |[#x06E0-#x06E4] |[#x06E7-#x06E8]
47
+ |[#x06EA-#x06ED] |[#x0901-#x0903] |#x093C |[#x093E-#x094C]
48
+ |#x094D |[#x0951-#x0954] |[#x0962-#x0963] |[#x0981-#x0983]
49
+ |#x09BC |#x09BE |#x09BF |[#x09C0-#x09C4] |[#x09C7-#x09C8]
50
+ |[#x09CB-#x09CD] |#x09D7 |[#x09E2-#x09E3] |#x0A02 |#x0A3C
51
+ |#x0A3E |#x0A3F |[#x0A40-#x0A42] |[#x0A47-#x0A48]
52
+ |[#x0A4B-#x0A4D] |[#x0A70-#x0A71] |[#x0A81-#x0A83]
53
+ |#x0ABC |[#x0ABE-#x0AC5] |[#x0AC7-#x0AC9] |[#x0ACB-#x0ACD]
54
+ |[#x0B01-#x0B03] |#x0B3C |[#x0B3E-#x0B43] |[#x0B47-#x0B48]
55
+ |[#x0B4B-#x0B4D] |[#x0B56-#x0B57] |[#x0B82-#x0B83]
56
+ |[#x0BBE-#x0BC2] |[#x0BC6-#x0BC8] |[#x0BCA-#x0BCD] |#x0BD7
57
+ |[#x0C01-#x0C03] |[#x0C3E-#x0C44] |[#x0C46-#x0C48]
58
+ |[#x0C4A-#x0C4D] |[#x0C55-#x0C56] |[#x0C82-#x0C83]
59
+ |[#x0CBE-#x0CC4] |[#x0CC6-#x0CC8] |[#x0CCA-#x0CCD]
60
+ |[#x0CD5-#x0CD6] |[#x0D02-#x0D03] |[#x0D3E-#x0D43]
61
+ |[#x0D46-#x0D48] |[#x0D4A-#x0D4D] |#x0D57 |#x0E31
62
+ |[#x0E34-#x0E3A] |[#x0E47-#x0E4E] |#x0EB1 |[#x0EB4-#x0EB9]
63
+ |[#x0EBB-#x0EBC] |[#x0EC8-#x0ECD]|[#x0F18-#x0F19] |#x0F35
64
+ |#x0F37 |#x0F39 |#x0F3E |#x0F3F |[#x0F71-#x0F84]
65
+ |[#x0F86-#x0F8B] |[#x0F90-#x0F95] |#x0F97 |[#x0F99-#x0FAD]
66
+ |[#x0FB1-#x0FB7] |#x0FB9 |[#x20D0-#x20DC] |#x20E1
67
+ |[#x302A-#x302F] |#x3099 |#x309A
@@ -0,0 +1,908 @@
1
+ # xampl-pp : XML pull parser
2
+ # Copyright (C) 2002-2009 Bob Hutchison
3
+ #
4
+ # This library is free software; you can redistribute it and/or
5
+ # modify it under the terms of the GNU Lesser General Public
6
+ # License as published by the Free Software Foundation; either
7
+ # version 2.1 of the License, or (at your option) any later version.
8
+ #
9
+ # This library is distributed in the hope that it will be useful,
10
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
+ # Lesser General Public License for more details.
13
+ #
14
+ # You should have received a copy of the GNU Lesser General Public
15
+ # License along with this library; if not, write to the Free Software
16
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
+ #
18
+
19
+
20
# Xpp is a small XML pull parser.
#
# Assign a document with #input= (a String, or an object that responds to
# `gets`, such as an IO), then call #nextEvent repeatedly. Each call returns
# one of the event-type constants below and updates the reader attributes
# (#name, #text, #attributeName, ...) to describe that event. END_DOCUMENT is
# returned once the input is exhausted.
#
# Characters are handled with `?x` literals and `buffer[index]`, which agree
# with each other on both old (Integer characters) and current (one-character
# String) Ruby versions.
class Xpp
  # XML 'event' types
  START_DOCUMENT = 'START_DOCUMENT'.freeze
  END_DOCUMENT = 'END_DOCUMENT'.freeze
  START_ELEMENT = 'START_ELEMENT'.freeze
  END_ELEMENT = 'END_ELEMENT'.freeze
  TEXT = 'TEXT'.freeze
  CDATA_SECTION = 'CDATA_SECTION'.freeze
  ENTITY_REF = 'ENTITY_REF'.freeze
  IGNORABLE_WHITESPACE = 'IGNORABLE_WHITESPACE'.freeze
  PROCESSING_INSTRUCTION = 'PROCESSING_INSTRUCTION'.freeze
  COMMENT = 'COMMENT'.freeze
  DOCTYPE = 'DOCTYPE'.freeze
  UNDECIDED_TYPE = 'UNDECIDED_TYPE'.freeze

  # 'Features', actually just processing options
  attr_accessor :processNamespace, :reportNamespaceAttributes,
                :checkWellFormed, :utf8encode

  # the entities that we will recognise
  attr_accessor :entityMap, :resolver
  attr_reader :unresolvedEntity

  # some information about where we are
  attr_reader :line, :column

  # element information
  attr_reader :type, :emptyElement, :name, :qname, :namespace, :prefix,
              :attributeName, :attributeQName, :attributeNamespace,
              :attributePrefix, :attributeValue

  attr_reader :text

  # These are not intended for general use (they are not part of the api)

  # open element information
  attr_reader :elementName, :elementQName, :elementNamespace, :elementPrefix

  # some pre-compiled patterns
  attr_accessor :namePattern, :skipWhitespacePattern

  attr_reader :elementNamespacePrefixStack, :elementNamespaceValueStack,
              :elementNamespaceDefaultStack

  # Create a parser with namespace processing, well-formedness checking and
  # UTF-8 encoding of numeric character references enabled, the five
  # predefined XML entities registered, and no input.
  def initialize
    self.processNamespace = true
    self.reportNamespaceAttributes = false
    self.checkWellFormed = true
    self.utf8encode = true

    self.input = nil

    self.entityMap = { "amp" => "&",
                       "apos" => "'",
                       "gt" => ">",
                       "lt" => "<",
                       "quot" => "\"" }
    # These were built with Regexp.compile(regexp, nil, 'u'). The extra
    # arguments are ignored when the source is already a Regexp, and recent
    # Ruby versions reject the three-argument form, so the literals are used
    # directly.
    self.namePattern = /[^\x00-\x20=\/>\`\.\~\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?][^\x00-\x20=\/>\`\!\@\#\$\%\^\&\*\(\)\+\=\]\[\{\}\\\|\;\'\"\,\<\>\/\?]*/
    self.skipWhitespacePattern = /[^\s]+|\x00/
  end

  # True when the current event is START_DOCUMENT.
  def startDocument?
    @type == START_DOCUMENT
  end

  # True when the current event is END_DOCUMENT.
  def endDocument?
    @type == END_DOCUMENT
  end

  # True when the current event is START_ELEMENT.
  def startElement?
    @type == START_ELEMENT
  end

  # True when the current event is END_ELEMENT.
  def endElement?
    @type == END_ELEMENT
  end

  # True when the current event is TEXT.
  def text?
    @type == TEXT
  end

  # True when the current event is CDATA_SECTION.
  def cdata?
    @type == CDATA_SECTION
  end

  # True when the current event is ENTITY_REF.
  def entityRef?
    @type == ENTITY_REF
  end

  # True when the current event is IGNORABLE_WHITESPACE.
  def ignorableWhitespace?
    @type == IGNORABLE_WHITESPACE
  end

  # True when the current text contains nothing that skipWhitespacePattern
  # matches (with the default pattern: only whitespace).
  def whitespace?
    @text.index(@skipWhitespacePattern).nil?
  end

  # True when the current event is PROCESSING_INSTRUCTION.
  def processingInstruction?
    @type == PROCESSING_INSTRUCTION
  end

  # True when the current event is COMMENT.
  def comment?
    @type == COMMENT
  end

  # True when the current event is DOCTYPE.
  def doctype?
    @type == DOCTYPE
  end

  # Set the document to parse and reset all parser state.
  #
  # +v+ may be nil (no input), a String holding the whole document, or a
  # line source: an IO or any other object that responds to `gets`. Anything
  # else raises "illegalInput". Calls #initInput once the state is reset.
  def input=(v)
    if v.nil?
      @input = nil
      @inputBuffer = nil
      @inputBufferLength = 0
      @line = 0
    elsif v.kind_of?(String)
      @input = nil
      @inputBuffer = v
      @inputBufferLength = v.length
      @line = 1
    elsif v.kind_of?(IO) || v.respond_to?(:gets)
      @input = v
      @inputBuffer = nil
      @inputBufferLength = 0
      @line = 0
    else
      raise "illegalInput"
    end
    @column = 0
    @nextInputBuffer = nil
    @textBuffer = ''.dup

    @elementNamespacePrefixStack = []
    @elementNamespaceValueStack = []
    @elementNamespaceDefaultStack = []

    @elementName = []
    @elementQName = []
    @elementNamespace = []
    @elementPrefix = []

    @type = START_DOCUMENT
    @unresolvedEntity = false

    @name = nil
    @namespace = nil

    @attributeName = []
    @attributeQName = []
    @attributeNamespace = []
    @attributePrefix = []
    @attributeValue = []

    @emptyElement = false

    @haveRoot = false

    initInput
  end

  # Hook called at the end of #input=.
  def initInput
    # override this if needed
  end

  # Advance to the next event and return its type.
  #
  # Returns END_DOCUMENT when there is no input (left). Any StandardError
  # raised while parsing is re-raised as a RuntimeError whose message names
  # the input source and the current line and column.
  def nextEvent
    @type = END_DOCUMENT
    return @type if @inputBuffer.nil? && @input.nil?

    @unresolvedEntity = false
    @text = nil

    parseNextEvent

    @type
  rescue StandardError => e
    # Check @input first: while reading from a stream @inputBuffer holds the
    # current line, so testing the buffer first mislabels streams as String
    # input.
    source = if !@input.nil?
               if @input.kind_of?(File)
                 sprintf("file '%s'", @input.path)
               else
                 "unnamed IO stream"
               end
             elsif !@inputBuffer.nil?
               "String input"
             else
               "unknown source"
             end
    raise sprintf("parse error: '%s' -- %s, line %d, column %d", e, source, @line, @column)
  end

  private

  # Make the next line of a stream input current and pre-fetch the line
  # after it (the peek methods look across the line boundary). For String
  # input, or at the end of a stream, the buffer becomes nil and its length
  # -1. Always resets the column and advances the line counter.
  def getMoreInput
    @column = 0
    @line += 1
    if @input.nil?
      @inputBuffer = nil
      @inputBufferLength = -1
      return nil
    end
    @inputBuffer = @nextInputBuffer || @input.gets
    if @inputBuffer.nil?
      @inputBufferLength = -1
      return nil
    end
    @inputBufferLength = @inputBuffer.length
    @nextInputBuffer = @input.gets
  end

  # Consume one character and return it; raise unless it equals +e+.
  def expect(e)
    c = read
    if c != e
      # c is nil at end of input; report that instead of failing on '' << nil
      got = c.nil? ? 'EOF' : (''.dup << c)
      raise sprintf("unexpectedChar:: expect '%s' got '%s' in %s", (''.dup << e), got, caller[0])
    end
    c
  end

  # Consume and return the next character, or nil at end of input.
  def read
    getMoreInput if @inputBuffer.nil? || (@inputBufferLength <= @column)
    return nil if @inputBuffer.nil?

    c = @inputBuffer[@column]
    @column += 1
    c
  end

  # Return the next character without consuming it (nil at end of input).
  def peekAt0
    getMoreInput if @inputBuffer.nil?
    return @inputBuffer[@column] if @column < @inputBufferLength

    peekNextBuffer(0)
  end

  # Return the character after the next one without consuming anything
  # (nil when there is none).
  def peekAt1
    getMoreInput if @inputBuffer.nil?
    return @inputBuffer[@column + 1] if (@column + 1) < @inputBufferLength

    # one character left in this buffer => first character of the next one,
    # otherwise the second character of the next one
    peekNextBuffer(@column < @inputBufferLength ? 0 : 1)
  end

  # Character at +index+ of the pre-fetched next line, or nil.
  def peekNextBuffer(index)
    return nil if @nextInputBuffer.nil?
    return nil unless index < @nextInputBuffer.length

    @nextInputBuffer[index]
  end

  # Drop the namespace scope that handleNamespaces pushed for an element.
  def popNamespaceScope
    @elementNamespacePrefixStack.pop
    @elementNamespaceValueStack.pop
    @elementNamespaceDefaultStack.pop
  end

  # Parse one event and leave its description in the instance variables.
  def parseNextEvent
    @attributeName.clear
    @attributeQName.clear
    @attributeNamespace.clear
    @attributePrefix.clear
    @attributeValue.clear

    if @emptyElement
      # the last event was an empty start element like <start/>
      @type = END_ELEMENT
      @emptyElement = false
      # parseEndElement never runs for <start/>, so the scope pushed by
      # handleNamespaces must be dropped here; otherwise declarations made
      # on the empty element leak to the elements that follow it.
      popNamespaceScope
      return
    end

    @prefix = nil
    @name = nil
    @qname = nil
    @namespace = nil
    @type = peekType

    @textBuffer = ''.dup

    case @type
    when END_DOCUMENT
      # nothing to do
    when ENTITY_REF
      parseEntity
      @text = @textBuffer
      @textBuffer = ''.dup
    when START_ELEMENT
      parseStartElement
    when END_ELEMENT
      parseEndElement
    when TEXT
      parseText(?<, false)
      @text = @textBuffer
      @textBuffer = ''.dup
      # whitespace outside the root element is not real content
      if @elementName.empty? && @text.index(@skipWhitespacePattern).nil?
        @type = IGNORABLE_WHITESPACE
      end
    else
      @type = parseUndecided
    end
  end

  # Parse a construct starting with '<?' or '<!': a processing instruction,
  # comment, CDATA section or DOCTYPE. Leaves its content in @text and
  # returns the event type.
  def parseUndecided
    expect(?<)

    demand = nil
    delimiter = nil # **first** (not last) character of the delimiting string

    @text = @textBuffer
    @textBuffer = ''.dup

    c = read
    if ?? == c
      result = PROCESSING_INSTRUCTION
      delimiter = ??
    elsif ?! == c
      cc = peekAt0
      if ?- == cc
        result = COMMENT
        demand = '--'
        delimiter = ?-
      elsif ?[ == cc
        result = CDATA_SECTION
        demand = '[CDATA['
        delimiter = ?]
      else
        result = DOCTYPE
        demand = 'DOCTYPE'
      end
    else
      # this should never happen because we'll get an illegal name exception
      # first
      raise "illegal <#{c}"
    end

    unless demand.nil?
      # demand[i] has the same type as the characters returned by read;
      # each_byte would yield Integers, which never equal one-character
      # Strings.
      demand.length.times { |i| expect(demand[i]) }
    end

    if DOCTYPE == result
      parseDoctype
    else
      terminator = if ?? == delimiter
                     '?>'
                   else
                     ''.dup << delimiter << delimiter << '>'
                   end
      regex = /#{Regexp.escape(terminator)}/u
      found = findOneOfThese(regex)
      @text = @textBuffer
      # step over the terminator (it is absent when the input ended early)
      @column += terminator.length unless found.nil?
    end

    result
  end

  # Consume the remainder of a DOCTYPE declaration (the keyword itself has
  # already been read) up to its closing '>', collecting the raw text in
  # @text. Quoted strings, nested '<...>' declarations and comments are
  # stepped over. Raises on end of input.
  def parseDoctype
    depth = 1
    quoted = false
    delimiter = nil

    @text = ''.dup

    loop do
      c = read
      case c
      when ?', ?"
        if quoted
          if c == delimiter
            quoted = false
            delimiter = nil
          end
        else
          quoted = true
          delimiter = c
        end
      when ?<
        unless quoted
          if (?! == peekAt0) && (?- == peekAt1)
            # this is looking like a comment
            @text << c
            @text << expect(?!)
            @text << expect(?-)
            c = read
            if ?- == c
              @text << ?-
              findOneOfThese(/-->/u)
              @text << @textBuffer
              @textBuffer = ''.dup
              @text << expect(?-)
              @text << expect(?-)
              c = expect(?>)
            else
              depth += 1
            end
          else
            depth += 1
          end
        end
      when ?>
        unless quoted
          depth -= 1
          # TODO: check right here for an entity definition
          return if 0 == depth
        end
      when nil
        raise sprintf("unexpected EOF in DOCTYPE (depth: %d, quoted: %s)", depth, quoted)
      end
      @text << c
    end
  end

  # Decide, without consuming anything, which kind of event comes next.
  def peekType
    case peekAt0
    when nil, 0, "\0"
      # a NUL character ends the document (Integer 0 or "\0" depending on
      # how this Ruby represents characters)
      END_DOCUMENT
    when ?&
      ENTITY_REF
    when ?<
      case peekAt1
      when ?/
        END_ELEMENT
      when ?[
        CDATA_SECTION
      when ??, ?!
        UNDECIDED_TYPE
      else
        START_ELEMENT
      end
    else
      TEXT
    end
  end

  # Append the character with code point +c+ to @textBuffer: as UTF-8 when
  # utf8encode is set, otherwise unchanged.
  #
  # Array#pack('U') replaces a hand-rolled encoder whose continuation-byte
  # mask, `c & (0x3F | 0x80)` instead of `(c & 0x3F) | 0x80`, produced
  # invalid sequences (U+0100 became C4 00).
  def encode(c)
    if utf8encode
      @textBuffer << [c].pack('U')
    else
      @textBuffer << c
    end
  end

  # Parse an entity or character reference ('&name;', '&#n;', '&#xh;').
  #
  # The reference name is left in @name and its replacement text is appended
  # to @textBuffer. Named entities are looked up in entityMap, then through
  # the resolver (if any); unknown ones set @unresolvedEntity.
  def parseEntity
    expect(?&)

    @name = ''.dup
    loop do
      c = read
      break if ?; == c
      raise "unexpectedEOF" if c.nil?

      @name << c
    end

    if ?\# == @name[0]
      codePoint = if ?x == @name[1]
                    @name[2..-1].hex
                  else
                    @name[1..-1].to_i
                  end
      encode(codePoint)
      return
    end

    value = entityMap[@name]
    value = @resolver.resolve(@name) if value.nil? && !@resolver.nil?

    if value.nil?
      @unresolvedEntity = true
    else
      @textBuffer << value
    end
  end

  # Parse a start tag including its attributes, then resolve namespaces (if
  # enabled) and record the element on the open-element stacks unless it is
  # an empty element.
  def parseStartElement
    expect(?<)
    skipWhitespace

    @qname = readName
    @textBuffer = ''.dup

    loop do
      skipWhitespace
      c = peekAt0
      raise "unexpectedEOF" if c.nil?

      if ?/ == c
        @emptyElement = true

        read
        skipWhitespace
        expect(?>)

        break
      end
      if ?> == c
        @emptyElement = false
        read
        break
      end

      aName = readName
      @textBuffer = ''.dup
      raise "name expected (start element)" if aName.nil? || (0 == aName.length)

      skipWhitespace
      expect(?=)
      skipWhitespace

      delimiter = read
      raise "invalidDelimiter" unless (?' == delimiter) || (?" == delimiter)

      value = parseText(delimiter, true)

      @textBuffer = ''.dup

      # skip the end delimiter
      read

      if processNamespace
        # handleNamespaces fills in the remaining attribute arrays
        @attributeQName.push aName
      else
        @attributeName.push aName
        @attributeQName.push aName
        @attributeNamespace.push nil
        @attributePrefix.push nil
      end
      @attributeValue.push value
    end

    if processNamespace
      handleNamespaces
    else
      @name = @qname
      @namespace = nil
      @prefix = nil
    end

    return if @emptyElement

    if @checkWellFormed && @elementName.empty?
      raise "unexpected element" if @haveRoot

      @haveRoot = true
    end
    @elementName.push @name
    @elementQName.push @qname
    @elementNamespace.push @namespace
    @elementPrefix.push @prefix
  end

  # Parse an end tag, check that it closes the innermost open element and
  # restore that element's name, prefix and namespace.
  def parseEndElement
    raise "elementStackEmpty" if @elementName.empty?

    expect(?<)
    expect(?/)
    skipWhitespace

    @qname = readName
    startQName = @elementQName.pop
    if @qname != startQName
      raise sprintf("unexpectedEndElement wanted '%s' found '%s'", startQName, @qname)
    end
    skipWhitespace
    expect(?>)

    @name = @elementName.pop
    @prefix = @elementPrefix.pop
    @namespace = @elementNamespace.pop

    popNamespaceScope
  end

  # Read a name at the current position and return it; raise if there is
  # none.
  def readName
    @textBuffer = ''.dup
    matchThis(@namePattern)
    raise "name expected (readName)" if 0 == @textBuffer.length

    @textBuffer
  end

  # This is called by parseStartElement to deal with namespaces. Updates the
  # known namespaces based on the attributes in this start element. Then sets
  # up the namespaces for this element itself (i.e. processes the qname).
  def handleNamespaces
    defaultNamespace = @elementNamespaceDefaultStack.last

    qnames = @attributeQName
    values = @attributeValue
    @attributeQName = []
    @attributeValue = []

    prefixList = []
    valueList = []
    anyQualifiedAttributes = false

    qnames.each_with_index do |qname, i|
      value = values[i]

      if 'xmlns' == qname
        prefix = 'xmlns'
        name = nil
        namespace = lookupNamespace(prefix)
        defaultNamespace = value
      else
        pieces = qname.split(':', 2)
        if 2 != pieces.length
          # this is an un-prefixed non-xmlns attribute
          @attributeQName.push qname
          @attributeName.push qname
          @attributeNamespace.push nil
          @attributePrefix.push nil
          @attributeValue.push value
          next
        end

        namespace = value
        prefix, name = pieces

        raise "illegalEmptyAtributePrefix" if 0 == prefix.length
        raise "illegalEmptyAttributeName" if 0 == name.length
      end

      # only prefixed attributes beyond here

      raise "illegalEmptyNamespace" if namespace.nil?

      if "xmlns" != prefix
        anyQualifiedAttributes = true
        report = true
      else
        if !name.nil? && (namespace.nil? || (0 == namespace.length))
          raise "illegalNamespace"
        end

        prefixList.push name
        valueList.push value
        report = @reportNamespaceAttributes
      end

      next unless report

      @attributeQName.push qname
      @attributeName.push name
      @attributeNamespace.push namespace
      @attributePrefix.push prefix
      @attributeValue.push value
    end

    @elementNamespacePrefixStack.push prefixList
    @elementNamespaceValueStack.push valueList
    @elementNamespaceDefaultStack.push defaultNamespace

    if anyQualifiedAttributes
      # run over the attributes and make sure we have them qualified
      @attributePrefix.each_with_index do |attrPrefix, i|
        @attributeNamespace[i] = lookupNamespace(attrPrefix) unless attrPrefix.nil?
      end
    end

    # handle namespaces for the element name
    pieces = @qname.split(':', 2)
    if 2 == pieces.length
      @name = pieces[1]
      @prefix = pieces[0]
      @namespace = lookupNamespace(@prefix)
    else
      @name = @qname
      @namespace = defaultNamespace
      @prefix = nil
    end
  end

  # Return the namespace bound to +prefix+, searching the innermost scope
  # first. 'xml' and 'xmlns' are predefined. Raises for a nil or unknown
  # prefix.
  def lookupNamespace(prefix)
    raise "illegalPrefix" if prefix.nil?
    return 'http://www.w3.org/XML/1998/namespace' if 'xml' == prefix
    return 'http://www.w3.org/2000/xmlns/' if 'xmlns' == prefix

    (@elementNamespacePrefixStack.length - 1).downto(0) do |i|
      j = @elementNamespacePrefixStack[i].index(prefix)
      return @elementNamespaceValueStack[i][j] unless j.nil?
    end
    raise sprintf("unknown Namespace Prefix '%s' [%s]", prefix, caller[0])
  end

  # Collect text into @textBuffer up to (not including) +delimiter+ and
  # return the buffer.
  #
  # With +resolve+ true, entity references are replaced in place and an
  # unresolvable one raises; with +resolve+ false, collection stops in front
  # of the first '&'.
  def parseText(delimiter, resolve)
    stoppers = '&'.dup << delimiter
    regex = /[#{stoppers}]/u
    c = findOneOfThese(regex)
    while !c.nil? && (delimiter != c)
      if ?& == c
        break unless resolve

        parseEntity

        raise "unresolvedEntity" if @unresolvedEntity
      else
        c = read
        @textBuffer << c
      end

      c = findOneOfThese(regex)
    end

    @textBuffer
  end

  # Advance to the next position matched by skipWhitespacePattern, reading
  # more input as needed; return the character there, or nil at end of
  # input.
  def skipWhitespace
    until @inputBuffer.nil?
      pos = @inputBuffer.index(@skipWhitespacePattern, @column)

      unless pos.nil?
        @column = pos
        return @inputBuffer[pos]
      end
      getMoreInput
    end
    nil
  end

  # Append the text +regex+ matches at the current position to @textBuffer
  # and step over it. When there is no match exactly at the current position
  # nothing is appended and the rest of the buffer is skipped.
  def matchThis(regex)
    return if @inputBuffer.nil?

    pos = @inputBuffer.index(regex, @column)
    # Only a match that begins at the cursor counts; accepting a later one
    # would silently skip the characters in between.
    if pos == @column
      matched = Regexp.last_match[0]
      @textBuffer << matched
      @column += matched.length
    else
      @column = @inputBufferLength
    end
  end

  # Append everything up to the next match of +regex+ to @textBuffer, reading
  # more input as needed, and stop in front of the match. Returns the first
  # character of the match, or nil if the input ends first.
  def findOneOfThese(regex)
    until @inputBuffer.nil?
      pos = @inputBuffer.index(regex, @column)

      unless pos.nil?
        if pos != @column
          @textBuffer << @inputBuffer[@column..(pos - 1)]
          @column = pos
        end
        return @inputBuffer[pos]
      end

      @textBuffer << @inputBuffer[@column..-1]
      getMoreInput
    end
    nil
  end
end