rgen 0.5.4 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +28 -0
- data/Rakefile +3 -4
- data/lib/ea_support/uml13_ea_metamodel.rb +3 -3
- data/lib/ea_support/uml13_ea_to_uml13.rb +33 -2
- data/lib/ea_support/uml13_to_uml13_ea.rb +7 -0
- data/lib/mmgen/mm_ext/ecore_mmgen_ext.rb +4 -4
- data/lib/mmgen/templates/metamodel_generator.tpl +143 -143
- data/lib/rgen/ecore/ecore.rb +11 -1
- data/lib/rgen/ecore/ecore_interface.rb +47 -0
- data/lib/rgen/ecore/ecore_to_ruby.rb +166 -0
- data/lib/rgen/ecore/{ecore_transformer.rb → ruby_to_ecore.rb} +11 -11
- data/lib/rgen/environment.rb +15 -2
- data/lib/rgen/fragment/dump_file_cache.rb +63 -0
- data/lib/rgen/fragment/fragmented_model.rb +139 -0
- data/lib/rgen/fragment/model_fragment.rb +268 -0
- data/lib/rgen/instantiator/abstract_xml_instantiator.rb +44 -72
- data/lib/rgen/instantiator/default_xml_instantiator.rb +2 -2
- data/lib/rgen/instantiator/ecore_xml_instantiator.rb +16 -1
- data/lib/rgen/instantiator/json_instantiator.rb +16 -2
- data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +118 -138
- data/lib/rgen/instantiator/qualified_name_resolver.rb +5 -1
- data/lib/rgen/instantiator/reference_resolver.rb +126 -24
- data/lib/rgen/instantiator/xmi11_instantiator.rb +6 -2
- data/lib/rgen/metamodel_builder.rb +18 -6
- data/lib/rgen/metamodel_builder/builder_extensions.rb +431 -407
- data/lib/rgen/metamodel_builder/builder_runtime.rb +8 -8
- data/lib/rgen/metamodel_builder/constant_order_helper.rb +4 -4
- data/lib/rgen/metamodel_builder/data_types.rb +5 -1
- data/lib/rgen/metamodel_builder/intermediate/feature.rb +167 -0
- data/lib/rgen/metamodel_builder/module_extension.rb +2 -2
- data/lib/rgen/model_builder.rb +10 -5
- data/lib/rgen/model_builder/builder_context.rb +17 -1
- data/lib/rgen/serializer/opposite_reference_filter.rb +18 -0
- data/lib/rgen/serializer/qualified_name_provider.rb +45 -0
- data/lib/rgen/template_language/template_container.rb +3 -1
- data/lib/rgen/{auto_class_creator.rb → util/auto_class_creator.rb} +6 -1
- data/lib/rgen/util/cached_glob.rb +67 -0
- data/lib/rgen/util/file_cache_map.rb +104 -0
- data/lib/rgen/util/file_change_detector.rb +78 -0
- data/lib/rgen/{method_delegation.rb → util/method_delegation.rb} +18 -3
- data/lib/rgen/{model_comparator.rb → util/model_comparator.rb} +17 -5
- data/lib/rgen/{model_comparator_base.rb → util/model_comparator_base.rb} +6 -1
- data/lib/rgen/{model_dumper.rb → util/model_dumper.rb} +6 -1
- data/lib/rgen/{name_helper.rb → util/name_helper.rb} +6 -1
- data/lib/rgen/util/pattern_matcher.rb +329 -0
- data/lib/transformers/uml13_to_ecore.rb +103 -60
- data/test/ecore_self_test.rb +43 -42
- data/test/json_test.rb +15 -0
- data/test/metamodel_builder_test.rb +361 -206
- data/test/metamodel_from_ecore_test.rb +45 -0
- data/test/metamodel_order_test.rb +10 -4
- data/test/metamodel_roundtrip_test.rb +2 -2
- data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +1 -1
- data/test/metamodel_roundtrip_test/houseMetamodel_Regenerated.ecore +50 -50
- data/test/method_delegation_test.rb +9 -9
- data/test/model_builder/ecore_internal.rb +19 -9
- data/test/model_builder/serializer_test.rb +1 -1
- data/test/reference_resolver_test.rb +79 -12
- data/test/rgen_test.rb +2 -0
- data/test/template_language_test.rb +7 -0
- data/test/template_language_test/templates/callback_indent_test/a.tpl +12 -0
- data/test/template_language_test/templates/callback_indent_test/b.tpl +5 -0
- data/test/testmodel/ea_testmodel_regenerated.xml +588 -583
- data/test/transformer_test.rb +3 -3
- data/test/util/file_cache_map_test.rb +91 -0
- data/test/util/file_cache_map_test/testdir/fileA +1 -0
- data/test/util_test.rb +4 -0
- data/test/xml_instantiator_test.rb +139 -135
- metadata +49 -104
- data/lib/rgen/ecore/ecore_instantiator.rb +0 -31
- data/lib/rgen/metamodel_builder/metamodel_description.rb +0 -232
- data/redist/xmlscan/ChangeLog +0 -1301
- data/redist/xmlscan/README +0 -34
- data/redist/xmlscan/THANKS +0 -11
- data/redist/xmlscan/doc/changes.html +0 -74
- data/redist/xmlscan/doc/changes.rd +0 -80
- data/redist/xmlscan/doc/en/conformance.html +0 -136
- data/redist/xmlscan/doc/en/conformance.rd +0 -152
- data/redist/xmlscan/doc/en/manual.html +0 -356
- data/redist/xmlscan/doc/en/manual.rd +0 -402
- data/redist/xmlscan/doc/ja/conformance.ja.html +0 -118
- data/redist/xmlscan/doc/ja/conformance.ja.rd +0 -134
- data/redist/xmlscan/doc/ja/manual.ja.html +0 -325
- data/redist/xmlscan/doc/ja/manual.ja.rd +0 -370
- data/redist/xmlscan/doc/src/Makefile +0 -41
- data/redist/xmlscan/doc/src/conformance.rd.src +0 -256
- data/redist/xmlscan/doc/src/langsplit.rb +0 -110
- data/redist/xmlscan/doc/src/manual.rd.src +0 -614
- data/redist/xmlscan/install.rb +0 -41
- data/redist/xmlscan/lib/xmlscan/encoding.rb +0 -311
- data/redist/xmlscan/lib/xmlscan/htmlscan.rb +0 -289
- data/redist/xmlscan/lib/xmlscan/namespace.rb +0 -352
- data/redist/xmlscan/lib/xmlscan/parser.rb +0 -299
- data/redist/xmlscan/lib/xmlscan/scanner.rb +0 -1109
- data/redist/xmlscan/lib/xmlscan/version.rb +0 -22
- data/redist/xmlscan/lib/xmlscan/visitor.rb +0 -158
- data/redist/xmlscan/lib/xmlscan/xmlchar.rb +0 -441
- data/redist/xmlscan/memo/CONFORMANCE +0 -1249
- data/redist/xmlscan/memo/PRODUCTIONS +0 -195
- data/redist/xmlscan/memo/contentspec.ry +0 -335
- data/redist/xmlscan/samples/chibixml.rb +0 -105
- data/redist/xmlscan/samples/getxmlchar.rb +0 -122
- data/redist/xmlscan/samples/rexml.rb +0 -159
- data/redist/xmlscan/samples/xmlbench.rb +0 -88
- data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +0 -22
- data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +0 -29
- data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +0 -62
- data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +0 -22
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +0 -62
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +0 -22
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +0 -22
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +0 -99
- data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +0 -116
- data/redist/xmlscan/samples/xmlconftest.rb +0 -200
- data/redist/xmlscan/test.rb +0 -7
- data/redist/xmlscan/tests/deftestcase.rb +0 -73
- data/redist/xmlscan/tests/runtest.rb +0 -47
- data/redist/xmlscan/tests/testall.rb +0 -14
- data/redist/xmlscan/tests/testencoding.rb +0 -438
- data/redist/xmlscan/tests/testhtmlscan.rb +0 -752
- data/redist/xmlscan/tests/testnamespace.rb +0 -457
- data/redist/xmlscan/tests/testparser.rb +0 -591
- data/redist/xmlscan/tests/testscanner.rb +0 -1749
- data/redist/xmlscan/tests/testxmlchar.rb +0 -143
- data/redist/xmlscan/tests/visitor.rb +0 -34
@@ -1,1109 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# xmlscan/scanner.rb
|
3
|
-
#
|
4
|
-
# Copyright (C) Ueno Katsuhiro 2002
|
5
|
-
#
|
6
|
-
# $Id: scanner.rb,v 1.83 2003/05/12 14:13:33 katsu Exp $
|
7
|
-
#
|
8
|
-
|
9
|
-
#
|
10
|
-
# CONSIDERATIONS FOR CHARACTER ENCODINGS:
|
11
|
-
#
|
12
|
-
# There are the following common characteristics in character encodings
|
13
|
-
# which are supported by Ruby's $KCODE feature (ISO-8859-*, Shift_JIS,
|
14
|
-
# EUC, and UTF-8):
|
15
|
-
#
|
16
|
-
# - Stateless.
|
17
|
-
# - ASCII characters are encoded in the same manner as US-ASCII.
|
18
|
-
# - The octet sequences corresponding to non-ASCII characters begin
|
19
|
-
# with an octet greater than 0x80.
|
20
|
-
# - The following characters can be identified by just one octet.
|
21
|
-
# That is, the octets corresponding to the following characters in
|
22
|
-
# US-ASCII never appear as a part of an octet sequence representing a
|
23
|
-
# non-ASCII character.
|
24
|
-
#
|
25
|
-
# Whitespaces("\t", "\n", "\r", and " ") and
|
26
|
-
# ! \ " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
27
|
-
#
|
28
|
-
# Be careful that `[' and `]' are NOT included in the list!
|
29
|
-
#
|
30
|
-
# If we build a regular expression carefully in accordance with these
|
31
|
-
# characteristics, we can get the same match regardless of the value
|
32
|
-
# of $KCODE. Moreover, if we can rely on them, we can detect
|
33
|
-
# several delimiters without regular expressions. XMLScanner uses this
|
34
|
-
# fact in order to share many regular expressions in all $KCODE modes,
|
35
|
-
# and in order to optimize parsing speed.
|
36
|
-
#
|
37
|
-
|
38
|
-
require 'xmlscan/visitor'
|
39
|
-
|
40
|
-
|
41
|
-
module XMLScan
|
42
|
-
|
43
|
-
class Input
|
44
|
-
|
45
|
-
def initialize(src)
|
46
|
-
@src = src
|
47
|
-
unless src.respond_to? :gets then
|
48
|
-
if src.respond_to? :to_ary then
|
49
|
-
@v = src.to_ary
|
50
|
-
@n = -1
|
51
|
-
def self.gets ; @v.at(@n += 1) ; end
|
52
|
-
def self.lineno ; @n + 1 ; end
|
53
|
-
else
|
54
|
-
@v = @src
|
55
|
-
def self.gets ; s = @v ; @v = nil ; s ; end
|
56
|
-
end
|
57
|
-
end
|
58
|
-
if src.respond_to? :lineno then
|
59
|
-
def self.lineno ; @src.lineno ; end
|
60
|
-
end
|
61
|
-
if src.respond_to? :path then
|
62
|
-
def self.path ; @src.path ; end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
attr_reader :src
|
67
|
-
|
68
|
-
def gets ; @src.gets ; end
|
69
|
-
def lineno ; 0 ; end
|
70
|
-
def path ; '-' ; end
|
71
|
-
|
72
|
-
def self.wrap(src)
|
73
|
-
unless src.respond_to? :gets and src.respond_to? :lineno and
|
74
|
-
src.respond_to? :path then
|
75
|
-
src = new(src)
|
76
|
-
end
|
77
|
-
src
|
78
|
-
end
|
79
|
-
|
80
|
-
def self.unwrap(obj)
|
81
|
-
if self === obj then
|
82
|
-
obj.src
|
83
|
-
else
|
84
|
-
obj
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
class PrivateArray < Array
|
93
|
-
m = superclass.instance_methods(false) - Kernel.instance_methods(false)
|
94
|
-
private(*m)
|
95
|
-
end
|
96
|
-
|
97
|
-
|
98
|
-
class Source < PrivateArray
|
99
|
-
# Source inherits Array only for speed.
|
100
|
-
|
101
|
-
def initialize(src)
|
102
|
-
super()
|
103
|
-
@src = Input.wrap(src)
|
104
|
-
@eof = false
|
105
|
-
@last = nil
|
106
|
-
end
|
107
|
-
|
108
|
-
def source
|
109
|
-
Input.unwrap @src
|
110
|
-
end
|
111
|
-
|
112
|
-
|
113
|
-
def eof?
|
114
|
-
@eof and empty?
|
115
|
-
end
|
116
|
-
|
117
|
-
def abort
|
118
|
-
@eof = true
|
119
|
-
@last = nil
|
120
|
-
clear
|
121
|
-
self
|
122
|
-
end
|
123
|
-
|
124
|
-
|
125
|
-
def get
|
126
|
-
pop or
|
127
|
-
unless @eof then
|
128
|
-
last = @last
|
129
|
-
begin
|
130
|
-
src = @src.gets
|
131
|
-
unless src then
|
132
|
-
@eof = true
|
133
|
-
unshift last
|
134
|
-
last = nil
|
135
|
-
break
|
136
|
-
end
|
137
|
-
a = src.split(/(?=<|>[<>])|>/n, -1)
|
138
|
-
if last then
|
139
|
-
unless /\A[<>]/n =~ a.first then
|
140
|
-
a[0] = last << (a.first || '')
|
141
|
-
else
|
142
|
-
push last
|
143
|
-
end
|
144
|
-
end
|
145
|
-
concat a
|
146
|
-
last = pop
|
147
|
-
end while empty?
|
148
|
-
@last = last
|
149
|
-
reverse!
|
150
|
-
pop
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
|
155
|
-
def prepare
|
156
|
-
s = get
|
157
|
-
s = get and s = '>' << s if s and s.empty? # preserve first `>'
|
158
|
-
s and push s
|
159
|
-
end
|
160
|
-
|
161
|
-
|
162
|
-
def tag_end?
|
163
|
-
s = last || @last and s[0] != ?<
|
164
|
-
end
|
165
|
-
|
166
|
-
def tag_start?
|
167
|
-
s = last || @last and s[0] == ?<
|
168
|
-
end
|
169
|
-
|
170
|
-
def close_tag # tag_end?, and remove a `>'.
|
171
|
-
unless s = last || @last and s[0] != ?< then
|
172
|
-
false
|
173
|
-
else
|
174
|
-
if s == '>' or s.empty? then
|
175
|
-
s1 = get
|
176
|
-
unless s = last || @last and s[0] == ?< then # for speed up
|
177
|
-
out = [ s1 ]
|
178
|
-
out.push get while s = last || @last and s == '>' || s.empty?
|
179
|
-
out.pop unless s and s[0] != ?< # De Morgan
|
180
|
-
concat out
|
181
|
-
end
|
182
|
-
end
|
183
|
-
true
|
184
|
-
end
|
185
|
-
end
|
186
|
-
|
187
|
-
|
188
|
-
def get_text # get until tag_start?
|
189
|
-
s = last || @last and s[0] != ?< and get
|
190
|
-
end
|
191
|
-
|
192
|
-
def get_tag # get until tag_end?
|
193
|
-
s = last || @last and s[0] == ?< and get
|
194
|
-
end
|
195
|
-
|
196
|
-
def get_plain
|
197
|
-
s = get
|
198
|
-
s = '>' << s unless not s or (c = s[0]) == ?< or c == ?> # De Morgan
|
199
|
-
s
|
200
|
-
end
|
201
|
-
|
202
|
-
def lineno
|
203
|
-
@src.lineno
|
204
|
-
end
|
205
|
-
|
206
|
-
def path
|
207
|
-
@src.path
|
208
|
-
end
|
209
|
-
|
210
|
-
|
211
|
-
# The following methods are for debug.
|
212
|
-
|
213
|
-
def inspect
|
214
|
-
a = []
|
215
|
-
reverse_each { |i|
|
216
|
-
a.push ">" unless /\A[<>]/n =~ i
|
217
|
-
a.push i.inspect
|
218
|
-
}
|
219
|
-
last = []
|
220
|
-
if @last then
|
221
|
-
last.push ">" unless /\A[<>]/n =~ @last
|
222
|
-
last.push @last.inspect
|
223
|
-
end
|
224
|
-
a.push '#eof' if @eof
|
225
|
-
"((#{a.join(' ')}) (#{last.join(' ')}) . #{source.inspect})"
|
226
|
-
end
|
227
|
-
|
228
|
-
def each
|
229
|
-
prepare
|
230
|
-
while s = get
|
231
|
-
yield s
|
232
|
-
end
|
233
|
-
self
|
234
|
-
end
|
235
|
-
|
236
|
-
def test
|
237
|
-
last or @last or (s = get and push s and s)
|
238
|
-
end
|
239
|
-
|
240
|
-
end
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
class XMLScanner
|
245
|
-
|
246
|
-
class << self
|
247
|
-
|
248
|
-
def provided_options
|
249
|
-
options = []
|
250
|
-
private_instance_methods(false).each { |i|
|
251
|
-
options.push $' if /\Aapply_option_/n =~ i
|
252
|
-
}
|
253
|
-
options
|
254
|
-
end
|
255
|
-
|
256
|
-
def apply_option(instance, option)
|
257
|
-
instance.__send__ "apply_option_#{option}"
|
258
|
-
end
|
259
|
-
|
260
|
-
def apply_options(instance, options)
|
261
|
-
h = {}
|
262
|
-
options.each { |i| h[i.to_s] = true }
|
263
|
-
options = h
|
264
|
-
ancestors.each { |klass|
|
265
|
-
if klass.respond_to? :provided_options then
|
266
|
-
klass.provided_options.each { |i|
|
267
|
-
if options.include? i then
|
268
|
-
options.delete i
|
269
|
-
klass.apply_option instance, i
|
270
|
-
end
|
271
|
-
}
|
272
|
-
end
|
273
|
-
}
|
274
|
-
unless options.empty? then
|
275
|
-
raise ArgumentError, "undefined option `#{options.keys[0]}'"
|
276
|
-
end
|
277
|
-
instance
|
278
|
-
end
|
279
|
-
private :apply_options
|
280
|
-
|
281
|
-
def new(visitor, *options)
|
282
|
-
instance = super(visitor)
|
283
|
-
apply_options instance, options
|
284
|
-
end
|
285
|
-
|
286
|
-
end
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
def initialize(visitor)
|
291
|
-
@visitor = visitor
|
292
|
-
@decoration = nil
|
293
|
-
@src = nil
|
294
|
-
@kcode = nil
|
295
|
-
end
|
296
|
-
|
297
|
-
|
298
|
-
def kcode=(kcode)
|
299
|
-
@kcode = Regexp.new('', nil, kcode || '').kcode
|
300
|
-
kcode
|
301
|
-
end
|
302
|
-
|
303
|
-
attr_reader :kcode
|
304
|
-
|
305
|
-
|
306
|
-
def decorate(decoration)
|
307
|
-
unless @decoration then
|
308
|
-
@visitor = @decoration = Decoration.new(@visitor)
|
309
|
-
end
|
310
|
-
@decoration.expand decoration
|
311
|
-
end
|
312
|
-
private :decorate
|
313
|
-
|
314
|
-
|
315
|
-
def lineno
|
316
|
-
@src && @src.lineno
|
317
|
-
end
|
318
|
-
|
319
|
-
def path
|
320
|
-
@src && @src.path
|
321
|
-
end
|
322
|
-
|
323
|
-
def source
|
324
|
-
@src.source
|
325
|
-
end
|
326
|
-
|
327
|
-
|
328
|
-
private
|
329
|
-
|
330
|
-
def parse_error(msg)
|
331
|
-
@visitor.parse_error msg
|
332
|
-
end
|
333
|
-
|
334
|
-
def wellformed_error(msg)
|
335
|
-
@visitor.wellformed_error msg
|
336
|
-
end
|
337
|
-
|
338
|
-
def valid_error(msg)
|
339
|
-
@visitor.valid_error msg
|
340
|
-
end
|
341
|
-
|
342
|
-
def warning(msg)
|
343
|
-
@visitor.warning msg
|
344
|
-
end
|
345
|
-
|
346
|
-
|
347
|
-
def on_xmldecl
|
348
|
-
@visitor.on_xmldecl
|
349
|
-
end
|
350
|
-
|
351
|
-
def on_xmldecl_version(str)
|
352
|
-
@visitor.on_xmldecl_version str
|
353
|
-
end
|
354
|
-
|
355
|
-
def on_xmldecl_encoding(str)
|
356
|
-
@visitor.on_xmldecl_encoding str
|
357
|
-
end
|
358
|
-
|
359
|
-
def on_xmldecl_standalone(str)
|
360
|
-
@visitor.on_xmldecl_standalone str
|
361
|
-
end
|
362
|
-
|
363
|
-
def on_xmldecl_other(name, value)
|
364
|
-
@visitor.on_xmldecl_other name, value
|
365
|
-
end
|
366
|
-
|
367
|
-
def on_xmldecl_end
|
368
|
-
@visitor.on_xmldecl_end
|
369
|
-
end
|
370
|
-
|
371
|
-
def on_doctype(root, pubid, sysid)
|
372
|
-
@visitor.on_doctype root, pubid, sysid
|
373
|
-
end
|
374
|
-
|
375
|
-
def on_prolog_space(str)
|
376
|
-
@visitor.on_prolog_space str
|
377
|
-
end
|
378
|
-
|
379
|
-
def on_comment(str)
|
380
|
-
@visitor.on_comment str
|
381
|
-
end
|
382
|
-
|
383
|
-
def on_pi(target, pi)
|
384
|
-
@visitor.on_pi target, pi
|
385
|
-
end
|
386
|
-
|
387
|
-
def on_chardata(str)
|
388
|
-
@visitor.on_chardata str
|
389
|
-
end
|
390
|
-
|
391
|
-
def on_cdata(str)
|
392
|
-
@visitor.on_cdata str
|
393
|
-
end
|
394
|
-
|
395
|
-
def on_etag(name)
|
396
|
-
@visitor.on_etag name
|
397
|
-
end
|
398
|
-
|
399
|
-
def on_entityref(ref)
|
400
|
-
@visitor.on_entityref ref
|
401
|
-
end
|
402
|
-
|
403
|
-
def on_charref(code)
|
404
|
-
@visitor.on_charref code
|
405
|
-
end
|
406
|
-
|
407
|
-
def on_charref_hex(code)
|
408
|
-
@visitor.on_charref_hex code
|
409
|
-
end
|
410
|
-
|
411
|
-
def on_start_document
|
412
|
-
@visitor.on_start_document
|
413
|
-
end
|
414
|
-
|
415
|
-
def on_end_document
|
416
|
-
@visitor.on_end_document
|
417
|
-
end
|
418
|
-
|
419
|
-
|
420
|
-
# <hoge fuga="foo&bar;&#38;&#x26;foo" />HOGE
|
421
|
-
# ^     ^     ^  ^    ^    ^     ^  ^ ^ ^
|
422
|
-
# 1     2     3  4    5    6     7  8 9 A
|
423
|
-
#
|
424
|
-
# The following method will be called with the following arguments
|
425
|
-
# when the parser reaches the above point;
|
426
|
-
#
|
427
|
-
# 1: on_stag ('hoge')
|
428
|
-
# 2: on_attribute ('fuga')
|
429
|
-
# 3: on_attr_value ('foo')
|
430
|
-
# 4: on_attr_entityref ('bar')
|
431
|
-
# 5: on_attr_charref (38)
|
432
|
-
# 6: on_attr_charref_hex (38)
|
433
|
-
# 7: on_attr_value ('foo')
|
434
|
-
# 8: on_attribute_end ('fuga')
|
435
|
-
# 9: on_stag_end_empty ('hoge')
|
436
|
-
# or
|
437
|
-
# on_stag_end ('hoge')
|
438
|
-
#
|
439
|
-
# A: on_chardata ('HOGE')
|
440
|
-
|
441
|
-
def on_stag(name)
|
442
|
-
@visitor.on_stag name
|
443
|
-
end
|
444
|
-
|
445
|
-
def on_attribute(name)
|
446
|
-
@visitor.on_attribute name
|
447
|
-
end
|
448
|
-
|
449
|
-
def on_attr_value(str)
|
450
|
-
@visitor.on_attr_value str
|
451
|
-
end
|
452
|
-
|
453
|
-
def on_attr_entityref(ref)
|
454
|
-
@visitor.on_attr_entityref ref
|
455
|
-
end
|
456
|
-
|
457
|
-
def on_attr_charref(code)
|
458
|
-
@visitor.on_attr_charref code
|
459
|
-
end
|
460
|
-
|
461
|
-
def on_attr_charref_hex(code)
|
462
|
-
@visitor.on_attr_charref_hex code
|
463
|
-
end
|
464
|
-
|
465
|
-
def on_attribute_end(name)
|
466
|
-
@visitor.on_attribute_end name
|
467
|
-
end
|
468
|
-
|
469
|
-
def on_stag_end_empty(name)
|
470
|
-
@visitor.on_stag_end_empty name
|
471
|
-
end
|
472
|
-
|
473
|
-
def on_stag_end(name)
|
474
|
-
@visitor.on_stag_end name
|
475
|
-
end
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
private
|
480
|
-
|
481
|
-
module KcodeRegexp
|
482
|
-
private
|
483
|
-
Kcodes = [ //n.kcode, //e.kcode, //s.kcode, //u.kcode ]
|
484
|
-
def kcode_regexp(re)
|
485
|
-
h = {}
|
486
|
-
Kcodes.each { |i| h[i] = Regexp.new(re, nil, i) }
|
487
|
-
h.default = Regexp.new(re)
|
488
|
-
h
|
489
|
-
end
|
490
|
-
end
|
491
|
-
extend KcodeRegexp
|
492
|
-
|
493
|
-
|
494
|
-
InvalidEntityRef = kcode_regexp('(?=[^#\d\w]|\z)')
|
495
|
-
|
496
|
-
def scan_chardata(s)
|
497
|
-
while true
|
498
|
-
unless /&/n =~ s then
|
499
|
-
on_chardata s
|
500
|
-
else
|
501
|
-
s = $`
|
502
|
-
on_chardata s unless s.empty?
|
503
|
-
ref = nil
|
504
|
-
$'.split('&', -1).each { |s|
|
505
|
-
unless /(?!\A);|(?=[ \t\r\n])/n =~ s and not $&.empty? then
|
506
|
-
if InvalidEntityRef[@kcode] =~ s and not (ref = $`).strip.empty?
|
507
|
-
then
|
508
|
-
parse_error "reference to `#{ref}' doesn't end with `;'"
|
509
|
-
else
|
510
|
-
parse_error "`&' is not used for entity/character references"
|
511
|
-
on_chardata('&' << s)
|
512
|
-
next
|
513
|
-
end
|
514
|
-
end
|
515
|
-
ref = $`
|
516
|
-
s = $'
|
517
|
-
if /\A[^#]/n =~ ref then
|
518
|
-
on_entityref ref
|
519
|
-
elsif /\A#(\d+)\z/n =~ ref then
|
520
|
-
on_charref $1.to_i
|
521
|
-
elsif /\A#x([\dA-Fa-f]+)\z/n =~ ref then
|
522
|
-
on_charref_hex $1.hex
|
523
|
-
else
|
524
|
-
parse_error "invalid character reference `#{ref}'"
|
525
|
-
end
|
526
|
-
on_chardata s unless s.empty?
|
527
|
-
}
|
528
|
-
end
|
529
|
-
s = @src.get_text
|
530
|
-
break unless s
|
531
|
-
s = '>' << s unless s == '>'
|
532
|
-
end
|
533
|
-
end
|
534
|
-
|
535
|
-
|
536
|
-
def scan_attvalue(s) # almost a copy & paste of scan_chardata
|
537
|
-
unless /&/n =~ s then
|
538
|
-
on_attr_value s
|
539
|
-
else
|
540
|
-
s = $`
|
541
|
-
on_attr_value s unless s.empty?
|
542
|
-
ref = nil
|
543
|
-
$'.split('&', -1).each { |s|
|
544
|
-
unless /(?!\A);|(?=[ \t\r\n])/n =~ s and not $&.empty? then
|
545
|
-
if InvalidEntityRef[@kcode] =~ s and not (ref = $`).strip.empty?
|
546
|
-
then
|
547
|
-
parse_error "reference to `#{ref}' doesn't end with `;'"
|
548
|
-
else
|
549
|
-
parse_error "`&' is not used for entity/character references"
|
550
|
-
on_attr_value('&' << s)
|
551
|
-
next
|
552
|
-
end
|
553
|
-
end
|
554
|
-
ref = $`
|
555
|
-
s = $'
|
556
|
-
if /\A[^#]/n =~ ref then
|
557
|
-
on_attr_entityref ref
|
558
|
-
elsif /\A#(\d+)\z/n =~ ref then
|
559
|
-
on_attr_charref $1.to_i
|
560
|
-
elsif /\A#x([\dA-Fa-f]+)\z/n =~ ref then
|
561
|
-
on_attr_charref_hex $1.hex
|
562
|
-
else
|
563
|
-
parse_error "invalid character reference `#{ref}'"
|
564
|
-
end
|
565
|
-
on_attr_value s unless s.empty?
|
566
|
-
}
|
567
|
-
end
|
568
|
-
end
|
569
|
-
|
570
|
-
|
571
|
-
def scan_comment(s)
|
572
|
-
s[0,4] = '' # remove `<!--'
|
573
|
-
comm = ''
|
574
|
-
until /--/n =~ s
|
575
|
-
comm << s
|
576
|
-
s = @src.get_plain
|
577
|
-
unless s then
|
578
|
-
parse_error "unterminated comment meets EOF"
|
579
|
-
return on_comment(comm)
|
580
|
-
end
|
581
|
-
end
|
582
|
-
comm << $`
|
583
|
-
until (s = $').empty? and @src.close_tag
|
584
|
-
if s == '-' and @src.close_tag then # --->
|
585
|
-
parse_error "comment ending in `--->' is not allowed"
|
586
|
-
comm << s
|
587
|
-
break
|
588
|
-
end
|
589
|
-
parse_error "comment includes `--'"
|
590
|
-
comm << '--'
|
591
|
-
until /--/n =~ s # copy & paste for performance
|
592
|
-
comm << s
|
593
|
-
s = @src.get_plain
|
594
|
-
unless s then
|
595
|
-
parse_error "unterminated comment meets EOF"
|
596
|
-
return on_comment(comm)
|
597
|
-
end
|
598
|
-
end
|
599
|
-
comm << $`
|
600
|
-
end
|
601
|
-
on_comment comm
|
602
|
-
end
|
603
|
-
|
604
|
-
|
605
|
-
def scan_pi(s)
|
606
|
-
unless /\A<\?([^ \t\n\r?]+)(?:[ \t\n\r]+|(?=\?\z))/n =~ s then
|
607
|
-
parse_error "parse error at `<?'"
|
608
|
-
s << '>' if @src.close_tag
|
609
|
-
on_chardata s
|
610
|
-
else
|
611
|
-
target = $1
|
612
|
-
pi = $'
|
613
|
-
until pi[-1] == ?? and @src.close_tag
|
614
|
-
s = @src.get_plain
|
615
|
-
unless s then
|
616
|
-
parse_error "unterminated PI meets EOF"
|
617
|
-
return on_pi(target, pi)
|
618
|
-
end
|
619
|
-
pi << s
|
620
|
-
end
|
621
|
-
pi.chop! # remove last `?'
|
622
|
-
on_pi target, pi
|
623
|
-
end
|
624
|
-
end
|
625
|
-
|
626
|
-
|
627
|
-
CDATAPattern = kcode_regexp('\]\]\z')
|
628
|
-
|
629
|
-
def scan_cdata(s)
|
630
|
-
cdata = s
|
631
|
-
re = CDATAPattern[@kcode]
|
632
|
-
until re =~ cdata and @src.close_tag
|
633
|
-
s = @src.get_plain
|
634
|
-
unless s then
|
635
|
-
parse_error "unterminated CDATA section meets EOF"
|
636
|
-
return on_cdata(cdata)
|
637
|
-
end
|
638
|
-
cdata << s
|
639
|
-
end
|
640
|
-
cdata.chop!.chop! # remove ']]'
|
641
|
-
on_cdata cdata
|
642
|
-
end
|
643
|
-
|
644
|
-
|
645
|
-
def found_unclosed_etag(name)
|
646
|
-
if @src.tag_start? then
|
647
|
-
parse_error "unclosed end tag `#{name}' meets another tag"
|
648
|
-
else
|
649
|
-
parse_error "unclosed end tag `#{name}' meets EOF"
|
650
|
-
end
|
651
|
-
end
|
652
|
-
|
653
|
-
def found_empty_etag
|
654
|
-
parse_error "parse error at `</'"
|
655
|
-
on_chardata '</>'
|
656
|
-
end
|
657
|
-
|
658
|
-
|
659
|
-
def scan_etag(s)
|
660
|
-
s[0,2] = '' # remove '</'
|
661
|
-
if s.empty? then
|
662
|
-
if @src.close_tag then # </>
|
663
|
-
return found_empty_etag
|
664
|
-
else # </< or </[EOF]
|
665
|
-
parse_error "parse error at `</'"
|
666
|
-
s << '>' if @src.close_tag
|
667
|
-
return on_chardata('</' << s)
|
668
|
-
end
|
669
|
-
elsif /[ \t\n\r]+/n =~ s then
|
670
|
-
s1, s2 = $`, $'
|
671
|
-
if s1.empty? then # </ tag
|
672
|
-
parse_error "parse error at `</'"
|
673
|
-
s << '>' if @src.close_tag
|
674
|
-
return on_chardata('</' + s)
|
675
|
-
elsif not s2.empty? then # </ta g
|
676
|
-
parse_error "illegal whitespace is found within end tag `#{s1}'"
|
677
|
-
while @src.get_tag
|
678
|
-
end
|
679
|
-
end
|
680
|
-
s = s1
|
681
|
-
end
|
682
|
-
found_unclosed_etag s unless @src.close_tag # </tag< or </tag[EOF]
|
683
|
-
on_etag s
|
684
|
-
end
|
685
|
-
|
686
|
-
|
687
|
-
def found_empty_stag
|
688
|
-
parse_error "parse error at `<'"
|
689
|
-
on_chardata '<>'
|
690
|
-
end
|
691
|
-
|
692
|
-
def found_unclosed_stag(name)
|
693
|
-
if @src.tag_start? then
|
694
|
-
parse_error "unclosed start tag `#{name}' meets another tag"
|
695
|
-
else
|
696
|
-
parse_error "unclosed start tag `#{name}' meets EOF"
|
697
|
-
end
|
698
|
-
end
|
699
|
-
|
700
|
-
def found_unclosed_emptyelem(name)
|
701
|
-
if @src.tag_start? then
|
702
|
-
parse_error "unclosed empty element tag `#{name}' meets another tag"
|
703
|
-
else
|
704
|
-
parse_error "unclosed empty element tag `#{name}' meets EOF"
|
705
|
-
end
|
706
|
-
end
|
707
|
-
|
708
|
-
|
709
|
-
def found_stag_error(s)
|
710
|
-
if /\A[\/='"]/n =~ s then
|
711
|
-
tok, s = $&, $'
|
712
|
-
elsif /(?=[ \t\n\r\/='"])/n =~ s then
|
713
|
-
tok, s = $`, $'
|
714
|
-
else
|
715
|
-
tok, s = s, nil
|
716
|
-
end
|
717
|
-
parse_error "parse error at `#{tok}'"
|
718
|
-
s
|
719
|
-
end
|
720
|
-
|
721
|
-
|
722
|
-
def scan_stag(s)
|
723
|
-
unless /(?=[\/ \t\n\r='"])/n =~ s then
|
724
|
-
name = s
|
725
|
-
name[0,1] = '' # remove `<'
|
726
|
-
if name.empty? then
|
727
|
-
if @src.close_tag then # <>
|
728
|
-
return found_empty_stag
|
729
|
-
else # << or <[EOF]
|
730
|
-
parse_error "parse error at `<'"
|
731
|
-
return on_chardata('<')
|
732
|
-
end
|
733
|
-
end
|
734
|
-
on_stag name
|
735
|
-
found_unclosed_stag name unless @src.close_tag
|
736
|
-
on_stag_end name
|
737
|
-
else
|
738
|
-
name = $`
|
739
|
-
s = $'
|
740
|
-
name[0,1] = '' # remove `<'
|
741
|
-
if name.empty? then # `< tag' or `<=`
|
742
|
-
parse_error "parse error at `<'"
|
743
|
-
s << '>' if @src.close_tag
|
744
|
-
return on_chardata('<' << s)
|
745
|
-
end
|
746
|
-
on_stag name
|
747
|
-
emptyelem = false
|
748
|
-
key,val,error,qmark,c = nil
|
749
|
-
begin
|
750
|
-
continue = false
|
751
|
-
s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/n
|
752
|
-
) { |key,val,error|
|
753
|
-
if key then # key="value"
|
754
|
-
on_attribute key
|
755
|
-
qmark = val.slice!(0,1)
|
756
|
-
if val[-1] == qmark[0] then
|
757
|
-
val.chop!
|
758
|
-
scan_attvalue val unless val.empty?
|
759
|
-
else
|
760
|
-
scan_attvalue val unless val.empty?
|
761
|
-
begin
|
762
|
-
s = @src.get
|
763
|
-
unless s then
|
764
|
-
parse_error "unterminated attribute `#{key}' meets EOF"
|
765
|
-
break
|
766
|
-
end
|
767
|
-
c = s[0]
|
768
|
-
val, s = s.split(qmark, 2)
|
769
|
-
if c == ?< then
|
770
|
-
wellformed_error "`<' is found in attribute `#{key}'"
|
771
|
-
elsif c != ?> then
|
772
|
-
scan_attvalue '>'
|
773
|
-
end
|
774
|
-
scan_attvalue val if c
|
775
|
-
end until s
|
776
|
-
continue = s # if eof then continue is false, else true.
|
777
|
-
end
|
778
|
-
on_attribute_end key
|
779
|
-
elsif error then
|
780
|
-
continue = s = found_stag_error(error)
|
781
|
-
else
|
782
|
-
emptyelem = true
|
783
|
-
end
|
784
|
-
}
|
785
|
-
end while continue
|
786
|
-
unless @src.close_tag then
|
787
|
-
if emptyelem then
|
788
|
-
found_unclosed_emptyelem name
|
789
|
-
else
|
790
|
-
found_unclosed_stag name
|
791
|
-
end
|
792
|
-
end
|
793
|
-
if emptyelem then
|
794
|
-
on_stag_end_empty name
|
795
|
-
else
|
796
|
-
on_stag_end name
|
797
|
-
end
|
798
|
-
end
|
799
|
-
end
|
800
|
-
|
801
|
-
|
802
|
-
def scan_bang_tag(s)
|
803
|
-
parse_error "parse error at `<!'"
|
804
|
-
s << '>' if @src.close_tag
|
805
|
-
on_chardata s
|
806
|
-
end
|
807
|
-
|
808
|
-
|
809
|
-
def scan_content(s)
|
810
|
-
src = @src # for speed
|
811
|
-
while s
|
812
|
-
if (c = s[0]) == ?< then
|
813
|
-
if (c = s[1]) == ?/ then
|
814
|
-
scan_etag s
|
815
|
-
elsif c == ?! then
|
816
|
-
if s[2] == ?- and s[3] == ?- then
|
817
|
-
scan_comment s
|
818
|
-
elsif /\A<!\[CDATA\[/n =~ s then
|
819
|
-
scan_cdata $'
|
820
|
-
else
|
821
|
-
scan_bang_tag s
|
822
|
-
end
|
823
|
-
elsif c == ?? then
|
824
|
-
scan_pi s
|
825
|
-
else
|
826
|
-
scan_stag s
|
827
|
-
end
|
828
|
-
else
|
829
|
-
scan_chardata s
|
830
|
-
end
|
831
|
-
s = src.get
|
832
|
-
end
|
833
|
-
end
|
834
|
-
|
835
|
-
|
836
|
-
def get_until_qmark(str, qmark)
|
837
|
-
begin
|
838
|
-
#s = @src.get_plain
|
839
|
-
s = @src.get
|
840
|
-
break unless s
|
841
|
-
c = s[0]
|
842
|
-
v, s = s.split(qmark, 2)
|
843
|
-
str << '>' unless c == ?< or c == ?> # De Morgan
|
844
|
-
str << v if c
|
845
|
-
end until s
|
846
|
-
s
|
847
|
-
end
|
848
|
-
|
849
|
-
|
850
|
-
XMLDeclPattern = kcode_regexp(%q{[ \t\n\r]([\-_\d\w]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|(\?\z)|([\-_.\d\w]+|[^ \t\n\r])}) #'
|
851
|
-
|
852
|
-
def scan_xmldecl(s)
|
853
|
-
endmark = nil
|
854
|
-
state = 0
|
855
|
-
on_xmldecl
|
856
|
-
begin
|
857
|
-
continue = false
|
858
|
-
s.scan(XMLDeclPattern[@kcode]) { |key,val,endmark,error|
|
859
|
-
if key then
|
860
|
-
qmark = val.slice!(0,1) # remove quotation marks
|
861
|
-
if val[-1] == qmark[0] then
|
862
|
-
val.chop!
|
863
|
-
else
|
864
|
-
continue = s = get_until_qmark(val, qmark)
|
865
|
-
unless s then
|
866
|
-
parse_error "unterminated XML declaration meets EOF"
|
867
|
-
endmark = true
|
868
|
-
end
|
869
|
-
end
|
870
|
-
if state == 0 and key == 'version' then
|
871
|
-
on_xmldecl_version val
|
872
|
-
state = 1
|
873
|
-
elsif state == 1 and key == 'encoding' then
|
874
|
-
on_xmldecl_encoding val
|
875
|
-
state = 2
|
876
|
-
elsif state >= 1 and key == 'standalone' then
|
877
|
-
on_xmldecl_standalone val
|
878
|
-
state = 3
|
879
|
-
else
|
880
|
-
state = 3
|
881
|
-
if key == 'version' then
|
882
|
-
parse_error "version declaration must not be here"
|
883
|
-
on_xmldecl_version val
|
884
|
-
elsif key == 'encoding' then
|
885
|
-
parse_error "encoding declaration must not be here"
|
886
|
-
on_xmldecl_encoding val
|
887
|
-
state = 2 if state < 2
|
888
|
-
elsif key == 'standalone' then
|
889
|
-
parse_error "standalone declaration must not be here"
|
890
|
-
on_xmldecl_standalone val
|
891
|
-
else
|
892
|
-
parse_error "unknown declaration `#{key}' in XML declaration"
|
893
|
-
on_xmldecl_other key, val
|
894
|
-
end
|
895
|
-
end
|
896
|
-
elsif endmark then
|
897
|
-
unless @src.close_tag then
|
898
|
-
parse_error "unexpected `#{endmark}' found in XML declaration"
|
899
|
-
endmark = nil
|
900
|
-
end
|
901
|
-
# the loop always exits here.
|
902
|
-
else
|
903
|
-
parse_error "parse error at `#{error}'"
|
904
|
-
end
|
905
|
-
}
|
906
|
-
end while !endmark and continue || s = @src.get_plain
|
907
|
-
parse_error "unterminated XML declaration meets EOF" unless s or endmark
|
908
|
-
parse_error "no declaration found in XML declaration" if state == 0
|
909
|
-
on_xmldecl_end
|
910
|
-
end
|
911
|
-
|
912
|
-
|
913
|
-
SkipDTD = kcode_regexp(%q{(['"]|\A<!--|\A<\?|--\z|\?\z)|\]\s*\z}) #'
|
914
|
-
|
915
|
-
# Skips over an internal DTD subset without interpreting it.
#
# Each chunk of text is scanned with SkipDTD. While a quoted literal, a
# comment (`<!--' ... `--') or a processing instruction (`<?' ... `?') is
# open, only its matching closing delimiter is honoured. Outside of those,
# a `]' at the end of the chunk for which @src.close_tag is truthy ends the
# skipping. Further chunks are fetched with @src.get until then.
#
# s:: the first chunk of text to examine.
#
# Reports a parse error through parse_error if @src.get runs out of input
# before the end of the subset is found.
def skip_internal_dtd(s)
  quote = nil
  continue = true
  begin # do-while: the chunk passed in is examined before fetching more
    s.scan(SkipDTD[@kcode]) { |q,|
      # q is capture group 1; it is nil when the `]' alternative matched.
      if quote
        # One-character delimiters (quotes and `?') close as soon as they
        # reappear; the two-character `--' additionally needs @src.tag_end?.
        quote = nil if quote == q && (quote.size == 1 || @src.tag_end?)
      elsif q
        case q
        when '<!--'
          quote = '--'
        when '<?'
          quote = '?'
        when '"', "'"
          quote = q
        end
      elsif @src.close_tag
        continue = false
      end
    }
  end while continue && (s = @src.get)
  parse_error "unterminated internal DTD subset meets EOF" unless s
end
|
937
|
-
|
938
|
-
|
939
|
-
# Handles an internal DTD subset: emits a warning that it is not supported
# and then skips over it with skip_internal_dtd.
#
# s:: the text at which the internal subset starts.
def scan_internal_dtd(s)
  warning("internal DTD subset is not supported")
  skip_internal_dtd(s)
end
|
943
|
-
|
944
|
-
|
945
|
-
# Called by scan_doctype when the token in the external-ID keyword position
# is neither `PUBLIC' nor `SYSTEM'.
#
# pubsys:: the offending token (not used by this implementation).
#
# Reports a parse error and returns the keyword to use instead, 'SYSTEM'.
def found_invalid_pubsys(pubsys)
  parse_error("`PUBLIC' or `SYSTEM' should be here")
  'SYSTEM'
end
|
949
|
-
|
950
|
-
|
951
|
-
# Tokenizer pattern for the body of a DOCTYPE declaration (see scan_doctype).
#   group 1: after one whitespace character, an unquoted token made of
#            characters other than whitespace, `/', quotes, `=' and `['
#   group 2: after one whitespace character, a single- or double-quoted
#            literal whose closing quote may be missing
#   group 3: otherwise, a run of `-', `_', `.', digits and word characters,
#            or any single non-whitespace character
# The result is indexed with @kcode where it is used.
# NOTE(review): the trailing #" presumably only balances the quote for editor
# syntax highlighting -- confirm before removing.
DoctypePattern = kcode_regexp(%q{[ \t\n\r](?:([^ \t\n\r\/'"=\[]+)|('[^']*'?|"[^"]*"?))|([\-_.\d\w]+|[^ \t\n\r])}) #"
|
952
|
-
|
953
|
-
# Parses the body of a DOCTYPE declaration.
#
# The text is tokenized with DoctypePattern. The first bare name becomes the
# root element name and the second one the external-ID keyword (anything
# other than `PUBLIC'/`SYSTEM' is routed through found_invalid_pubsys).
# Quoted literals that follow the keyword are collected as the system and
# public identifiers. A `[' starts an internal subset, which is handed to
# scan_internal_dtd after on_doctype has been called. When the current text
# is used up, @src.close_tag decides whether the declaration is finished;
# otherwise more text is read with @src.get_plain.
#
# s:: the text following `<!DOCTYPE'.
#
# Problems are reported through parse_error; on_doctype is always called
# with (root, pubid, sysid).
def scan_doctype(s)
  root = syspub = sysid = pubid = nil
  internal_dtd = false
  pattern = DoctypePattern[@kcode]
  begin
    if pattern =~ s
      token = Regexp.last_match
      name, str, delim = token[1], token[2], token[3]
      s = token.post_match
      if name
        if root && syspub
          # root name and keyword are both known: no further bare name fits
          parse_error "parse error at `#{name}'"
        elsif root
          name = found_invalid_pubsys(name) unless %w[PUBLIC SYSTEM].include?(name)
          syspub = name
        else
          root = name
        end
      elsif str
        qmark = str.slice!(0, 1) # strip the opening quotation mark
        if syspub
          if str[-1] == qmark[0]
            str.chop! # strip the closing quotation mark
          else
            # The literal is not closed within this token.
            # NOTE(review): get_until_qmark presumably extends str up to the
            # closing quote and returns the text after it -- confirm.
            s = get_until_qmark(str, qmark) || ''
          end
          if !sysid
            sysid = str
          elsif !pubid && syspub == 'PUBLIC'
            # second literal after PUBLIC: the first one was the public ID
            pubid, sysid = sysid, str
          else
            parse_error "too many external ID literals in DOCTYPE"
          end
        else
          # A literal before any keyword: report it and rescan its contents.
          parse_error "parse error at `#{qmark}'"
          s = str << s
        end
      elsif delim == '['
        internal_dtd = true
        break
      else
        parse_error "parse error at `#{delim}'"
      end
    else
      s = ''
    end
    if s.empty?
      break if @src.close_tag
      s = @src.get_plain
    end
  end while s
  parse_error "unterminated DOCTYPE declaration meets EOF" unless s
  parse_error "no root element is specified in DOCTYPE" unless root
  parse_error "too few external ID literals in DOCTYPE" if syspub && !sysid
  if syspub == 'PUBLIC' && !pubid
    # PUBLIC with a single literal: treat that literal as the public ID
    pubid, sysid = sysid, nil
  end
  on_doctype root, pubid, sysid
  scan_internal_dtd s if internal_dtd
end
|
1018
|
-
|
1019
|
-
|
1020
|
-
# Scans the document prolog and then hands over to scan_content.
#
# An XML declaration at the very start is passed to scan_xmldecl. After
# that, comments, processing instructions, at most one DOCTYPE declaration
# and whitespace-only text are consumed in a loop. The first chunk that is
# none of these (or input running out) ends the prolog, and scan_content is
# called with that chunk, or with the next chunk from the source if there
# is none.
#
# s:: the first chunk of the document.
def scan_prolog(s)
  if /\A<\?xml(?=[ \t\n\r])/n =~ s
    scan_xmldecl(Regexp.last_match.post_match)
    s = @src.get
  end
  doctype_allowed = true
  source = @src # local alias; the original keeps one "for speed"
  while s
    if s[0] == ?<
      marker = s[1]
      if marker == ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment(s)
        elsif /\A<!DOCTYPE(?=[ \t\n\r])/n =~ s && doctype_allowed
          # only the first DOCTYPE is handled here; a second one ends the prolog
          doctype_allowed = false
          scan_doctype(Regexp.last_match.post_match)
        else
          break
        end
      elsif marker == ??
        scan_pi(s)
      else
        break
      end
      s = source.get
    elsif /[^ \t\r\n]/ !~ s
      # whitespace-only (or empty) text between prolog items
      on_prolog_space(s) unless s.empty?
      s = source.get_plain
    else
      break
    end
  end
  scan_content(s || source.get)
end
|
1053
|
-
|
1054
|
-
|
1055
|
-
# Scans one whole document from @src: signals on_start_document, calls
# @src.prepare, scans everything starting at the prolog, and finally
# signals on_end_document.
def scan_document
  on_start_document
  @src.prepare
  scan_prolog(@src.get)
  on_end_document
end
|
1061
|
-
|
1062
|
-
|
1063
|
-
# Wraps the raw input +src+ in a new Source object and returns it.
def make_source(src)
  Source.new(src)
end
|
1066
|
-
|
1067
|
-
|
1068
|
-
# Methods defined from here on are public.
public
|
1069
|
-
|
1070
|
-
# Parses a complete document read from +src+.
#
# The input is wrapped with make_source and kept in @src for the duration
# of the scan. @src is reset to nil afterwards, whether or not scanning
# raised.
#
# Returns self.
def parse_document(src)
  @src = make_source(src)
  begin
    scan_document
  ensure
    # drop the reference to the source even when scanning fails
    @src = nil
  end
  self
end
|
1079
|
-
|
1080
|
-
# +parse+ is a second name for parse_document.
alias parse parse_document
|
1081
|
-
|
1082
|
-
end
|
1083
|
-
|
1084
|
-
|
1085
|
-
end
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
# Self-test driver, executed only when this file is run directly.
# It scans the input provided through ARGF with a visitor that merely
# reports errors, then prints the user CPU time spent to STDERR.
if $0 == __FILE__ then
  # Visitor whose error callbacks print "path:line: message" to STDERR,
  # but only when $VERBOSE is set.
  class TestVisitor
    include XMLScan::Visitor

    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end

    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  # The scanner is kept in a global so that the visitor callbacks above can
  # reach it for position information.
  $s = scan = XMLScan::XMLScanner.new(TestVisitor.new)
  src = ARGF
  # ARGF has no #path; give it one that returns the current file name.
  # NOTE(review): presumably the scanner's source asks its input for #path --
  # confirm.
  def src.path; filename; end
  # Process.times replaces Time.times, which was removed in Ruby 1.9 and
  # would raise NoMethodError here. Process.times also exists on Ruby 1.8.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|