rgen 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +20 -1
- data/MIT-LICENSE +1 -1
- data/README +12 -9
- data/lib/instantiators/ea_instantiator.rb +36 -0
- data/lib/metamodels/uml13_metamodel.rb +559 -0
- data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
- data/lib/mmgen/metamodel_generator.rb +5 -5
- data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
- data/lib/mmgen/mmgen.rb +6 -4
- data/lib/mmgen/templates/annotations.tpl +37 -0
- data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
- data/lib/rgen/ecore/ecore.rb +190 -0
- data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
- data/lib/rgen/ecore/ecore_transformer.rb +85 -0
- data/lib/rgen/environment.rb +9 -24
- data/lib/rgen/find_helper.rb +68 -0
- data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
- data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
- data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
- data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
- data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
- data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
- data/lib/rgen/metamodel_builder.rb +103 -9
- data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
- data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
- data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
- data/lib/rgen/metamodel_builder/data_types.rb +67 -0
- data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
- data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
- data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
- data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
- data/lib/rgen/model_comparator.rb +56 -0
- data/lib/rgen/model_dumper.rb +5 -5
- data/lib/rgen/name_helper.rb +17 -1
- data/lib/rgen/template_language.rb +148 -28
- data/lib/rgen/template_language/directory_template_container.rb +56 -38
- data/lib/rgen/template_language/output_handler.rb +93 -77
- data/lib/rgen/template_language/template_container.rb +186 -143
- data/lib/rgen/transformer.rb +19 -14
- data/lib/transformers/uml13_to_ecore.rb +75 -0
- data/redist/xmlscan/ChangeLog +1301 -0
- data/redist/xmlscan/README +34 -0
- data/redist/xmlscan/THANKS +11 -0
- data/redist/xmlscan/doc/changes.html +74 -0
- data/redist/xmlscan/doc/changes.rd +80 -0
- data/redist/xmlscan/doc/en/conformance.html +136 -0
- data/redist/xmlscan/doc/en/conformance.rd +152 -0
- data/redist/xmlscan/doc/en/manual.html +356 -0
- data/redist/xmlscan/doc/en/manual.rd +402 -0
- data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
- data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
- data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
- data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
- data/redist/xmlscan/doc/src/Makefile +41 -0
- data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
- data/redist/xmlscan/doc/src/langsplit.rb +110 -0
- data/redist/xmlscan/doc/src/manual.rd.src +614 -0
- data/redist/xmlscan/install.rb +41 -0
- data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
- data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
- data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
- data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
- data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
- data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
- data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
- data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
- data/redist/xmlscan/memo/CONFORMANCE +1249 -0
- data/redist/xmlscan/memo/PRODUCTIONS +195 -0
- data/redist/xmlscan/memo/contentspec.ry +335 -0
- data/redist/xmlscan/samples/chibixml.rb +105 -0
- data/redist/xmlscan/samples/getxmlchar.rb +122 -0
- data/redist/xmlscan/samples/rexml.rb +159 -0
- data/redist/xmlscan/samples/xmlbench.rb +88 -0
- data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
- data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
- data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
- data/redist/xmlscan/samples/xmlconftest.rb +200 -0
- data/redist/xmlscan/test.rb +7 -0
- data/redist/xmlscan/tests/deftestcase.rb +73 -0
- data/redist/xmlscan/tests/runtest.rb +47 -0
- data/redist/xmlscan/tests/testall.rb +14 -0
- data/redist/xmlscan/tests/testencoding.rb +438 -0
- data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
- data/redist/xmlscan/tests/testnamespace.rb +457 -0
- data/redist/xmlscan/tests/testparser.rb +591 -0
- data/redist/xmlscan/tests/testscanner.rb +1749 -0
- data/redist/xmlscan/tests/testxmlchar.rb +143 -0
- data/redist/xmlscan/tests/visitor.rb +34 -0
- data/test/array_extensions_test.rb +2 -2
- data/test/ea_instantiator_test.rb +41 -0
- data/test/ecore_self_test.rb +53 -0
- data/test/environment_test.rb +11 -6
- data/test/metamodel_builder_test.rb +404 -245
- data/test/metamodel_roundtrip_test.rb +52 -0
- data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
- data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
- data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
- data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
- data/test/rgen_test.rb +3 -3
- data/test/template_language_test.rb +65 -39
- data/test/template_language_test/expected_result.txt +24 -3
- data/test/template_language_test/templates/code/array.tpl +11 -0
- data/test/template_language_test/templates/content/author.tpl +7 -0
- data/test/template_language_test/templates/content/chapter.tpl +1 -1
- data/test/template_language_test/templates/root.tpl +17 -8
- data/test/template_language_test/testout.txt +24 -3
- data/test/testmodel/class_model_checker.rb +119 -0
- data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
- data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
- data/test/testmodel/ea_testmodel_partial.xml +317 -0
- data/test/testmodel/ecore_model_checker.rb +101 -0
- data/test/testmodel/manual_testmodel.xml +22 -0
- data/test/testmodel/object_model_checker.rb +67 -0
- data/test/transformer_test.rb +18 -10
- data/test/xml_instantiator_test.rb +81 -8
- data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
- data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
- data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
- data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
- metadata +126 -28
- data/lib/ea/xmi_class_instantiator.rb +0 -46
- data/lib/ea/xmi_helper.rb +0 -26
- data/lib/ea/xmi_metamodel.rb +0 -34
- data/lib/ea/xmi_object_instantiator.rb +0 -46
- data/lib/ea/xmi_to_classmodel.rb +0 -78
- data/lib/ea/xmi_to_objectmodel.rb +0 -92
- data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
- data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
- data/lib/rgen/xml_instantiator.rb +0 -132
- data/lib/uml/objectmodel_instantiator.rb +0 -53
- data/lib/uml/uml_classmodel.rb +0 -92
- data/lib/uml/uml_objectmodel.rb +0 -65
- data/test/metamodel_generator_test.rb +0 -44
- data/test/metamodel_generator_test/TestModel.rb +0 -40
- data/test/metamodel_generator_test/expected_result.txt +0 -40
- data/test/xmi_class_instantiator_test.rb +0 -24
- data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
- data/test/xmi_object_instantiator_test.rb +0 -65
- data/test/xml_instantiator_test/testmodel.xml +0 -7
@@ -0,0 +1,1109 @@
|
|
1
|
+
#
|
2
|
+
# xmlscan/scanner.rb
|
3
|
+
#
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
5
|
+
#
|
6
|
+
# $Id: scanner.rb,v 1.83 2003/05/12 14:13:33 katsu Exp $
|
7
|
+
#
|
8
|
+
|
9
|
+
#
|
10
|
+
# CONSIDERATIONS FOR CHARACTER ENCODINGS:
|
11
|
+
#
|
12
|
+
# There are the following common characteristics in character encodings
|
13
|
+
# which are supported by Ruby's $KCODE feature (ISO-8859-*, Shift_JIS,
|
14
|
+
# EUC, and UTF-8):
|
15
|
+
#
|
16
|
+
# - Stateless.
|
17
|
+
# - ASCII characters are encoded in the same manner as US-ASCII.
|
18
|
+
# - The octet sequences corresponding to non-ASCII characters begin
|
19
|
+
# with an octet greater than 0x80.
|
20
|
+
# - The following characters can be identified by just one octet.
|
21
|
+
# That is, every octet corresponding to the following characters in
|
22
|
+
# US-ASCII never appear as a part of an octet sequence representing a
|
23
|
+
# non-ASCII character.
|
24
|
+
#
|
25
|
+
# Whitespaces("\t", "\n", "\r", and " ") and
|
26
|
+
# ! \ " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
27
|
+
#
|
28
|
+
# Be careful that `[' and `]' are NOT included in the list!
|
29
|
+
#
|
30
|
+
# If we build a regular expression carefully in accordance with these
|
31
|
+
# characteristics, we can get the same match regardless of the value
|
32
|
+
# of $KCODE. Moreover, if it can be premised on them, we can detect
|
33
|
+
# several delimiters without regular expressions. XMLScanner uses this
|
34
|
+
# fact in order to share many regular expressions in all $KCODE modes,
|
35
|
+
# and in order to optimize parsing speed.
|
36
|
+
#
|
37
|
+
|
38
|
+
require 'xmlscan/visitor'
|
39
|
+
|
40
|
+
|
41
|
+
module XMLScan
|
42
|
+
|
43
|
+
class Input
  # Uniform line-source wrapper: gives any source object the three methods
  # the scanner needs (gets, lineno, path).
  #
  # * a source with its own +gets+ is delegated to;
  # * a source convertible with +to_ary+ is read element by element;
  # * anything else is handed out once, as a single chunk.

  attr_reader :src

  def initialize(src)
    @src = src
    unless src.respond_to?(:gets)
      if src.respond_to?(:to_ary)
        @v = src.to_ary
        @n = -1
        class << self
          # next array element (nil once past the end)
          def gets
            @v.at(@n += 1)
          end

          def lineno
            @n + 1
          end
        end
      else
        @v = @src
        class << self
          # the whole source on the first call, nil afterwards
          def gets
            chunk = @v
            @v = nil
            chunk
          end
        end
      end
    end
    # Prefer the source's own bookkeeping whenever it offers any.
    if src.respond_to?(:lineno)
      class << self
        def lineno
          @src.lineno
        end
      end
    end
    if src.respond_to?(:path)
      class << self
        def path
          @src.path
        end
      end
    end
  end

  # Defaults; replaced per instance by the singleton methods defined above.
  def gets
    @src.gets
  end

  def lineno
    0
  end

  def path
    '-'
  end

  # Returns +src+ itself when it already answers gets, lineno and path;
  # otherwise returns a new Input around it.
  def self.wrap(src)
    complete = src.respond_to?(:gets) &&
               src.respond_to?(:lineno) &&
               src.respond_to?(:path)
    complete ? src : new(src)
  end

  # Inverse of wrap: the wrapped source for an Input, +obj+ itself otherwise.
  def self.unwrap(obj)
    self === obj ? obj.src : obj
  end

end
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
class PrivateArray < Array
  # Make every method Array itself defines (minus those Kernel also defines)
  # private, so subclasses can use them internally without exposing them.
  private(*(superclass.instance_methods(false) - Kernel.instance_methods(false)))
end
|
96
|
+
|
97
|
+
|
98
|
+
class Source < PrivateArray
  # Source inherits Array only for speed: the receiver itself is the stack
  # of fragments waiting to be handed out by #get.

  def initialize(src)
    super()
    @src = Input.wrap(src)
    @eof = false
    @last = nil          # trailing fragment held back until more input is read
  end

  # The object originally passed to the constructor.
  def source
    Input.unwrap(@src)
  end


  def eof?
    @eof && empty?
  end

  # Drops everything buffered and marks the source as exhausted.
  def abort
    @eof = true
    @last = nil
    clear
    self
  end


  # Returns the next fragment, reading from the input when the stack is
  # empty; returns nil at end of input.
  def get
    fragment = pop
    return fragment if fragment
    return nil if @eof
    last = @last
    loop do
      src = @src.gets
      unless src
        @eof = true
        unshift last
        last = nil
        break
      end
      a = src.split(/(?=<|>[<>])|>/n, -1)
      if last
        if /\A[<>]/n =~ a.first
          push last
        else
          # first piece continues the fragment held back last time
          a[0] = last << (a.first || '')
        end
      end
      concat a
      last = pop
      break unless empty?
    end
    @last = last
    reverse!
    pop
  end


  def prepare
    s = get
    if s && s.empty?
      # preserve first `>'
      s = get
      s = '>' << s if s
    end
    s && push(s)
  end


  def tag_end?
    s = last || @last
    s && s[0] != ?<
  end

  def tag_start?
    s = last || @last
    s && s[0] == ?<
  end

  def close_tag                # tag_end?, and remove a `>'.
    s = last || @last
    return false unless s && s[0] != ?<
    if s == '>' || s.empty?
      s1 = get
      s = last || @last
      unless s && s[0] == ?<   # for speed up
        out = [ s1 ]
        out.push(get) while (s = last || @last) && (s == '>' || s.empty?)
        out.pop unless s && s[0] != ?<   # De Morgan
        concat out
      end
    end
    true
  end


  def get_text                 # get until tag_start?
    s = last || @last
    s && s[0] != ?< && get
  end

  def get_tag                  # get until tag_end?
    s = last || @last
    s && s[0] == ?< && get
  end

  # Like #get, but puts back the `>' that splitting removed from the front
  # of a fragment that starts with neither `<' nor `>'.
  def get_plain
    s = get
    if s
      c = s[0]
      s = '>' << s unless c == ?< || c == ?>
    end
    s
  end

  def lineno
    @src.lineno
  end

  def path
    @src.path
  end


  # The following methods are for debug.

  def inspect
    items = []
    reverse_each { |i|
      items.push ">" unless /\A[<>]/n =~ i
      items.push i.inspect
    }
    held = []
    if @last
      held.push ">" unless /\A[<>]/n =~ @last
      held.push @last.inspect
    end
    items.push '#eof' if @eof
    "((#{items.join(' ')}) (#{held.join(' ')}) . #{source.inspect})"
  end

  def each
    prepare
    while s = get
      yield s
    end
    self
  end

  def test
    last || @last || ((s = get) && push(s) && s)
  end

end
|
241
|
+
|
242
|
+
|
243
|
+
|
244
|
+
class XMLScanner
|
245
|
+
|
246
|
+
class << self

  # Option names this class provides: the suffixes of its own private
  # apply_option_* instance methods.
  def provided_options
    prefix = /\Aapply_option_/n
    private_instance_methods(false).inject([]) { |found, meth|
      found.push $' if prefix =~ meth
      found
    }
  end

  # Applies a single named option to +instance+.
  def apply_option(instance, option)
    instance.__send__ "apply_option_#{option}"
  end

  # Applies every requested option through the first ancestor that provides
  # it; raises ArgumentError for an option nobody provides.
  def apply_options(instance, options)
    pending = {}
    options.each { |opt| pending[opt.to_s] = true }
    ancestors.each { |klass|
      next unless klass.respond_to? :provided_options
      klass.provided_options.each { |name|
        next unless pending.include? name
        pending.delete name
        klass.apply_option instance, name
      }
    }
    unless pending.empty?
      raise ArgumentError, "undefined option `#{pending.keys[0]}'"
    end
    instance
  end
  private :apply_options

  # Builds a scanner for +visitor+ and applies +options+ to it.
  def new(visitor, *options)
    apply_options super(visitor), options
  end

end
|
287
|
+
|
288
|
+
|
289
|
+
|
290
|
+
# Stores the visitor that receives all parse events; decoration, source and
# kcode start out unset.
def initialize(visitor)
  @visitor = visitor
  @decoration = @src = @kcode = nil
end
|
296
|
+
|
297
|
+
|
298
|
+
# Sets the character-encoding mode used to select regular expressions.
#
# +kcode+ is a $KCODE-style name (or nil); only its first letter matters,
# case-insensitively: n, e, s or u.  It is stored as one of the canonical
# names 'none', 'euc', 'sjis', 'utf8', or nil for anything else.  These are
# the values Ruby 1.8's Regexp#kcode returned, which is what the
# implementation used to ask for.  The mapping is done here because
# Regexp#kcode and the three-argument form of Regexp.new are not available
# on later Ruby versions.
#
# Returns +kcode+ unchanged.
def kcode=(kcode)
  names = { 'n' => 'none', 'e' => 'euc', 's' => 'sjis', 'u' => 'utf8' }
  @kcode = names[kcode.to_s[0, 1].downcase]
  kcode
end
|
302
|
+
|
303
|
+
attr_reader :kcode
|
304
|
+
|
305
|
+
|
306
|
+
# Routes events through a Decoration placed in front of the visitor.  The
# Decoration is created on first use and reused afterwards; +decoration+ is
# handed to its #expand.
def decorate(decoration)
  @visitor = @decoration = Decoration.new(@visitor) unless @decoration
  @decoration.expand decoration
end
private :decorate
|
313
|
+
|
314
|
+
|
315
|
+
# Current line number of the source being scanned; nil when no source is
# attached.
def lineno
  @src && @src.lineno
end

# Path of the source being scanned; nil when no source is attached.
def path
  @src && @src.path
end

# The underlying source object; nil when no source is attached.  Guarded
# like lineno and path so that asking before a source is attached does not
# raise NoMethodError.
def source
  @src && @src.source
end
|
326
|
+
|
327
|
+
|
328
|
+
private
|
329
|
+
|
330
|
+
# Diagnostics: each of these hands the message to the visitor method of the
# same name.

def parse_error(msg)
  @visitor.parse_error(msg)
end

def wellformed_error(msg)
  @visitor.wellformed_error(msg)
end

def valid_error(msg)
  @visitor.valid_error(msg)
end

def warning(msg)
  @visitor.warning(msg)
end
|
345
|
+
|
346
|
+
|
347
|
+
# Document-level events: each of these forwards its arguments unchanged to
# the visitor method of the same name.

# -- XML declaration --

def on_xmldecl
  @visitor.on_xmldecl
end

def on_xmldecl_version(str)
  @visitor.on_xmldecl_version(str)
end

def on_xmldecl_encoding(str)
  @visitor.on_xmldecl_encoding(str)
end

def on_xmldecl_standalone(str)
  @visitor.on_xmldecl_standalone(str)
end

def on_xmldecl_other(name, value)
  @visitor.on_xmldecl_other(name, value)
end

def on_xmldecl_end
  @visitor.on_xmldecl_end
end

# -- prolog --

def on_doctype(root, pubid, sysid)
  @visitor.on_doctype(root, pubid, sysid)
end

def on_prolog_space(str)
  @visitor.on_prolog_space(str)
end

# -- content --

def on_comment(str)
  @visitor.on_comment(str)
end

def on_pi(target, pi)
  @visitor.on_pi(target, pi)
end

def on_chardata(str)
  @visitor.on_chardata(str)
end

def on_cdata(str)
  @visitor.on_cdata(str)
end

def on_etag(name)
  @visitor.on_etag(name)
end

def on_entityref(ref)
  @visitor.on_entityref(ref)
end

def on_charref(code)
  @visitor.on_charref(code)
end

def on_charref_hex(code)
  @visitor.on_charref_hex(code)
end

# -- document boundaries --

def on_start_document
  @visitor.on_start_document
end

def on_end_document
  @visitor.on_end_document
end
|
418
|
+
|
419
|
+
|
420
|
+
# <hoge fuga="foo&bar;&#38;&#x26;foo" />HOGE
|
421
|
+
# ^     ^     ^  ^    ^    ^     ^  ^ ^ ^
|
422
|
+
# 1     2     3  4    5    6     7  8 9 A
|
423
|
+
#
|
424
|
+
# The following method will be called with the following arguments
|
425
|
+
# when the parser reaches the above point;
|
426
|
+
#
|
427
|
+
# 1: on_stag ('hoge')
|
428
|
+
# 2: on_attribute ('fuga')
|
429
|
+
# 3: on_attr_value ('foo')
|
430
|
+
# 4: on_attr_entityref ('bar')
|
431
|
+
# 5: on_attr_charref (38)
|
432
|
+
# 6: on_attr_charref_hex (38)
|
433
|
+
# 7: on_attr_value ('foo')
|
434
|
+
# 8: on_attribute_end ('fuga')
|
435
|
+
# 9: on_stag_end_empty ('hoge')
|
436
|
+
# or
|
437
|
+
# on_stag_end ('hoge')
|
438
|
+
#
|
439
|
+
# A: on_chardata ('HOGE')
|
440
|
+
|
441
|
+
# Start-tag events: each of these forwards its argument unchanged to the
# visitor method of the same name.

def on_stag(name)
  @visitor.on_stag(name)
end

def on_attribute(name)
  @visitor.on_attribute(name)
end

def on_attr_value(str)
  @visitor.on_attr_value(str)
end

def on_attr_entityref(ref)
  @visitor.on_attr_entityref(ref)
end

def on_attr_charref(code)
  @visitor.on_attr_charref(code)
end

def on_attr_charref_hex(code)
  @visitor.on_attr_charref_hex(code)
end

def on_attribute_end(name)
  @visitor.on_attribute_end(name)
end

def on_stag_end_empty(name)
  @visitor.on_stag_end_empty(name)
end

def on_stag_end(name)
  @visitor.on_stag_end(name)
end
|
476
|
+
|
477
|
+
|
478
|
+
|
479
|
+
private
|
480
|
+
|
481
|
+
module KcodeRegexp
  private

  # Canonical kcode names; these are the strings Ruby 1.8 returned from
  # //n.kcode, //e.kcode, //s.kcode and //u.kcode.  They are spelled out
  # because Regexp#kcode is not available on later Ruby versions.
  Kcodes = [ 'none', 'euc', 'sjis', 'utf8' ]

  # Compiles the pattern source +re+ and returns a Hash from kcode name to
  # Regexp.  Any other key (including nil) yields a Regexp compiled without
  # an explicit kcode.
  #
  # Where Regexp#kcode exists, one Regexp is compiled per kcode as before.
  # Elsewhere the three-argument Regexp.new is not used, and every key
  # shares one Regexp.
  def kcode_regexp(re)
    h = {}
    if Regexp.method_defined?(:kcode)
      Kcodes.each { |i| h[i] = Regexp.new(re, nil, i) }
    else
      shared = Regexp.new(re)
      Kcodes.each { |i| h[i] = shared }
    end
    h.default = Regexp.new(re)
    h
  end
end
extend KcodeRegexp
|
492
|
+
|
493
|
+
|
494
|
+
InvalidEntityRef = kcode_regexp('(?=[^#\d\w]|\z)')
|
495
|
+
|
496
|
+
# Reports character data, starting with +s+ and continuing with every
# further text fragment the source yields.  Text is reported through
# on_chardata; `&name;', `&#N;' and `&#xN;' through on_entityref,
# on_charref and on_charref_hex.  Malformed references are reported with
# parse_error.
def scan_chardata(s)
  loop do
    if /&/n =~ s
      head = $`
      rest = $'
      on_chardata head unless head.empty?
      rest.split('&', -1).each { |piece|
        # A reference needs a `;' that is not the first character and that
        # comes before any whitespace.
        m = /(?!\A);|(?=[ \t\r\n])/n.match(piece)
        unless m && !m[0].empty?
          m = InvalidEntityRef[@kcode].match(piece)
          if m && !m.pre_match.strip.empty?
            parse_error "reference to `#{m.pre_match}' doesn't end with `;'"
          else
            parse_error "`&' is not used for entity/character references"
            on_chardata('&' << piece)
            next
          end
        end
        ref = m.pre_match
        tail = m.post_match
        case ref
        when /\A[^#]/n
          on_entityref ref
        when /\A#(\d+)\z/n
          on_charref $1.to_i
        when /\A#x([\dA-Fa-f]+)\z/n
          on_charref_hex $1.hex
        else
          parse_error "invalid character reference `#{ref}'"
        end
        on_chardata tail unless tail.empty?
      }
    else
      on_chardata s
    end
    s = @src.get_text
    break unless s
    # put a leading `>' back in front of the fragment, unless the fragment
    # is itself exactly `>'
    s = '>' << s unless s == '>'
  end
end
|
534
|
+
|
535
|
+
|
536
|
+
# Reports one piece of an attribute value.  Mostly copied from
# scan_chardata, but it handles only +s+ and reports through the on_attr_*
# callbacks.
def scan_attvalue(s)
  unless /&/n =~ s
    on_attr_value s
    return
  end
  head = $`
  rest = $'
  on_attr_value head unless head.empty?
  rest.split('&', -1).each { |piece|
    # A reference needs a `;' that is not the first character and that
    # comes before any whitespace.
    m = /(?!\A);|(?=[ \t\r\n])/n.match(piece)
    unless m && !m[0].empty?
      m = InvalidEntityRef[@kcode].match(piece)
      if m && !m.pre_match.strip.empty?
        parse_error "reference to `#{m.pre_match}' doesn't end with `;'"
      else
        parse_error "`&' is not used for entity/character references"
        on_attr_value('&' << piece)
        next
      end
    end
    ref = m.pre_match
    tail = m.post_match
    case ref
    when /\A[^#]/n
      on_attr_entityref ref
    when /\A#(\d+)\z/n
      on_attr_charref $1.to_i
    when /\A#x([\dA-Fa-f]+)\z/n
      on_attr_charref_hex $1.hex
    else
      parse_error "invalid character reference `#{ref}'"
    end
    on_attr_value tail unless tail.empty?
  }
end
|
569
|
+
|
570
|
+
|
571
|
+
# Scans a comment whose first fragment +s+ starts with `<!--' and reports
# its text through on_comment.  A `--' inside the comment, a `--->' ending
# and end of input before the terminator are each reported with parse_error.
def scan_comment(s)
  s[0, 4] = ''                    # remove `<!--'
  comm = ''
  # collect fragments until one contains `--'
  until /--/n =~ s
    comm << s
    s = @src.get_plain
    unless s
      parse_error "unterminated comment meets EOF"
      return on_comment(comm)
    end
  end
  comm << $`
  # The comment ends only where `--' is immediately followed by `>'.
  until (s = $').empty? && @src.close_tag
    if s == '-' && @src.close_tag     # --->
      parse_error "comment ending in `--->' is not allowed"
      comm << s
      break
    end
    parse_error "comment includes `--'"
    comm << '--'
    until /--/n =~ s                  # same loop as above, repeated for performance
      comm << s
      s = @src.get_plain
      unless s
        parse_error "unterminated comment meets EOF"
        return on_comment(comm)
      end
    end
    comm << $`
  end
  on_comment comm
end
|
603
|
+
|
604
|
+
|
605
|
+
# Scans a processing instruction whose first fragment +s+ starts with `<?'
# and reports it through on_pi(target, content).  When no target follows
# `<?', a parse error is reported and the text is passed on as character
# data.
def scan_pi(s)
  if /\A<\?([^ \t\n\r?]+)(?:[ \t\n\r]+|(?=\?\z))/n =~ s
    target = $1
    pi = $'
    # The PI ends where a fragment ending in `?' is followed by `>'.
    until pi[-1] == ?? && @src.close_tag
      more = @src.get_plain
      unless more
        parse_error "unterminated PI meets EOF"
        return on_pi(target, pi)
      end
      pi << more
    end
    pi.chop!                      # remove last `?'
    on_pi target, pi
  else
    parse_error "parse error at `<?'"
    s << '>' if @src.close_tag
    on_chardata s
  end
end
|
625
|
+
|
626
|
+
|
627
|
+
CDATAPattern = kcode_regexp('\]\]\z')
|
628
|
+
|
629
|
+
# Scans a CDATA section.  +s+ is the text following `<![CDATA['; fragments
# are appended to it until the text ends in `]]' and a `>' follows.  The
# content, without the closing `]]', is reported through on_cdata.
def scan_cdata(s)
  cdata = s
  terminator = CDATAPattern[@kcode]
  until terminator =~ cdata && @src.close_tag
    more = @src.get_plain
    unless more
      parse_error "unterminated CDATA section meets EOF"
      return on_cdata(cdata)
    end
    cdata << more
  end
  2.times { cdata.chop! }         # remove ']]'
  on_cdata cdata
end
|
643
|
+
|
644
|
+
|
645
|
+
# Reports an end tag that is not closed with `>': either another tag
# starts next, or the input ended.
def found_unclosed_etag(name)
  parse_error(
    if @src.tag_start?
      "unclosed end tag `#{name}' meets another tag"
    else
      "unclosed end tag `#{name}' meets EOF"
    end
  )
end

# Reports `</>' and passes it on as character data.
def found_empty_etag
  parse_error "parse error at `</'"
  on_chardata '</>'
end
|
657
|
+
|
658
|
+
|
659
|
+
# Scans an end tag whose fragment +s+ starts with `</' and reports the
# element name through on_etag.  Malformed forms (`</>', `</ name',
# whitespace inside the name, a missing `>') are reported with parse_error.
def scan_etag(s)
  s[0, 2] = ''                        # remove '</'
  if s.empty?
    return found_empty_etag if @src.close_tag     # </>
    # </< or </[EOF]
    parse_error "parse error at `</'"
    s << '>' if @src.close_tag
    return on_chardata('</' << s)
  elsif /[ \t\n\r]+/n =~ s
    head, tail = $`, $'
    if head.empty?                    # </ tag
      parse_error "parse error at `</'"
      s << '>' if @src.close_tag
      return on_chardata('</' + s)
    elsif !tail.empty?                # </ta g
      parse_error "illegal whitespace is found within end tag `#{head}'"
      nil while @src.get_tag          # discard whatever get_tag still returns
    end
    s = head
  end
  found_unclosed_etag s unless @src.close_tag     # </tag< or </tag[EOF]
  on_etag s
end
|
685
|
+
|
686
|
+
|
687
|
+
# Reports `<>' and passes it on as character data.
def found_empty_stag
  parse_error "parse error at `<'"
  on_chardata '<>'
end

# Reports a start tag that is not closed with `>': either another tag
# starts next, or the input ended.
def found_unclosed_stag(name)
  parse_error(
    if @src.tag_start?
      "unclosed start tag `#{name}' meets another tag"
    else
      "unclosed start tag `#{name}' meets EOF"
    end
  )
end

# Same as found_unclosed_stag, for an empty element tag (`<name/').
def found_unclosed_emptyelem(name)
  parse_error(
    if @src.tag_start?
      "unclosed empty element tag `#{name}' meets another tag"
    else
      "unclosed empty element tag `#{name}' meets EOF"
    end
  )
end


# Reports the offending token at the front of +s+ (text inside a start tag
# that could not be parsed) and returns the text after it, or nil when
# nothing is left.
def found_stag_error(s)
  if /\A[\/='"]/n =~ s
    tok, rest = $&, $'
  elsif /(?=[ \t\n\r\/='"])/n =~ s
    tok, rest = $`, $'
  else
    tok, rest = s, nil
  end
  parse_error "parse error at `#{tok}'"
  rest
end
|
720
|
+
|
721
|
+
|
722
|
+
# Scans a start tag (or empty element tag) whose first fragment +s+ starts
# with `<'.  Reports on_stag, then for each attribute on_attribute, the
# value pieces via scan_attvalue, and on_attribute_end; finally on_stag_end
# or on_stag_end_empty.  Malformed input is reported with parse_error or
# wellformed_error.
def scan_stag(s)
  unless /(?=[\/ \t\n\r='"])/n =~ s
    # No delimiter in the fragment: it is just `<name'.
    name = s
    name[0, 1] = ''                   # remove `<'
    if name.empty?
      return found_empty_stag if @src.close_tag   # <>
      # << or <[EOF]
      parse_error "parse error at `<'"
      return on_chardata('<')
    end
    on_stag name
    found_unclosed_stag name unless @src.close_tag
    return on_stag_end(name)
  end

  name = $`
  s = $'
  name[0, 1] = ''                     # remove `<'
  if name.empty?                      # `< tag' or `<='
    parse_error "parse error at `<'"
    s << '>' if @src.close_tag
    return on_chardata('<' << s)
  end
  on_stag name
  emptyelem = false
  key, val, error, qmark, c = nil
  begin
    continue = false
    s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/n
           ) { |key, val, error|
      if key                          # key="value"
        on_attribute key
        qmark = val.slice!(0, 1)
        if val[-1] == qmark[0]
          val.chop!
          scan_attvalue val unless val.empty?
        else
          # The closing quote is not in this fragment: keep reading
          # fragments until one contains it.
          scan_attvalue val unless val.empty?
          begin
            s = @src.get
            unless s
              parse_error "unterminated attribute `#{key}' meets EOF"
              break
            end
            c = s[0]
            val, s = s.split(qmark, 2)
            if c == ?<
              wellformed_error "`<' is found in attribute `#{key}'"
            elsif c != ?>
              scan_attvalue '>'
            end
            scan_attvalue val if c
          end until s
          continue = s                # if eof then continue is false, else true.
        end
        on_attribute_end key
      elsif error
        continue = s = found_stag_error(error)
      else
        emptyelem = true
      end
    }
  end while continue
  unless @src.close_tag
    if emptyelem
      found_unclosed_emptyelem name
    else
      found_unclosed_stag name
    end
  end
  if emptyelem
    on_stag_end_empty name
  else
    on_stag_end name
  end
end
|
800
|
+
|
801
|
+
|
802
|
+
# Handles a `<!...' fragment that is neither a comment nor a CDATA section:
# reports a parse error and passes the text on as character data, with the
# closing `>' put back when there is one.
def scan_bang_tag(s)
  parse_error "parse error at `<!'"
  s << '>' if @src.close_tag
  on_chardata(s)
end
|
807
|
+
|
808
|
+
|
809
|
+
# Main content loop: dispatches fragment +s+, and then every further
# fragment from the source, to the scanner matching its leading characters.
def scan_content(s)
  src = @src                          # for speed
  while s
    if s[0] != ?<
      scan_chardata s
    else
      case s[1]
      when ?/
        scan_etag s
      when ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment s
        elsif /\A<!\[CDATA\[/n =~ s
          scan_cdata $'
        else
          scan_bang_tag s
        end
      when ??
        scan_pi s
      else
        scan_stag s
      end
    end
    s = src.get
  end
end
|
834
|
+
|
835
|
+
|
836
|
+
# Appends source fragments to +str+ until one contains the quotation mark
# +qmark+.  Returns the text after the mark, or nil when the input ends
# first.  A `>' is put back in front of every fragment that starts with
# neither `<' nor `>'.
def get_until_qmark(str, qmark)
  rest = nil
  loop do
    rest = @src.get
    break unless rest
    first = rest[0]
    value, rest = rest.split(qmark, 2)
    str << '>' unless first == ?< || first == ?>   # De Morgan
    str << value if first
    break if rest
  end
  rest
end
|
848
|
+
|
849
|
+
|
850
|
+
XMLDeclPattern = kcode_regexp(%q{[ \t\n\r]([\-_\d\w]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|(\?\z)|([\-_.\d\w]+|[^ \t\n\r])}) #'
|
851
|
+
|
852
|
+
# Scans the pseudo-attributes of an XML declaration from +s+ (and further
# fragments when needed) and reports them through the on_xmldecl*
# callbacks, bracketed by on_xmldecl and on_xmldecl_end.
#
# +state+ tracks the expected order: 0 = nothing seen, 1 = version seen,
# 2 = encoding seen, 3 = standalone or an out-of-place item seen.
#
# The third block parameter is named +mark+ and copied into +endmark+
# explicitly.  Where block parameters are block-local, naming the parameter
# +endmark+ would leave the outer variable nil, and the loop below would
# keep pulling fragments after the closing `?>'.
def scan_xmldecl(s)
  endmark = nil
  state = 0
  on_xmldecl
  begin
    continue = false
    s.scan(XMLDeclPattern[@kcode]) { |key, val, mark, error|
      endmark = mark
      if key
        qmark = val.slice!(0, 1)      # remove quotation marks
        if val[-1] == qmark[0]
          val.chop!
        else
          # value continues in following fragments
          continue = s = get_until_qmark(val, qmark)
          unless s
            parse_error "unterminated XML declaration meets EOF"
            endmark = true
          end
        end
        if state == 0 and key == 'version'
          on_xmldecl_version val
          state = 1
        elsif state == 1 and key == 'encoding'
          on_xmldecl_encoding val
          state = 2
        elsif state >= 1 and key == 'standalone'
          on_xmldecl_standalone val
          state = 3
        else
          state = 3
          if key == 'version'
            parse_error "version declaration must not be here"
            on_xmldecl_version val
          elsif key == 'encoding'
            parse_error "encoding declaration must not be here"
            on_xmldecl_encoding val
            state = 2 if state < 2
          elsif key == 'standalone'
            parse_error "standalone declaration must not be here"
            on_xmldecl_standalone val
          else
            parse_error "unknown declaration `#{key}' in XML declaration"
            on_xmldecl_other key, val
          end
        end
      elsif endmark
        unless @src.close_tag
          parse_error "unexpected `#{endmark}' found in XML declaration"
          endmark = nil
        end
        # here always exit the loop.
      else
        parse_error "parse error at `#{error}'"
      end
    }
  end while !endmark and continue || s = @src.get_plain
  parse_error "unterminated XML declaration meets EOF" unless s or endmark
  parse_error "no declaration found in XML declaration" if state == 0
  on_xmldecl_end
end
|
911
|
+
|
912
|
+
|
913
|
+
SkipDTD = kcode_regexp(%q{(['"]|\A<!--|\A<\?|--\z|\?\z)|\]\s*\z}) #'
|
914
|
+
|
915
|
+
# Skips an internal DTD subset without interpreting it (reads up to `]>').
#
# s:: the text already read that follows the `[' which opened the subset.
#
# While scanning, an opened comment (`<!--'), processing instruction (`<?')
# or quoted literal is remembered so that nothing inside it is mistaken for
# the end of the subset.  More input is pulled from @src.get until the end is
# found; if the input runs out first, a parse error is reported.
def skip_internal_dtd(s)
  awaiting = nil   # terminator of the currently open comment/PI/literal
  closed = false   # becomes true once the end of the subset is confirmed
  while true
    s.scan(SkipDTD[@kcode]) do |token,|
      if awaiting
        # A one-character terminator (a quotation mark) closes wherever it
        # appears; `--' and `?' only close when @src.tag_end? is true.
        if awaiting == token && (awaiting.size == 1 || @src.tag_end?)
          awaiting = nil
        end
        next
      end
      case token
      when nil
        # Matched the `]' alternative (no capture): the subset ends here
        # only if @src.close_tag says so.
        closed = true if @src.close_tag
      when '<!--'
        awaiting = '--'
      when '<?'
        awaiting = '?'
      when '"', "'"
        awaiting = token
      end
    end
    break if closed
    s = @src.get
    break unless s
  end
  parse_error "unterminated internal DTD subset meets EOF" unless s
end
|
937
|
+
|
938
|
+
|
939
|
+
# Handles an internal DTD subset.  The subset is not interpreted: a warning
# is issued and the text is skipped via skip_internal_dtd.
#
# s:: the text following the `[' that opened the subset.
def scan_internal_dtd(s)
  warning("internal DTD subset is not supported")
  skip_internal_dtd(s)
end
|
943
|
+
|
944
|
+
|
945
|
+
# Called by scan_doctype when the token in the external ID keyword position
# is neither `PUBLIC' nor `SYSTEM'.
#
# pubsys:: the offending token (not used here).
#
# Reports a parse error and returns the keyword to use instead.
def found_invalid_pubsys(pubsys)
  parse_error("`PUBLIC' or `SYSTEM' should be here")
  'SYSTEM'
end
|
949
|
+
|
950
|
+
|
951
|
+
# Tokenizer pattern for the body of a DOCTYPE declaration (see scan_doctype).
#   group 1: a bare name preceded by whitespace
#   group 2: a quoted literal preceded by whitespace; the closing quotation
#            mark is optional, so an unterminated literal also matches
#   group 3: otherwise, a run of name-like characters or any single
#            non-whitespace character (e.g. `[')
# NOTE(review): kcode_regexp is defined outside this view; the result is
# indexed with @kcode where it is used -- presumably one regexp per encoding.
DoctypePattern = kcode_regexp(%q{[ \t\n\r](?:([^ \t\n\r\/'"=\[]+)|('[^']*'?|"[^"]*"?))|([\-_.\d\w]+|[^ \t\n\r])}) #"
|
952
|
+
|
953
|
+
# Scans the body of a DOCTYPE declaration.
#
# s:: the text following `<!DOCTYPE'.
#
# Tokens are taken one at a time with DoctypePattern:
# * the first bare name is the root element name, the second one is the
#   external ID keyword (`PUBLIC' or `SYSTEM'; anything else goes through
#   found_invalid_pubsys), any further bare name is a parse error;
# * quoted literals are collected as system / public identifiers;
# * a `[' starts an internal DTD subset and ends the token loop.
# When the current text is exhausted, the loop stops if @src.close_tag is
# true and otherwise continues with @src.get_plain.
#
# The result is reported through on_doctype(root, pubid, sysid); an internal
# subset, if present, is then handed to scan_internal_dtd.
def scan_doctype(s)
  root = nil
  syspub = nil
  sysid = nil
  pubid = nil
  internal_dtd = false
  re = DoctypePattern[@kcode]
  while true
    if re =~ s
      match = Regexp.last_match
      name, str, delim = match[1], match[2], match[3]
      s = match.post_match
      if name
        if !root
          root = name
        elsif !syspub
          name = found_invalid_pubsys(name) unless %w[PUBLIC SYSTEM].include?(name)
          syspub = name
        else
          parse_error "parse error at `#{name}'"
        end
      elsif str
        qmark = str.slice!(0, 1) # strip the opening quotation mark
        if syspub
          if str[-1] == qmark[0]
            str.chop! # strip the closing quotation mark
          else
            # The literal is not closed yet; get_until_qmark completes it.
            s = get_until_qmark(str, qmark) || ''
          end
          if !sysid
            sysid = str
          elsif !pubid && syspub == 'PUBLIC'
            # Second literal after PUBLIC: the first one was the public ID.
            pubid = sysid
            sysid = str
          else
            parse_error "too many external ID literals in DOCTYPE"
          end
        else
          # A literal before any PUBLIC/SYSTEM keyword: report the quotation
          # mark and rescan the remaining text without it.
          parse_error "parse error at `#{qmark}'"
          s = str << s
        end
      elsif delim == '['
        internal_dtd = true
        break
      else
        parse_error "parse error at `#{delim}'"
      end
    else
      s = ''
    end
    if s.empty?
      break if @src.close_tag
      s = @src.get_plain
    end
    break unless s
  end
  parse_error "unterminated DOCTYPE declaration meets EOF" unless s
  parse_error "no root element is specified in DOCTYPE" unless root
  parse_error "too few external ID literals in DOCTYPE" if syspub && !sysid
  if syspub == 'PUBLIC' && !pubid
    # Only one literal followed PUBLIC: it is the public ID, not a system ID.
    pubid = sysid
    sysid = nil
  end
  on_doctype root, pubid, sysid
  scan_internal_dtd s if internal_dtd
end
|
1018
|
+
|
1019
|
+
|
1020
|
+
# Scans the document prolog and then passes control to scan_content.
#
# s:: the first chunk of input.
#
# An XML declaration at the very beginning goes to scan_xmldecl.  After that,
# comments, processing instructions, whitespace-only text (reported through
# on_prolog_space) and at most one DOCTYPE declaration are consumed.  The
# first chunk that is none of these ends the prolog and is given to
# scan_content; if the input ran out instead, scan_content receives the
# result of one more src.get.
def scan_prolog(s)
  if /\A<\?xml(?=[ \t\n\r])/n =~ s
    scan_xmldecl Regexp.last_match.post_match
    s = @src.get
  end
  doctype_allowed = true
  src = @src # for speed
  while s
    if s[0] == ?<
      case s[1]
      when ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment s
        elsif /\A<!DOCTYPE(?=[ \t\n\r])/n =~ s && doctype_allowed
          doctype_allowed = false
          scan_doctype Regexp.last_match.post_match
        else
          break
        end
      when ??
        scan_pi s
      else
        break
      end
      s = src.get
    elsif /[^ \t\r\n]/ !~ s
      # Whitespace-only (or empty) text between prolog items.
      on_prolog_space s unless s.empty?
      s = src.get_plain
    else
      break
    end
  end
  scan_content(s || src.get)
end
|
1053
|
+
|
1054
|
+
|
1055
|
+
# Scans one whole document from @src: calls on_start_document, prepares the
# source, runs scan_prolog on the first chunk and finally calls
# on_end_document.
def scan_document
  on_start_document
  @src.prepare
  first_chunk = @src.get
  scan_prolog(first_chunk)
  on_end_document
end
|
1061
|
+
|
1062
|
+
|
1063
|
+
# Wraps the given input in a Source object for the scanner to read from.
#
# src:: the raw input handed to parse_document.
def make_source(src)
  Source.new(src)
end
|
1066
|
+
|
1067
|
+
|
1068
|
+
# Methods defined after this point are public.
public
|
1069
|
+
|
1070
|
+
# Parses a complete document.
#
# src:: the input; it is wrapped by make_source before scanning.
#
# @src is set only for the duration of the scan and is reset to nil
# afterwards, even when scanning raises.  Returns self.
def parse_document(src)
  @src = make_source(src)
  begin
    scan_document()
  ensure
    @src = nil
  end
  self
end
|
1079
|
+
|
1080
|
+
# `parse' is another name for parse_document.
alias parse parse_document
|
1081
|
+
|
1082
|
+
end
|
1083
|
+
|
1084
|
+
|
1085
|
+
end
|
1086
|
+
|
1087
|
+
|
1088
|
+
|
1089
|
+
|
1090
|
+
|
1091
|
+
# Self-test / benchmark, run only when this file is executed directly:
# scans the input given through ARGF and prints the user CPU time spent.
if $0 == __FILE__ then
  # Visitor that prints errors to STDERR, and only when ruby runs with -w / -v.
  class TestVisitor
    include XMLScan::Visitor

    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end

    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  # The scanner is kept in a global so the visitor can ask it for the
  # current path and line number.
  $s = scan = XMLScan::XMLScanner.new(TestVisitor.new)
  src = ARGF
  # Give ARGF a `path' method that returns its current file name.
  def src.path; filename; end
  # Process.times replaces Time.times, which was obsoleted in Ruby 1.8 and
  # removed in 1.9.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|