rgen 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +20 -1
- data/MIT-LICENSE +1 -1
- data/README +12 -9
- data/lib/instantiators/ea_instantiator.rb +36 -0
- data/lib/metamodels/uml13_metamodel.rb +559 -0
- data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
- data/lib/mmgen/metamodel_generator.rb +5 -5
- data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
- data/lib/mmgen/mmgen.rb +6 -4
- data/lib/mmgen/templates/annotations.tpl +37 -0
- data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
- data/lib/rgen/ecore/ecore.rb +190 -0
- data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
- data/lib/rgen/ecore/ecore_transformer.rb +85 -0
- data/lib/rgen/environment.rb +9 -24
- data/lib/rgen/find_helper.rb +68 -0
- data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
- data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
- data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
- data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
- data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
- data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
- data/lib/rgen/metamodel_builder.rb +103 -9
- data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
- data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
- data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
- data/lib/rgen/metamodel_builder/data_types.rb +67 -0
- data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
- data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
- data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
- data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
- data/lib/rgen/model_comparator.rb +56 -0
- data/lib/rgen/model_dumper.rb +5 -5
- data/lib/rgen/name_helper.rb +17 -1
- data/lib/rgen/template_language.rb +148 -28
- data/lib/rgen/template_language/directory_template_container.rb +56 -38
- data/lib/rgen/template_language/output_handler.rb +93 -77
- data/lib/rgen/template_language/template_container.rb +186 -143
- data/lib/rgen/transformer.rb +19 -14
- data/lib/transformers/uml13_to_ecore.rb +75 -0
- data/redist/xmlscan/ChangeLog +1301 -0
- data/redist/xmlscan/README +34 -0
- data/redist/xmlscan/THANKS +11 -0
- data/redist/xmlscan/doc/changes.html +74 -0
- data/redist/xmlscan/doc/changes.rd +80 -0
- data/redist/xmlscan/doc/en/conformance.html +136 -0
- data/redist/xmlscan/doc/en/conformance.rd +152 -0
- data/redist/xmlscan/doc/en/manual.html +356 -0
- data/redist/xmlscan/doc/en/manual.rd +402 -0
- data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
- data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
- data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
- data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
- data/redist/xmlscan/doc/src/Makefile +41 -0
- data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
- data/redist/xmlscan/doc/src/langsplit.rb +110 -0
- data/redist/xmlscan/doc/src/manual.rd.src +614 -0
- data/redist/xmlscan/install.rb +41 -0
- data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
- data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
- data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
- data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
- data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
- data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
- data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
- data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
- data/redist/xmlscan/memo/CONFORMANCE +1249 -0
- data/redist/xmlscan/memo/PRODUCTIONS +195 -0
- data/redist/xmlscan/memo/contentspec.ry +335 -0
- data/redist/xmlscan/samples/chibixml.rb +105 -0
- data/redist/xmlscan/samples/getxmlchar.rb +122 -0
- data/redist/xmlscan/samples/rexml.rb +159 -0
- data/redist/xmlscan/samples/xmlbench.rb +88 -0
- data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
- data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
- data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
- data/redist/xmlscan/samples/xmlconftest.rb +200 -0
- data/redist/xmlscan/test.rb +7 -0
- data/redist/xmlscan/tests/deftestcase.rb +73 -0
- data/redist/xmlscan/tests/runtest.rb +47 -0
- data/redist/xmlscan/tests/testall.rb +14 -0
- data/redist/xmlscan/tests/testencoding.rb +438 -0
- data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
- data/redist/xmlscan/tests/testnamespace.rb +457 -0
- data/redist/xmlscan/tests/testparser.rb +591 -0
- data/redist/xmlscan/tests/testscanner.rb +1749 -0
- data/redist/xmlscan/tests/testxmlchar.rb +143 -0
- data/redist/xmlscan/tests/visitor.rb +34 -0
- data/test/array_extensions_test.rb +2 -2
- data/test/ea_instantiator_test.rb +41 -0
- data/test/ecore_self_test.rb +53 -0
- data/test/environment_test.rb +11 -6
- data/test/metamodel_builder_test.rb +404 -245
- data/test/metamodel_roundtrip_test.rb +52 -0
- data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
- data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
- data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
- data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
- data/test/rgen_test.rb +3 -3
- data/test/template_language_test.rb +65 -39
- data/test/template_language_test/expected_result.txt +24 -3
- data/test/template_language_test/templates/code/array.tpl +11 -0
- data/test/template_language_test/templates/content/author.tpl +7 -0
- data/test/template_language_test/templates/content/chapter.tpl +1 -1
- data/test/template_language_test/templates/root.tpl +17 -8
- data/test/template_language_test/testout.txt +24 -3
- data/test/testmodel/class_model_checker.rb +119 -0
- data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
- data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
- data/test/testmodel/ea_testmodel_partial.xml +317 -0
- data/test/testmodel/ecore_model_checker.rb +101 -0
- data/test/testmodel/manual_testmodel.xml +22 -0
- data/test/testmodel/object_model_checker.rb +67 -0
- data/test/transformer_test.rb +18 -10
- data/test/xml_instantiator_test.rb +81 -8
- data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
- data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
- data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
- data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
- metadata +126 -28
- data/lib/ea/xmi_class_instantiator.rb +0 -46
- data/lib/ea/xmi_helper.rb +0 -26
- data/lib/ea/xmi_metamodel.rb +0 -34
- data/lib/ea/xmi_object_instantiator.rb +0 -46
- data/lib/ea/xmi_to_classmodel.rb +0 -78
- data/lib/ea/xmi_to_objectmodel.rb +0 -92
- data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
- data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
- data/lib/rgen/xml_instantiator.rb +0 -132
- data/lib/uml/objectmodel_instantiator.rb +0 -53
- data/lib/uml/uml_classmodel.rb +0 -92
- data/lib/uml/uml_objectmodel.rb +0 -65
- data/test/metamodel_generator_test.rb +0 -44
- data/test/metamodel_generator_test/TestModel.rb +0 -40
- data/test/metamodel_generator_test/expected_result.txt +0 -40
- data/test/xmi_class_instantiator_test.rb +0 -24
- data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
- data/test/xmi_object_instantiator_test.rb +0 -65
- data/test/xml_instantiator_test/testmodel.xml +0 -7
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# install.rb
|
4
|
+
#
|
5
|
+
# $Id: install.rb,v 1.2 2002/12/26 21:09:38 katsu Exp $
|
6
|
+
|
7
|
+
require 'rbconfig'
require 'ftools'
require 'find'
require 'fileutils'
require 'getoptlong'
|
11
|
+
|
12
|
+
# Build configuration table of the running interpreter.  RbConfig is used
# when it is defined; the legacy Config constant is only consulted on
# interpreters that do not provide RbConfig.
rb_config = defined?(RbConfig) ? RbConfig::CONFIG : Config::CONFIG

# Default installation directory: 'sitelibdir' if configured, else 'sitedir'.
DEFAULT_DESTDIR = rb_config['sitelibdir'] || rb_config['sitedir']

# Directory that contains this install script.
SRCDIR = File.dirname(__FILE__)
|
14
|
+
|
15
|
+
|
16
|
+
# Installs every regular file ending in ".rb" found below SRCDIR/<from>
# into <to>, keeping the relative directory layout.
#
# from:: source directory, relative to SRCDIR
# to::   destination root directory
#
# Missing destination directories are created and each file is installed
# with mode 0644; both steps run in verbose mode.
def install_rb(from, to)
  from = SRCDIR + '/' + from
  prefix = /\A#{Regexp.escape(from)}/
  Find.find(from) { |src|
    next unless File.file? src
    next unless /\.rb\z/ =~ src
    # Block form on purpose: a replacement *string* would have sequences
    # such as "\1" or "\\" inside +to+ interpreted as back-references.
    dst = src.sub(prefix) { to }
    FileUtils.mkdir_p File.dirname(dst), :verbose => true
    FileUtils.install src, dst, :mode => 0644, :verbose => true
  }
end
|
26
|
+
|
27
|
+
|
28
|
+
# Script body: pick the destination directory (DEFAULT_DESTDIR unless
# overridden with -d / --destdir) and install the "lib" tree into it.
# Any error raised while parsing the options ends the script with status 2.
destdir = DEFAULT_DESTDIR
begin
  parser = GetoptLong.new(['-d', '--destdir', GetoptLong::REQUIRED_ARGUMENT])
  parser.each_option do |opt, arg|
    destdir = arg if opt == '-d'
  end
rescue
  exit 2
end

install_rb 'lib', destdir
|
@@ -0,0 +1,311 @@
|
|
1
|
+
#
|
2
|
+
# xmlscan/encoding.rb
|
3
|
+
#
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
5
|
+
#
|
6
|
+
# $Id: encoding.rb,v 1.3 2003/01/12 04:10:33 katsu Exp $
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'xmlscan/visitor'
|
10
|
+
|
11
|
+
|
12
|
+
module XMLScan
|
13
|
+
|
14
|
+
# Error raised by the encoding registry in this file: undeclared encodings,
# KCODE conflicts, and conversions that cannot be performed.
class EncodingError < Error ; end
|
15
|
+
|
16
|
+
|
17
|
+
# Base converter.  It passes its input through untouched and has nothing
# to flush at the end.
class Converter

  def initialize; end

  # Returns +s+ itself, unchanged.
  def convert(s); s; end

  # Returns an empty string.
  def finish; ''; end

end
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
# Converter whose #convert implementation is supplied as a Proc.
class SimpleConverter < Converter

  # Returns a new anonymous subclass of the receiver whose #convert is
  # defined from +block+.
  def self.new_class(block)
    subclass = Class.new(self)
    subclass.module_eval { define_method(:convert, block) }
    subclass
  end

  # Probe whether Module#define_method works on this interpreter.  If the
  # probe raises, replace new_class with a variant that stores the Proc in
  # a constant and defines #convert from a string.
  begin
    probe = Class.new
    probe.module_eval { define_method(:a) {} }
    probe.new.a
  rescue Exception
    class << SimpleConverter
      remove_method :new_class
    end
    def SimpleConverter.new_class(block)
      subclass = Class.new(self)
      subclass.module_eval {
        const_set :ConvProc, block
        module_eval "def convert(s) ; ConvProc.call s ; end"
      }
      subclass
    end
  end

end
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
class EncodingClass
|
64
|
+
|
65
|
+
# KCODE reported by a regexp literal compiled with the /n flag.  The kcode
# readers in this class return it when no KCODE is known for an encoding.
KCODE_None = //n.kcode
|
66
|
+
|
67
|
+
|
68
|
+
# Describes one registered conversion: source encoding, target encoding,
# its cost, and (optionally) the class used to build a converter.
class ConverterProperty

  attr_reader :from, :to, :cost

  # from, to:: encoding objects (they must respond to #name for #inspect)
  # cost::     cost of this conversion
  # klass::    converter class, or nil when no converter object is needed
  def initialize(from, to, cost, klass = nil)
    @from = from
    @to = to
    @cost = cost
    @klass = klass
  end

  def inspect
    "#<Conversion #{@from.name}:#{@to.name} #{@cost}>"
  end

  # Returns a fresh instance of the converter class, or the (falsy) stored
  # value when no class was given.
  def new_converter
    return @klass unless @klass
    @klass.new
  end

end
|
85
|
+
|
86
|
+
|
87
|
+
class EncodingProperty
|
88
|
+
|
89
|
+
def inspect
|
90
|
+
s = "#<Encoding #{@name}/#{@kcode}>"
|
91
|
+
end
|
92
|
+
|
93
|
+
def initialize(name)
|
94
|
+
@name = name
|
95
|
+
conv = ConverterProperty.new(self, self, 0)
|
96
|
+
@converter = { self => conv }
|
97
|
+
@convertable_from = { self => true }
|
98
|
+
@kcode_map = {}
|
99
|
+
end
|
100
|
+
|
101
|
+
attr_reader :name, :kcode_map
|
102
|
+
|
103
|
+
|
104
|
+
def convertable_from(encoding)
|
105
|
+
@convertable_from[encoding] = true
|
106
|
+
end
|
107
|
+
protected :convertable_from
|
108
|
+
|
109
|
+
def changed
|
110
|
+
@convertable_from.each_key { |i| i.update_kcode_map }
|
111
|
+
end
|
112
|
+
private :changed
|
113
|
+
|
114
|
+
|
115
|
+
def kcode?
|
116
|
+
defined? @kcode
|
117
|
+
end
|
118
|
+
|
119
|
+
def kcode
|
120
|
+
if defined? @kcode then
|
121
|
+
@kcode
|
122
|
+
else
|
123
|
+
KCODE_None
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def kcode=(kcode)
|
128
|
+
if defined? @kcode then
|
129
|
+
raise EncodingError, "KCODE conflict" unless @kcode == kcode
|
130
|
+
else
|
131
|
+
@kcode = kcode
|
132
|
+
changed
|
133
|
+
end
|
134
|
+
kcode
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
def converter(to)
|
139
|
+
@converter[to]
|
140
|
+
end
|
141
|
+
|
142
|
+
def add_converter(to, cost, conv_class)
|
143
|
+
if equal? to then
|
144
|
+
raise EncodingError,"attempt to add a converter to the same encoding"
|
145
|
+
end
|
146
|
+
oldconv = @converter[to]
|
147
|
+
if not oldconv or cost <= oldconv.cost then
|
148
|
+
conv = ConverterProperty.new(self, to, cost, conv_class)
|
149
|
+
@converter[to] = conv
|
150
|
+
to.convertable_from self
|
151
|
+
changed
|
152
|
+
end
|
153
|
+
nil
|
154
|
+
end
|
155
|
+
|
156
|
+
|
157
|
+
def update_kcode_map
|
158
|
+
@kcode_map.clear
|
159
|
+
@converter.each_value { |conv|
|
160
|
+
k = conv.to.kcode
|
161
|
+
if conv.to.kcode? and k then
|
162
|
+
oldconv = @kcode_map[k]
|
163
|
+
@kcode_map[k] = conv if not oldconv or conv.cost <= oldconv.cost
|
164
|
+
end
|
165
|
+
}
|
166
|
+
end
|
167
|
+
protected :update_kcode_map
|
168
|
+
|
169
|
+
end
|
170
|
+
|
171
|
+
|
172
|
+
|
173
|
+
# Starts with an empty table of declared encodings (keyed by name).
def initialize
  @encoding = Hash.new
end
|
176
|
+
|
177
|
+
# Only one registry object exists: `new' is made private on the class and
# the single instance, created right here, is exposed through `instance'.
private_class_method :new

def self.instance
  @instance
end

@instance = new
|
182
|
+
|
183
|
+
|
184
|
+
private
|
185
|
+
|
186
|
+
# Looks up a declared encoding by case-insensitive name.
# Raises EncodingError when the name has not been declared.
def get_encoding(name)
  found = @encoding[name.downcase]
  return found if found
  raise EncodingError, "undeclared encoding `#{name}'"
end
|
191
|
+
|
192
|
+
# Returns the entry for +name+ (case-insensitive), declaring a new
# EncodingProperty first when the name is not known yet.
def touch_encoding(name)
  key = name.downcase
  @encoding[key] ||= EncodingProperty.new(key)
end
|
198
|
+
|
199
|
+
|
200
|
+
public
|
201
|
+
|
202
|
+
# Declares +newname+ as another name for the already declared encoding
# +oldname+.  Raises EncodingError if +newname+ is taken or +oldname+ is
# unknown.  Returns nil.
def alias(newname, oldname)
  newname = newname.downcase
  raise EncodingError, "encoding `#{newname}' is already declared" if @encoding.key?(newname)
  @encoding[newname] = get_encoding(oldname)
  nil
end
|
210
|
+
|
211
|
+
|
212
|
+
# KCODE of the encoding called +name+ (case-insensitive); KCODE_None when
# the encoding has not been declared.
def kcode(name)
  encoding = @encoding[name.downcase]
  encoding ? encoding.kcode : KCODE_None
end
|
220
|
+
|
221
|
+
|
222
|
+
# Assigns a KCODE to the encoding +name+, declaring the encoding if needed.
# A truthy +kcode+ is normalised by building an empty Regexp with it and
# reading back that regexp's kcode; a falsy one is stored as nil.
def set_kcode(name, kcode)
  code = kcode ? Regexp.new('', nil, kcode).kcode : nil
  touch_encoding(name).kcode = code
end
|
230
|
+
|
231
|
+
|
232
|
+
# Registers a converter from encoding +from+ to encoding +to+ with the
# given cost.  The converter is given either as a class, as a Proc in
# +conv_class+, or as a block -- exactly one of argument and block.
# Raises ArgumentError when both or neither are supplied.
def add_converter(from, to, cost, conv_class = nil, &block)
  raise ArgumentError, "multiple converters given" if block && conv_class
  raise ArgumentError, "no converter given" unless block || conv_class
  # A Proc (from either source) is wrapped into a SimpleConverter subclass.
  block = conv_class if Proc === conv_class
  conv_class = SimpleConverter.new_class(block) if block
  source = touch_encoding(from)
  target = touch_encoding(to)
  source.add_converter target, cost, conv_class
end
|
245
|
+
|
246
|
+
|
247
|
+
# Builds a converter object for the registered conversion +from+ -> +to+.
# Raises EncodingError when either name is undeclared or no such
# conversion is registered.
def converter(from, to)
  property = get_encoding(from).converter(get_encoding(to))
  raise EncodingError, "can't convert `#{from}' to `#{to}'" unless property
  property.new_converter
end
|
254
|
+
|
255
|
+
|
256
|
+
# Plans a conversion +from+ -> (some KCODE encoding) -> +to+; +to+
# defaults to +from+.  Returns a three element array:
#
#   [ converter into the intermediate encoding, its KCODE,
#     converter from the intermediate encoding to +to+ ]
#
# The intermediate encoding is the one with the lowest total cost; on
# equal cost the first candidate found is kept.  When +from+ has no KCODE
# candidates at all, [nil, kcode, nil] is returned if +from+ has a KCODE
# and +to+ is the same encoding.  Raises EncodingError otherwise.
def converter3(from, to = nil)
  to ||= from
  source = get_encoding(from)
  target = get_encoding(to)
  candidates = source.kcode_map
  if candidates.empty?
    return [ nil, source.kcode, nil ] if source.kcode && source.equal?(target)
    raise EncodingError, "can't convert `#{from}' to any KCODE"
  end
  best_cost = best_kcode = best_first = best_second = nil
  candidates.each do |kcode, first|
    second = first.to.converter(target)
    next unless second
    total = first.cost + second.cost
    if !best_cost || total < best_cost
      best_cost, best_kcode, best_first, best_second = total, kcode, first, second
    end
  end
  unless best_cost
    raise EncodingError, "can't convert `#{from}' to `#{to}' via any KCODE"
  end
  [ best_first.new_converter, best_kcode, best_second && best_second.new_converter ]
end
|
286
|
+
|
287
|
+
end
|
288
|
+
|
289
|
+
|
290
|
+
# The single registry object, followed by the built-in declarations.
Encoding = EncodingClass.instance

Encoding.set_kcode 'utf-8', 'U'
Encoding.set_kcode 'utf-16', nil
Encoding.alias 'iso-10646-ucs-2', 'utf-16'

# Remaining declarations, in their original order: [name, KCODE letter].
[
  ['iso-10646-ucs-4', nil],
  ['iso-8859-1', 'N'],
  ['iso-8859-2', 'N'],
  ['iso-8859-3', 'N'],
  ['iso-8859-4', 'N'],
  ['iso-8859-5', 'N'],
  ['iso-8859-6', 'N'],
  ['iso-8859-7', 'N'],
  ['iso-8859-8', 'N'],
  ['iso-8859-9', 'N'],
  ['iso-2022-jp', nil],
  ['shift_jis', 'S'],
  ['Windows-31J', 'S'],
  ['euc-jp', 'E'],
  ['euc-kr', 'E']
].each { |name, letter| Encoding.set_kcode name, letter }
|
310
|
+
|
311
|
+
end
|
@@ -0,0 +1,289 @@
|
|
1
|
+
#
|
2
|
+
# xmlscan/htmlscan.rb
|
3
|
+
#
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
5
|
+
#
|
6
|
+
# $Id: htmlscan.rb,v 1.18 2003/05/01 15:36:50 katsu Exp $
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'xmlscan/scanner'
|
10
|
+
|
11
|
+
|
12
|
+
module XMLScan
|
13
|
+
|
14
|
+
class HTMLScanner < XMLScanner
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
# Deliberately empty: well-formedness errors reported by XMLScanner are
# ignored by the HTML scanner.  According to the original author's note,
# XMLScanner only raises them in scan_stag, a method HTMLScanner overrides
# completely, so this hook is in fact never called.
def wellformed_error(msg)
end
|
24
|
+
|
25
|
+
# Callbacks that must never fire while scanning HTML.  Each of them raises
# a RuntimeError so that an unexpected call shows up as a bug.

def on_xmldecl
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_version(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_encoding(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_standalone(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_other(name, value)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_end
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_stag_end_empty(name)
  raise RuntimeError, "[BUG] this method must be never called"
end
|
52
|
+
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Scans an SGML-style comment declaration.  +s+ starts with `<!--' and is
# modified in place.  Further input is pulled from @src.  The collected
# comment text is reported through on_comment; malformed input is reported
# through parse_error and scanning carries on.
def scan_comment(s)
  s[0,4] = ''   # drop the leading `<!--'
  text = ''
  # Collect chunks up to the first `--'.
  while !(/--/n =~ s)
    text << s
    s = @src.get_plain
    if !s
      parse_error "unterminated comment meets EOF"
      return on_comment(text)
    end
  end
  text << $`
  s = $'
  # Regular end: nothing but whitespace after `--' and the tag closes
  # (`-->' or `-- >').  Anything else is handled inside the loop.
  until (s.empty? || s.strip.empty?) && @src.close_tag
    text << '--'
    if /\A\s*--/n =~ s   # <!--hoge-- --
      text << $&
      s = $'
      if s.empty? && @src.close_tag   # <!--hoge-- -->
        parse_error "`-->' is found but comment must not end here"
        text.chop!.chop!
        break
      end
    else   # <!--hoge-- fuga
      parse_error "only whitespace can appear between two comments"
    end
    if (/\A-\s*\z/n =~ s) && @src.close_tag   # <!--hoge--->
      parse_error "`-->' is found but comment must not end here"
      text.chop!
      break
    end
    # Same collection loop as above, repeated for the next `--'.
    while !(/--/n =~ s)
      text << s
      s = @src.get_plain
      if !s
        parse_error "unterminated comment meets EOF"
        return on_comment(text)
      end
    end
    text << $`
    s = $'
  end
  on_comment text
end
|
100
|
+
|
101
|
+
|
102
|
+
alias scan_xml_pi scan_pi # PIO "<?" PIC "?>" -- <? PI ?> --
|
103
|
+
|
104
|
+
|
105
|
+
# Scans an SGML-style processing instruction (`<?PI >').  +s+ starts with
# `<?' and is modified in place: the prefix is removed and following chunks
# from @src are appended until the tag closes.  The result is reported as
# on_pi('', text).  Reaching the end of input first is reported through
# parse_error.
def scan_pi(s)
  s[0,2] = ''   # drop the leading `<?'
  body = s
  until @src.close_tag
    chunk = @src.get_plain
    if chunk
      body << chunk
    else
      parse_error "unterminated PI meets EOF"
      break
    end
  end
  on_pi '', body
end
|
118
|
+
|
119
|
+
|
120
|
+
# Scans a start tag.  +s+ begins with `<'.  The tag name, its attributes
# and the end of the tag are reported through on_stag, on_attribute,
# scan_attvalue / on_attr_value, on_attribute_end and on_stag_end.
# Malformed input is reported through parse_error and scanning continues.
def scan_stag(s)
  if /(?=[\/\s='"])/n =~ s
    # A delimiter follows the name, so attributes may be present.
    name = $`
    s = $'
    name[0,1] = ''   # remove `<'
    if name.empty?   # `< tag' or `<='
      parse_error "parse error at `<'"
      s << '>' if @src.close_tag
      return on_chardata('<' << s)
    end
    on_stag name
    begin
      # Set to the rest of the input when a quoted value ran over several
      # chunks; that rest is then scanned for further attributes.
      more = false
      s.scan(
        /([^\s=\/'"]+)(?:\s*=\s*(?:('[^']*'?|"[^"]*"?)|([^\s='"]+)))?|(\S)/n
      ) do |key, quoted, bare, error|
        if !key
          parse_error "parse error at `#{error}'"
        elsif quoted   # key="value"
          on_attribute key
          quote = quoted.slice!(0,1)
          if quoted[-1] == quote[0]
            # Closing quote found in the same chunk.
            quoted.chop!
            scan_attvalue quoted unless quoted.empty?
          else
            # Value continues in the following chunks of @src.
            scan_attvalue quoted unless quoted.empty?
            begin
              s = @src.get
              unless s
                parse_error "unterminated attribute `#{key}' meets EOF"
                break
              end
              first = s[0]
              quoted, s = s.split(quote, 2)
              scan_attvalue '>' unless first == ?< || first == ?>
              scan_attvalue quoted if first
            end until s
            more = s
          end
          on_attribute_end key
        elsif bare   # key=value
          on_attribute key
          on_attr_value bare
          on_attribute_end key
        else   # value
          on_attribute nil
          on_attr_value key
          on_attribute_end nil
        end
      end
    end while more
    found_unclosed_stag name unless @src.close_tag
    on_stag_end name
  else
    # No delimiter at all: the whole chunk is the tag name.
    name = s
    name[0,1] = ''   # remove `<'
    if name.empty?   # <> or <<
      return found_empty_stag if @src.close_tag
      parse_error "parse error at `<'"
      return on_chardata('<')
    end
    on_stag name
    found_unclosed_stag name unless @src.close_tag
    on_stag_end name
  end
end
|
193
|
+
|
194
|
+
|
195
|
+
# Reads raw content from @src up to, but not including, the next chunk
# that starts with `</', and returns it as one string.  Returns '' when
# the input is exhausted or the very next chunk already starts with `</'.
# This method should be called only from on_stag_end.
def get_cdata_content
  head = @src.test
  return '' if !head || (head[0] == ?< && head[1] == ?/)
  content = @src.get
  while (head = @src.test) && !(head[0] == ?< && head[1] == ?/)
    content << @src.get_plain
  end
  content
end
public :get_cdata_content
|
208
|
+
|
209
|
+
|
210
|
+
# Handles a `<!' declaration that is neither a comment nor a doctype.
# An empty declaration (`<!>') is reported as an empty comment; anything
# else is reported through parse_error and skipped up to the closing `>'
# or the end of input.
def scan_bang_tag(s)
  return on_comment('') if s == '<!' && @src.close_tag
  parse_error "parse error at `<!'"
  s = @src.get_plain while s && !@src.close_tag
  nil
end
|
220
|
+
|
221
|
+
|
222
|
+
# An internal DTD subset is not allowed in HTML: report it through
# parse_error, then hand +s+ to skip_internal_dtd.
def scan_internal_dtd(s)
  parse_error("DTD subset is found but it is not permitted in HTML")
  skip_internal_dtd(s)
end
|
226
|
+
|
227
|
+
|
228
|
+
# Accepts the keywords PUBLIC and SYSTEM in any letter case and returns
# them upper-cased; every other value is passed on to the inherited
# implementation.
def found_invalid_pubsys(pubsys)
  keyword = pubsys.upcase
  case keyword
  when 'PUBLIC', 'SYSTEM' then keyword
  else super
  end
end
|
233
|
+
|
234
|
+
|
235
|
+
# Scans the document prolog chunk by chunk, starting with +s+ and pulling
# further chunks from @src: comments, doctype declarations (matched
# case-insensitively), processing instructions and whitespace-only chunks.
# The first chunk that is none of these -- or the next chunk from @src
# once the input seems exhausted -- is handed to scan_content.  A second
# doctype declaration is reported through parse_error but still scanned.
def scan_prolog(s)
  doctypes_seen = 0
  while s
    if s[0] == ?<
      case s[1]
      when ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment s
        elsif /\A<!doctype(?=\s)/in =~ s
          doctypes_seen += 1
          parse_error "another document type declaration is found" if doctypes_seen > 1
          scan_doctype $'
        else
          break
        end
      when ??
        scan_pi s
      else
        break
      end
    elsif s.strip.empty?
      on_prolog_space s
    else
      break
    end
    s = @src.get
  end
  scan_content(s || @src.get)
end
|
265
|
+
|
266
|
+
end
|
267
|
+
|
268
|
+
end
|
269
|
+
|
270
|
+
|
271
|
+
|
272
|
+
|
273
|
+
|
274
|
+
# Stand-alone driver: when this file is run directly, scan ARGF as HTML,
# print parse errors (only in verbose mode) and report the user CPU time
# spent in the scanner.
if $0 == __FILE__ then
  # Visitor that prints each parse error as "path:line: message" to
  # STDERR when $VERBOSE is set.
  class TestVisitor
    include XMLScan::Visitor
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  $s = scan = XMLScan::HTMLScanner.new(TestVisitor.new)
  src = ARGF
  # Give ARGF a #path method that answers with the current file name.
  def src.path; filename; end
  # Process.times replaces the obsolete Time.times, which is no longer
  # available on current interpreters.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|