rgen 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +20 -1
- data/MIT-LICENSE +1 -1
- data/README +12 -9
- data/lib/instantiators/ea_instantiator.rb +36 -0
- data/lib/metamodels/uml13_metamodel.rb +559 -0
- data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
- data/lib/mmgen/metamodel_generator.rb +5 -5
- data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
- data/lib/mmgen/mmgen.rb +6 -4
- data/lib/mmgen/templates/annotations.tpl +37 -0
- data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
- data/lib/rgen/ecore/ecore.rb +190 -0
- data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
- data/lib/rgen/ecore/ecore_transformer.rb +85 -0
- data/lib/rgen/environment.rb +9 -24
- data/lib/rgen/find_helper.rb +68 -0
- data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
- data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
- data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
- data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
- data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
- data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
- data/lib/rgen/metamodel_builder.rb +103 -9
- data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
- data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
- data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
- data/lib/rgen/metamodel_builder/data_types.rb +67 -0
- data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
- data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
- data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
- data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
- data/lib/rgen/model_comparator.rb +56 -0
- data/lib/rgen/model_dumper.rb +5 -5
- data/lib/rgen/name_helper.rb +17 -1
- data/lib/rgen/template_language.rb +148 -28
- data/lib/rgen/template_language/directory_template_container.rb +56 -38
- data/lib/rgen/template_language/output_handler.rb +93 -77
- data/lib/rgen/template_language/template_container.rb +186 -143
- data/lib/rgen/transformer.rb +19 -14
- data/lib/transformers/uml13_to_ecore.rb +75 -0
- data/redist/xmlscan/ChangeLog +1301 -0
- data/redist/xmlscan/README +34 -0
- data/redist/xmlscan/THANKS +11 -0
- data/redist/xmlscan/doc/changes.html +74 -0
- data/redist/xmlscan/doc/changes.rd +80 -0
- data/redist/xmlscan/doc/en/conformance.html +136 -0
- data/redist/xmlscan/doc/en/conformance.rd +152 -0
- data/redist/xmlscan/doc/en/manual.html +356 -0
- data/redist/xmlscan/doc/en/manual.rd +402 -0
- data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
- data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
- data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
- data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
- data/redist/xmlscan/doc/src/Makefile +41 -0
- data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
- data/redist/xmlscan/doc/src/langsplit.rb +110 -0
- data/redist/xmlscan/doc/src/manual.rd.src +614 -0
- data/redist/xmlscan/install.rb +41 -0
- data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
- data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
- data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
- data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
- data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
- data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
- data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
- data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
- data/redist/xmlscan/memo/CONFORMANCE +1249 -0
- data/redist/xmlscan/memo/PRODUCTIONS +195 -0
- data/redist/xmlscan/memo/contentspec.ry +335 -0
- data/redist/xmlscan/samples/chibixml.rb +105 -0
- data/redist/xmlscan/samples/getxmlchar.rb +122 -0
- data/redist/xmlscan/samples/rexml.rb +159 -0
- data/redist/xmlscan/samples/xmlbench.rb +88 -0
- data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
- data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
- data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
- data/redist/xmlscan/samples/xmlconftest.rb +200 -0
- data/redist/xmlscan/test.rb +7 -0
- data/redist/xmlscan/tests/deftestcase.rb +73 -0
- data/redist/xmlscan/tests/runtest.rb +47 -0
- data/redist/xmlscan/tests/testall.rb +14 -0
- data/redist/xmlscan/tests/testencoding.rb +438 -0
- data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
- data/redist/xmlscan/tests/testnamespace.rb +457 -0
- data/redist/xmlscan/tests/testparser.rb +591 -0
- data/redist/xmlscan/tests/testscanner.rb +1749 -0
- data/redist/xmlscan/tests/testxmlchar.rb +143 -0
- data/redist/xmlscan/tests/visitor.rb +34 -0
- data/test/array_extensions_test.rb +2 -2
- data/test/ea_instantiator_test.rb +41 -0
- data/test/ecore_self_test.rb +53 -0
- data/test/environment_test.rb +11 -6
- data/test/metamodel_builder_test.rb +404 -245
- data/test/metamodel_roundtrip_test.rb +52 -0
- data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
- data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
- data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
- data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
- data/test/rgen_test.rb +3 -3
- data/test/template_language_test.rb +65 -39
- data/test/template_language_test/expected_result.txt +24 -3
- data/test/template_language_test/templates/code/array.tpl +11 -0
- data/test/template_language_test/templates/content/author.tpl +7 -0
- data/test/template_language_test/templates/content/chapter.tpl +1 -1
- data/test/template_language_test/templates/root.tpl +17 -8
- data/test/template_language_test/testout.txt +24 -3
- data/test/testmodel/class_model_checker.rb +119 -0
- data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
- data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
- data/test/testmodel/ea_testmodel_partial.xml +317 -0
- data/test/testmodel/ecore_model_checker.rb +101 -0
- data/test/testmodel/manual_testmodel.xml +22 -0
- data/test/testmodel/object_model_checker.rb +67 -0
- data/test/transformer_test.rb +18 -10
- data/test/xml_instantiator_test.rb +81 -8
- data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
- data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
- data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
- data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
- metadata +126 -28
- data/lib/ea/xmi_class_instantiator.rb +0 -46
- data/lib/ea/xmi_helper.rb +0 -26
- data/lib/ea/xmi_metamodel.rb +0 -34
- data/lib/ea/xmi_object_instantiator.rb +0 -46
- data/lib/ea/xmi_to_classmodel.rb +0 -78
- data/lib/ea/xmi_to_objectmodel.rb +0 -92
- data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
- data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
- data/lib/rgen/xml_instantiator.rb +0 -132
- data/lib/uml/objectmodel_instantiator.rb +0 -53
- data/lib/uml/uml_classmodel.rb +0 -92
- data/lib/uml/uml_objectmodel.rb +0 -65
- data/test/metamodel_generator_test.rb +0 -44
- data/test/metamodel_generator_test/TestModel.rb +0 -40
- data/test/metamodel_generator_test/expected_result.txt +0 -40
- data/test/xmi_class_instantiator_test.rb +0 -24
- data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
- data/test/xmi_object_instantiator_test.rb +0 -65
- data/test/xml_instantiator_test/testmodel.xml +0 -7
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
#
|
|
3
|
+
# install.rb
|
|
4
|
+
#
|
|
5
|
+
# $Id: install.rb,v 1.2 2002/12/26 21:09:38 katsu Exp $
|
|
6
|
+
|
|
7
|
+
require 'rbconfig'
begin
  require 'ftools'
rescue LoadError
  # ftools is not shipped with Ruby 1.9 and later; FileUtils is used instead.
end
require 'fileutils'
require 'find'
require 'getoptlong'
|
|
11
|
+
|
|
12
|
+
# Default installation target: rbconfig's 'sitelibdir', or 'sitedir' when the
# former is not set.  RbConfig is preferred because the legacy top-level
# Config constant is gone from current Ruby versions; Config is still used on
# interpreters that do not define RbConfig.
DEFAULT_DESTDIR = begin
  rb_config = defined?(RbConfig) ? RbConfig : Config
  rb_config::CONFIG['sitelibdir'] || rb_config::CONFIG['sitedir']
end

# Directory containing this script; sources are looked up relative to it.
SRCDIR = File.dirname(__FILE__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Installs every regular file ending in ".rb" found below +from+ into +to+,
# keeping the relative directory layout.
#
# from:: source directory, relative to SRCDIR
# to::   destination root directory
#
# Directories are created as needed, files are installed with mode 0644, and
# both operations run in verbose mode.
def install_rb(from, to)
  from = SRCDIR + '/' + from
  Find.find(from) { |src|
    next unless File.file? src
    next unless /\.rb\z/ =~ src
    # Block form: a replacement *string* would interpret backslashes in +to+
    # (e.g. "C:\ruby\1.8") as back-references.
    dst = src.sub(/\A#{Regexp.escape(from)}/) { to }
    # FileUtils replaces the File.makedirs / File.install extensions that
    # came from the ftools library.
    FileUtils.mkdir_p(File.dirname(dst), :verbose => true)
    FileUtils.install(src, dst, :mode => 0644, :verbose => true)
  }
end
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Command line handling: -d / --destdir DIR overrides the default target.
destdir = DEFAULT_DESTDIR
begin
  options = GetoptLong.new(
    [ "-d", "--destdir", GetoptLong::REQUIRED_ARGUMENT ]
  )
  options.each_option do |flag, value|
    destdir = value if flag == '-d'
  end
rescue
  # any error raised while reading the options ends the script with status 2
  exit 2
end

install_rb "lib", destdir
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
#
|
|
2
|
+
# xmlscan/encoding.rb
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
|
5
|
+
#
|
|
6
|
+
# $Id: encoding.rb,v 1.3 2003/01/12 04:10:33 katsu Exp $
|
|
7
|
+
#
|
|
8
|
+
|
|
9
|
+
require 'xmlscan/visitor'
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
module XMLScan
|
|
13
|
+
|
|
14
|
+
# Raised by the encoding registry in this file (undeclared or duplicate
# encodings, KCODE conflicts, impossible conversions).
class EncodingError < Error
end
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Base converter: hands every string back untouched.
class Converter

  def initialize; end

  # Returns +s+ itself.
  def convert(s)
    s
  end

  # Returns an empty string.
  def finish
    ''
  end

end
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Converter whose #convert is supplied as a Proc.
class SimpleConverter < Converter

  # Builds an anonymous subclass of the receiver whose #convert is +block+.
  #
  # block:: Proc taking the string to convert
  # Returns the new class.
  def self.new_class(block)
    Class.new(self).module_eval {
      define_method(:convert, block)
      self
    }
  end

  # Probe whether Module#define_method works.  Only ordinary failures
  # (StandardError / ScriptError) select the fallback below; Interrupt,
  # SystemExit and similar are no longer swallowed.
  begin
    Class.new.module_eval { define_method(:a) {}; self }.new.a
  rescue StandardError, ScriptError
    class << SimpleConverter
      remove_method :new_class
    end

    # Fallback: keep the Proc in a constant of the new class and define
    # #convert from source text that calls it.
    def self.new_class(block)
      Class.new(self).module_eval {
        const_set :ConvProc, block
        module_eval "def convert(s) ; ConvProc.call s ; end"
        self
      }
    end
  end

end
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class EncodingClass
|
|
64
|
+
|
|
65
|
+
KCODE_None = //n.kcode
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# One conversion edge: source encoding, target encoding, cost and an
# optional converter class.
class ConverterProperty

  attr_reader :from, :to, :cost

  # klass may be omitted (nil); new_converter then yields nil.
  def initialize(from, to, cost, klass = nil)
    @from = from
    @to = to
    @cost = cost
    @klass = klass
  end

  # Instantiates the converter class, or returns the (false/nil) class slot
  # unchanged when none was given.
  def new_converter
    return @klass unless @klass
    @klass.new
  end

  def inspect
    "#<Conversion #{@from.name}:#{@to.name} #{@cost}>"
  end

end
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class EncodingProperty
|
|
88
|
+
|
|
89
|
+
def inspect
|
|
90
|
+
s = "#<Encoding #{@name}/#{@kcode}>"
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def initialize(name)
|
|
94
|
+
@name = name
|
|
95
|
+
conv = ConverterProperty.new(self, self, 0)
|
|
96
|
+
@converter = { self => conv }
|
|
97
|
+
@convertable_from = { self => true }
|
|
98
|
+
@kcode_map = {}
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
attr_reader :name, :kcode_map
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def convertable_from(encoding)
|
|
105
|
+
@convertable_from[encoding] = true
|
|
106
|
+
end
|
|
107
|
+
protected :convertable_from
|
|
108
|
+
|
|
109
|
+
def changed
|
|
110
|
+
@convertable_from.each_key { |i| i.update_kcode_map }
|
|
111
|
+
end
|
|
112
|
+
private :changed
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def kcode?
|
|
116
|
+
defined? @kcode
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def kcode
|
|
120
|
+
if defined? @kcode then
|
|
121
|
+
@kcode
|
|
122
|
+
else
|
|
123
|
+
KCODE_None
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def kcode=(kcode)
|
|
128
|
+
if defined? @kcode then
|
|
129
|
+
raise EncodingError, "KCODE conflict" unless @kcode == kcode
|
|
130
|
+
else
|
|
131
|
+
@kcode = kcode
|
|
132
|
+
changed
|
|
133
|
+
end
|
|
134
|
+
kcode
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def converter(to)
|
|
139
|
+
@converter[to]
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
def add_converter(to, cost, conv_class)
|
|
143
|
+
if equal? to then
|
|
144
|
+
raise EncodingError,"attempt to add a converter to the same encoding"
|
|
145
|
+
end
|
|
146
|
+
oldconv = @converter[to]
|
|
147
|
+
if not oldconv or cost <= oldconv.cost then
|
|
148
|
+
conv = ConverterProperty.new(self, to, cost, conv_class)
|
|
149
|
+
@converter[to] = conv
|
|
150
|
+
to.convertable_from self
|
|
151
|
+
changed
|
|
152
|
+
end
|
|
153
|
+
nil
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def update_kcode_map
|
|
158
|
+
@kcode_map.clear
|
|
159
|
+
@converter.each_value { |conv|
|
|
160
|
+
k = conv.to.kcode
|
|
161
|
+
if conv.to.kcode? and k then
|
|
162
|
+
oldconv = @kcode_map[k]
|
|
163
|
+
@kcode_map[k] = conv if not oldconv or conv.cost <= oldconv.cost
|
|
164
|
+
end
|
|
165
|
+
}
|
|
166
|
+
end
|
|
167
|
+
protected :update_kcode_map
|
|
168
|
+
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# Starts with an empty table of declared encodings (lower-case name => entry).
def initialize
  @encoding = {}
end

# The class keeps a single shared object: .new is private and the one
# instance created right here is exposed through .instance.
class << self
  attr_reader :instance
  private :new
end
@instance = new
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
private
|
|
185
|
+
|
|
186
|
+
# Looks up a declared encoding by name (case-insensitive).
# Raises EncodingError when the name has not been declared.
def get_encoding(name)
  @encoding[name.downcase] or
    raise EncodingError, "undeclared encoding `#{name}'"
end
|
|
191
|
+
|
|
192
|
+
# Returns the entry for +name+ (case-insensitive), creating and storing a new
# EncodingProperty first when there is none yet.
def touch_encoding(name)
  key = name.downcase
  @encoding[key] ||= EncodingProperty.new(key)
end
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
public
|
|
201
|
+
|
|
202
|
+
# Makes +newname+ (case-insensitive) refer to the same entry as the already
# declared +oldname+.  Raises EncodingError when +newname+ is taken, or (via
# get_encoding) when +oldname+ is unknown.  Returns nil.
def alias(newname, oldname)
  key = newname.downcase
  raise EncodingError, "encoding `#{key}' is already declared" if @encoding.key?(key)
  @encoding[key] = get_encoding(oldname)
  nil
end
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# KCODE of the named encoding (case-insensitive); KCODE_None for names that
# have not been declared.
def kcode(name)
  entry = @encoding[name.downcase]
  entry ? entry.kcode : KCODE_None
end
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# Declares the KCODE of +name+, creating the encoding entry when needed.
# A truthy +kcode+ is normalised through Regexp#kcode; anything else is
# stored as nil.
def set_kcode(name, kcode)
  kcode = kcode ? Regexp.new('', nil, kcode).kcode : nil
  touch_encoding(name).kcode = kcode
end
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# Registers a converter from encoding +from+ to encoding +to+ at +cost+.
# Exactly one of +conv_class+ / block must be given; a Proc passed as
# +conv_class+ is treated like a block.  Blocks and Procs are wrapped with
# SimpleConverter.new_class.  Both encodings are created on demand.
#
# Raises ArgumentError when both or neither converter form is supplied.
def add_converter(from, to, cost, conv_class = nil, &block)
  raise ArgumentError, "multiple converters given" if block && conv_class
  raise ArgumentError, "no converter given" if !block && !conv_class

  block = conv_class if Proc === conv_class
  conv_class = SimpleConverter.new_class(block) if block

  source = touch_encoding(from)
  target = touch_encoding(to)
  source.add_converter(target, cost, conv_class)
end
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# Returns a fresh converter object for +from+ -> +to+ (both must be declared).
# Raises EncodingError when no converter is registered for that pair.
def converter(from, to)
  source = get_encoding(from)
  target = get_encoding(to)
  property = source.converter(target)
  if property
    property.new_converter
  else
    raise EncodingError, "can't convert `#{from}' to `#{to}'"
  end
end
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
# Plans a conversion from +from+ to +to+ (default: +from+) that passes
# through an encoding with a KCODE.
#
# Returns [first_converter, kcode, second_converter]:
# * when +from+ has no kcode map but has a KCODE and equals +to+, the result
#   is [nil, kcode, nil];
# * otherwise the KCODE route with the lowest summed cost is chosen (the
#   first one found wins ties).
#
# Raises EncodingError when no such route exists.
def converter3(from, to = nil)
  to ||= from
  fromenc = get_encoding(from)
  toenc = get_encoding(to)
  routes = fromenc.kcode_map

  if routes.empty?
    unless fromenc.kcode && fromenc.equal?(toenc)
      raise EncodingError, "can't convert `#{from}' to any KCODE"
    end
    return [ nil, fromenc.kcode, nil ]
  end

  best_cost = best_kcode = best_conv = nil
  routes.each_pair do |kcode, first_hop|
    second_hop = first_hop.to.converter(toenc)
    next unless second_hop
    total = first_hop.cost + second_hop.cost
    if best_cost.nil? || total < best_cost
      best_cost, best_kcode, best_conv = total, kcode, first_hop
    end
  end
  unless best_cost
    raise EncodingError, "can't convert `#{from}' to `#{to}' via any KCODE"
  end

  head = best_conv.new_converter
  tail = best_conv.to.converter(toenc)
  tail = tail && tail.new_converter
  [ head, best_kcode, tail ]
end
|
|
286
|
+
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
# The shared registry object, followed by the built-in KCODE declarations.
Encoding = EncodingClass.instance

Encoding.set_kcode 'utf-8', 'U'
Encoding.set_kcode 'utf-16', nil
Encoding.alias 'iso-10646-ucs-2', 'utf-16'
Encoding.set_kcode 'iso-10646-ucs-4', nil
# iso-8859-1 .. iso-8859-9 all use KCODE 'N'
(1..9).each { |part| Encoding.set_kcode "iso-8859-#{part}", 'N' }
Encoding.set_kcode 'iso-2022-jp', nil
[ 'shift_jis', 'Windows-31J' ].each { |name| Encoding.set_kcode name, 'S' }
[ 'euc-jp', 'euc-kr' ].each { |name| Encoding.set_kcode name, 'E' }
|
|
310
|
+
|
|
311
|
+
end
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
#
|
|
2
|
+
# xmlscan/htmlscan.rb
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
|
5
|
+
#
|
|
6
|
+
# $Id: htmlscan.rb,v 1.18 2003/05/01 15:36:50 katsu Exp $
|
|
7
|
+
#
|
|
8
|
+
|
|
9
|
+
require 'xmlscan/scanner'
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
module XMLScan
|
|
13
|
+
|
|
14
|
+
class HTMLScanner < XMLScanner
|
|
15
|
+
|
|
16
|
+
private
|
|
17
|
+
|
|
18
|
+
# Deliberately empty: well-formedness errors reported by XMLScanner are
# ignored.  Per the original author's note, XMLScanner raises them only from
# scan_stag, which HTMLScanner overrides completely, so this method is in
# fact never called.
def wellformed_error(msg)
end
|
|
24
|
+
|
|
25
|
+
# The handlers below are never expected to run in this scanner; each one
# raises a RuntimeError flagged as "[BUG]" if it is reached.

def on_xmldecl
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_version(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_encoding(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_standalone(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_other(name, value)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_end
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_stag_end_empty(name)
  raise RuntimeError, "[BUG] this method must be never called"
end
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
private
|
|
55
|
+
|
|
56
|
+
# Scans a comment whose first chunk +s+ starts with `<!--' and reports the
# collected text through on_comment.  More input is pulled from @src with
# get_plain whenever the current chunk holds no `--'.  Irregular endings are
# reported through parse_error; hitting EOF reports what was collected so far.
def scan_comment(s)
  s[0, 4] = ''                        # remove `<!--'
  comm = ''

  # read up to the first `--'
  until (hit = /--/n.match(s))
    comm << s
    s = @src.get_plain
    unless s
      parse_error "unterminated comment meets EOF"
      return on_comment(comm)
    end
  end
  comm << hit.pre_match
  s = hit.post_match

  # the comment is over once only whitespace follows `--' and the tag closes
  while !((s.empty? || s.strip.empty?) && @src.close_tag)   # --> or -- >
    comm << '--'
    gap = /\A\s*--/n.match(s)
    if gap                                                  # <!--hoge-- --
      comm << gap[0]
      s = gap.post_match
      if s.empty? && @src.close_tag                         # <!--hoge-- -->
        parse_error "`-->' is found but comment must not end here"
        comm.chop!.chop!
        break
      end
    else                                                    # <!--hoge-- fuga
      parse_error "only whitespace can appear between two comments"
    end
    if (/\A-\s*\z/n =~ s) && @src.close_tag                 # <!--hoge--->
      parse_error "`-->' is found but comment must not end here"
      comm.chop!
      break
    end
    # same search as at the top, repeated inline
    until (hit = /--/n.match(s))
      comm << s
      s = @src.get_plain
      unless s
        parse_error "unterminated comment meets EOF"
        return on_comment(comm)
      end
    end
    comm << hit.pre_match
    s = hit.post_match
  end
  on_comment comm
end
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
alias scan_xml_pi scan_pi # PIO "<?" PIC "?>" -- <? PI ?> --
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Scans a processing instruction in the `<?PI >' form.  The text after `<?'
# is accumulated, pulling further chunks from @src until the tag closes, and
# reported via on_pi with an empty target.  EOF before the close is reported
# through parse_error and the text read so far is still delivered.
def scan_pi(s)
  s[0, 2] = ''                  # remove `<?'
  pi = s
  while !@src.close_tag
    chunk = @src.get_plain
    if chunk
      pi << chunk
    else
      parse_error "unterminated PI meets EOF"
      break
    end
  end
  on_pi '', pi
end
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Scans a start tag.  +s+ is the chunk beginning with `<'.
#
# The tag name ends at the first `/', whitespace, `=' or quote.  Attributes
# are then reported as on_attribute / value callbacks / on_attribute_end:
# quoted values go through scan_attvalue, unquoted ones through
# on_attr_value, and a bare word is reported as a value with a nil key.
# A quoted value that is still open at the end of the chunk is continued
# with further chunks from @src.get.
def scan_stag(s)
  boundary = /(?=[\/\s='"])/n.match(s)

  unless boundary
    # the chunk is nothing but `<name'
    name = s
    name[0, 1] = ''                   # remove `<'
    if name.empty?                    # <> or <<
      return found_empty_stag if @src.close_tag
      parse_error "parse error at `<'"
      return on_chardata('<')
    end
    on_stag name
    found_unclosed_stag name unless @src.close_tag
    return on_stag_end(name)
  end

  name = boundary.pre_match
  s = boundary.post_match
  name[0, 1] = ''                     # remove `<'
  if name.empty?                      # `< tag' or `<='
    parse_error "parse error at `<'"
    s << '>' if @src.close_tag
    return on_chardata('<' << s)
  end

  on_stag name
  rescan = true
  while rescan
    rescan = false
    s.scan(
      /([^\s=\/'"]+)(?:\s*=\s*(?:('[^']*'?|"[^"]*"?)|([^\s='"]+)))?|(\S)/n
    ) { |key, val, val2, error|
      if !key
        parse_error "parse error at `#{error}'"
      elsif val                       # key="value"
        on_attribute key
        qmark = val.slice!(0, 1)
        if val[-1] == qmark[0]
          val.chop!
          scan_attvalue val unless val.empty?
        else
          # closing quote missing: keep reading chunks until it shows up
          scan_attvalue val unless val.empty?
          while true
            s = @src.get
            unless s
              parse_error "unterminated attribute `#{key}' meets EOF"
              break
            end
            c = s[0]
            val, s = s.split(qmark, 2)
            scan_attvalue '>' unless c == ?< || c == ?>
            scan_attvalue val if c
            break if s
          end
          rescan = s                  # what follows the quote is scanned next
        end
        on_attribute_end key
      elsif val2                      # key=value
        on_attribute key
        on_attr_value val2
        on_attribute_end key
      else                            # value
        on_attribute nil
        on_attr_value key
        on_attribute_end nil
      end
    }
  end
  found_unclosed_stag name unless @src.close_tag
  on_stag_end name
end
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# This method should be called only from on_stag_end.
|
|
196
|
+
# Collects raw text from @src up to, but not including, the next chunk that
# starts with `</' (or the end of input).  Returns '' when nothing precedes
# that point.
def get_cdata_content
  s = @src.test
  return '' if !s || (s[0] == ?< && s[1] == ?/)

  dst = @src.get
  until !(s = @src.test) || (s[0] == ?< && s[1] == ?/)
    dst << @src.get_plain
  end
  dst
end
public :get_cdata_content
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Handles a `<!' construct that is neither a comment nor a doctype.
# An empty `<!>' is reported as an empty comment; anything else is a parse
# error and is skipped up to the closing of the tag (or the end of input).
def scan_bang_tag(s)
  return on_comment('') if s == '<!' && @src.close_tag   # <!>

  parse_error "parse error at `<!'"
  s = @src.get_plain while s && !@src.close_tag          # skip entire
  nil
end
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# An internal DTD subset is reported as a parse error and then skipped
# through skip_internal_dtd.
def scan_internal_dtd(s)
  parse_error("DTD subset is found but it is not permitted in HTML")
  skip_internal_dtd(s)
end
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# Accepts PUBLIC / SYSTEM in any letter case, returning the upper-case
# keyword; every other value is passed on to the superclass implementation.
def found_invalid_pubsys(pubsys)
  keyword = pubsys.upcase
  case keyword
  when 'PUBLIC', 'SYSTEM' then keyword
  else super
  end
end
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# Scans the document prolog starting with chunk +s+: comments, doctype
# declarations (case-insensitive; a second one is reported as a parse error
# but still scanned), processing instructions and whitespace-only chunks.
# The first chunk that is none of these, or the next chunk from @src when
# input ran out, is handed to scan_content.
def scan_prolog(s)
  doctypes_seen = 0
  while s
    if s[0] == ?<
      second = s[1]
      if second == ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment s
        elsif (decl = /\A<!doctype(?=\s)/in.match(s))
          doctypes_seen += 1
          parse_error "another document type declaration is found" if doctypes_seen > 1
          scan_doctype decl.post_match
        else
          break
        end
      elsif second == ??
        scan_pi s
      else
        break
      end
    elsif s.strip.empty?
      on_prolog_space s
    else
      break
    end
    s = @src.get
  end
  scan_content(s || @src.get)
end
|
|
265
|
+
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# Self-test driver: when this file is run directly, the input given through
# ARGF is parsed with an HTMLScanner and the consumed user CPU time is
# printed to stderr.  Parse errors are printed only when $VERBOSE is set.
if $0 == __FILE__ then
  class TestVisitor
    include XMLScan::Visitor
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  $s = scan = XMLScan::HTMLScanner.new(TestVisitor.new)
  src = ARGF
  def src.path; filename; end
  # Process.times replaces Time.times, which current Ruby versions no longer
  # provide.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|