rgen 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. data/CHANGELOG +20 -1
  2. data/MIT-LICENSE +1 -1
  3. data/README +12 -9
  4. data/lib/instantiators/ea_instantiator.rb +36 -0
  5. data/lib/metamodels/uml13_metamodel.rb +559 -0
  6. data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
  7. data/lib/mmgen/metamodel_generator.rb +5 -5
  8. data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
  9. data/lib/mmgen/mmgen.rb +6 -4
  10. data/lib/mmgen/templates/annotations.tpl +37 -0
  11. data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
  12. data/lib/rgen/ecore/ecore.rb +190 -0
  13. data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
  14. data/lib/rgen/ecore/ecore_transformer.rb +85 -0
  15. data/lib/rgen/environment.rb +9 -24
  16. data/lib/rgen/find_helper.rb +68 -0
  17. data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
  18. data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
  19. data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
  20. data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
  21. data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
  22. data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
  23. data/lib/rgen/metamodel_builder.rb +103 -9
  24. data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
  25. data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
  26. data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
  27. data/lib/rgen/metamodel_builder/data_types.rb +67 -0
  28. data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
  29. data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
  30. data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
  31. data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
  32. data/lib/rgen/model_comparator.rb +56 -0
  33. data/lib/rgen/model_dumper.rb +5 -5
  34. data/lib/rgen/name_helper.rb +17 -1
  35. data/lib/rgen/template_language.rb +148 -28
  36. data/lib/rgen/template_language/directory_template_container.rb +56 -38
  37. data/lib/rgen/template_language/output_handler.rb +93 -77
  38. data/lib/rgen/template_language/template_container.rb +186 -143
  39. data/lib/rgen/transformer.rb +19 -14
  40. data/lib/transformers/uml13_to_ecore.rb +75 -0
  41. data/redist/xmlscan/ChangeLog +1301 -0
  42. data/redist/xmlscan/README +34 -0
  43. data/redist/xmlscan/THANKS +11 -0
  44. data/redist/xmlscan/doc/changes.html +74 -0
  45. data/redist/xmlscan/doc/changes.rd +80 -0
  46. data/redist/xmlscan/doc/en/conformance.html +136 -0
  47. data/redist/xmlscan/doc/en/conformance.rd +152 -0
  48. data/redist/xmlscan/doc/en/manual.html +356 -0
  49. data/redist/xmlscan/doc/en/manual.rd +402 -0
  50. data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
  51. data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
  52. data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
  53. data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
  54. data/redist/xmlscan/doc/src/Makefile +41 -0
  55. data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
  56. data/redist/xmlscan/doc/src/langsplit.rb +110 -0
  57. data/redist/xmlscan/doc/src/manual.rd.src +614 -0
  58. data/redist/xmlscan/install.rb +41 -0
  59. data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
  60. data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
  61. data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
  62. data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
  63. data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
  64. data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
  65. data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
  66. data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
  67. data/redist/xmlscan/memo/CONFORMANCE +1249 -0
  68. data/redist/xmlscan/memo/PRODUCTIONS +195 -0
  69. data/redist/xmlscan/memo/contentspec.ry +335 -0
  70. data/redist/xmlscan/samples/chibixml.rb +105 -0
  71. data/redist/xmlscan/samples/getxmlchar.rb +122 -0
  72. data/redist/xmlscan/samples/rexml.rb +159 -0
  73. data/redist/xmlscan/samples/xmlbench.rb +88 -0
  74. data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
  75. data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
  76. data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
  77. data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
  78. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
  79. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
  80. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
  81. data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
  82. data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
  83. data/redist/xmlscan/samples/xmlconftest.rb +200 -0
  84. data/redist/xmlscan/test.rb +7 -0
  85. data/redist/xmlscan/tests/deftestcase.rb +73 -0
  86. data/redist/xmlscan/tests/runtest.rb +47 -0
  87. data/redist/xmlscan/tests/testall.rb +14 -0
  88. data/redist/xmlscan/tests/testencoding.rb +438 -0
  89. data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
  90. data/redist/xmlscan/tests/testnamespace.rb +457 -0
  91. data/redist/xmlscan/tests/testparser.rb +591 -0
  92. data/redist/xmlscan/tests/testscanner.rb +1749 -0
  93. data/redist/xmlscan/tests/testxmlchar.rb +143 -0
  94. data/redist/xmlscan/tests/visitor.rb +34 -0
  95. data/test/array_extensions_test.rb +2 -2
  96. data/test/ea_instantiator_test.rb +41 -0
  97. data/test/ecore_self_test.rb +53 -0
  98. data/test/environment_test.rb +11 -6
  99. data/test/metamodel_builder_test.rb +404 -245
  100. data/test/metamodel_roundtrip_test.rb +52 -0
  101. data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
  102. data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
  103. data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
  104. data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
  105. data/test/rgen_test.rb +3 -3
  106. data/test/template_language_test.rb +65 -39
  107. data/test/template_language_test/expected_result.txt +24 -3
  108. data/test/template_language_test/templates/code/array.tpl +11 -0
  109. data/test/template_language_test/templates/content/author.tpl +7 -0
  110. data/test/template_language_test/templates/content/chapter.tpl +1 -1
  111. data/test/template_language_test/templates/root.tpl +17 -8
  112. data/test/template_language_test/testout.txt +24 -3
  113. data/test/testmodel/class_model_checker.rb +119 -0
  114. data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
  115. data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
  116. data/test/testmodel/ea_testmodel_partial.xml +317 -0
  117. data/test/testmodel/ecore_model_checker.rb +101 -0
  118. data/test/testmodel/manual_testmodel.xml +22 -0
  119. data/test/testmodel/object_model_checker.rb +67 -0
  120. data/test/transformer_test.rb +18 -10
  121. data/test/xml_instantiator_test.rb +81 -8
  122. data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
  123. data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
  124. data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
  125. data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
  126. metadata +126 -28
  127. data/lib/ea/xmi_class_instantiator.rb +0 -46
  128. data/lib/ea/xmi_helper.rb +0 -26
  129. data/lib/ea/xmi_metamodel.rb +0 -34
  130. data/lib/ea/xmi_object_instantiator.rb +0 -46
  131. data/lib/ea/xmi_to_classmodel.rb +0 -78
  132. data/lib/ea/xmi_to_objectmodel.rb +0 -92
  133. data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
  134. data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
  135. data/lib/rgen/xml_instantiator.rb +0 -132
  136. data/lib/uml/objectmodel_instantiator.rb +0 -53
  137. data/lib/uml/uml_classmodel.rb +0 -92
  138. data/lib/uml/uml_objectmodel.rb +0 -65
  139. data/test/metamodel_generator_test.rb +0 -44
  140. data/test/metamodel_generator_test/TestModel.rb +0 -40
  141. data/test/metamodel_generator_test/expected_result.txt +0 -40
  142. data/test/xmi_class_instantiator_test.rb +0 -24
  143. data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
  144. data/test/xmi_object_instantiator_test.rb +0 -65
  145. data/test/xml_instantiator_test/testmodel.xml +0 -7
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # install.rb
4
+ #
5
+ # $Id: install.rb,v 1.2 2002/12/26 21:09:38 katsu Exp $
6
+
7
+ require 'rbconfig'
8
+ require 'ftools'
9
+ require 'find'
10
+ require 'getoptlong'
11
+
12
# Default installation target: the interpreter's site library directory,
# falling back to the site directory when 'sitelibdir' is not configured.
# RbConfig is the supported name of the build-configuration module; the
# legacy `Config' alias was deprecated in Ruby 1.9.3 and removed in 2.2,
# where referencing it raises NameError.
DEFAULT_DESTDIR = RbConfig::CONFIG['sitelibdir'] || RbConfig::CONFIG['sitedir']

# Directory containing this script; install sources are resolved against it.
SRCDIR = File.dirname(__FILE__)
14
+
15
+
16
# Copies every regular file whose name ends in `.rb' from SRCDIR/<from> into
# the directory +to+, keeping the relative directory layout.
#
# from:: directory name, relative to SRCDIR, to copy from
# to::   destination root; missing directories are created
#
# Files are installed with mode 0644 and every action is echoed (verbose).
def install_rb(from, to)
  # ftools (File.makedirs / File.install) was removed in Ruby 1.9; FileUtils
  # offers the same operations. Required here so the method is self-contained.
  require 'fileutils'

  from = SRCDIR + '/' + from
  Find.find(from) { |src|
    next unless File.file? src
    next unless /\.rb\z/ =~ src
    # Find yields paths that begin with +from+, so swap the prefix by slicing.
    # Passing +to+ as a String#sub replacement would interpret backslash
    # sequences in the destination path (e.g. "\1") as back-references.
    dst = to + src[from.length..-1]
    FileUtils.mkdir_p File.dirname(dst), :verbose => true
    FileUtils.install src, dst, :mode => 0644, :verbose => true
  }
end
26
+
27
+
28
# Command-line entry point: `-d DIR' / `--destdir DIR' overrides the default
# destination. Any StandardError raised while parsing options ends the
# script with exit status 2.
destdir = DEFAULT_DESTDIR
begin
  options = GetoptLong.new(["-d", "--destdir", GetoptLong::REQUIRED_ARGUMENT])
  options.each_option do |name, value|
    destdir = value if name == '-d'
  end
rescue
  exit 2
end

# Install everything under SRCDIR/lib into the chosen destination.
install_rb "lib", destdir
@@ -0,0 +1,311 @@
1
+ #
2
+ # xmlscan/encoding.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2002
5
+ #
6
+ # $Id: encoding.rb,v 1.3 2003/01/12 04:10:33 katsu Exp $
7
+ #
8
+
9
+ require 'xmlscan/visitor'
10
+
11
+
12
+ module XMLScan
13
+
14
# Error type for encoding-related failures; a subclass of the library's
# Error class.
class EncodingError < Error
end
15
+
16
+
17
# Base converter: an identity conversion. Subclasses are expected to
# override #convert (see SimpleConverter.new_class).
class Converter

  def initialize; end

  # Returns +s+ itself, unchanged.
  def convert(s)
    return s
  end

  # Returns an empty string.
  # NOTE(review): presumably the hook for flushing buffered output at end of
  # input -- confirm against callers; this base class buffers nothing.
  def finish
    return ''
  end

end
31
+
32
+
33
+
34
# Factory for converter classes whose #convert is supplied as a Proc.
class SimpleConverter < Converter

  # Returns a new anonymous subclass of the receiver whose #convert method
  # is implemented by +block+.
  def self.new_class(block)
    klass = Class.new(self)
    klass.module_eval { define_method(:convert, block) }
    klass
  end

  # Probe whether Module#define_method is usable on this interpreter. If the
  # probe raises, replace new_class with a variant that stores the Proc in a
  # constant of the new class and defines #convert from source text.
  begin
    probe = Class.new
    probe.module_eval { define_method(:a) {} }
    probe.new.a
  rescue Exception
    class << SimpleConverter
      remove_method :new_class
    end
    def self.new_class(block)
      klass = Class.new(self)
      klass.const_set :ConvProc, block
      klass.module_eval "def convert(s) ; ConvProc.call s ; end"
      klass
    end
  end

end
60
+
61
+
62
+
63
# Registry of named encodings, the KCODE associated with each, and the
# converters registered between them. Exactly one instance is created
# (EncodingClass.instance); +new+ is private.
class EncodingClass

  # KCODE reported by a regexp compiled with the `n' option.
  KCODE_None = //n.kcode


  # One registered conversion: source encoding, target encoding, a cost and,
  # optionally, the class used to instantiate converter objects.
  class ConverterProperty

    attr_reader :from, :to, :cost

    def initialize(from, to, cost, klass = nil)
      @from = from
      @to = to
      @cost = cost
      @klass = klass
    end

    def inspect
      "#<Conversion #{@from.name}:#{@to.name} #{@cost}>"
    end

    # Instantiates the converter class; yields the stored (falsy) value
    # itself when no class was registered.
    def new_converter
      @klass && @klass.new
    end

  end


  # Per-encoding record: its name, optional KCODE, outgoing converters, the
  # set of encodings that can convert to it, and a KCODE => cheapest
  # converter map.
  class EncodingProperty

    attr_reader :name, :kcode_map

    def initialize(name)
      @name = name
      # Every encoding starts with a zero-cost, class-less conversion to itself.
      @converter = { self => ConverterProperty.new(self, self, 0) }
      @convertable_from = { self => true }
      @kcode_map = {}
    end

    def inspect
      "#<Encoding #{@name}/#{@kcode}>"
    end


    # Records that +encoding+ has a converter targeting this encoding.
    def convertable_from(encoding)
      @convertable_from[encoding] = true
    end
    protected :convertable_from

    # Rebuilds the kcode map of every encoding recorded in @convertable_from
    # (which always includes this encoding itself).
    def changed
      @convertable_from.each_key { |source| source.update_kcode_map }
    end
    private :changed


    # Truthy once a KCODE has been assigned, even if the assigned value is nil.
    def kcode?
      defined? @kcode
    end

    # The assigned KCODE, or KCODE_None when none was ever assigned.
    def kcode
      defined?(@kcode) ? @kcode : KCODE_None
    end

    # Assigns the KCODE once. Re-assigning the same value is accepted;
    # a different value raises EncodingError.
    def kcode=(kcode)
      unless defined? @kcode
        @kcode = kcode
        changed
        return kcode
      end
      raise EncodingError, "KCODE conflict" unless @kcode == kcode
      kcode
    end


    # ConverterProperty leading from this encoding to +to+, or nil.
    def converter(to)
      @converter[to]
    end

    # Registers a converter to +to+ unless a strictly cheaper one is already
    # registered. Raises EncodingError when +to+ is this very encoding.
    # Always returns nil.
    def add_converter(to, cost, conv_class)
      if equal?(to)
        raise EncodingError,"attempt to add a converter to the same encoding"
      end
      current = @converter[to]
      if !current || cost <= current.cost
        @converter[to] = ConverterProperty.new(self, to, cost, conv_class)
        to.convertable_from self
        changed
      end
      nil
    end


    # Recomputes @kcode_map: for each target that has a KCODE assigned with
    # a non-nil/non-false value, keep the lowest-cost converter for that
    # KCODE (on equal cost the converter visited later wins).
    def update_kcode_map
      @kcode_map.clear
      @converter.each_value do |conv|
        target = conv.to
        code = target.kcode
        next unless target.kcode? && code
        best = @kcode_map[code]
        @kcode_map[code] = conv if !best || conv.cost <= best.cost
      end
    end
    protected :update_kcode_map

  end



  def initialize
    @encoding = {}
  end

  class << self
    private :new
    attr_reader :instance
  end
  @instance = new


  # Looks up a declared encoding (case-insensitively); raises EncodingError
  # when the name is unknown.
  def get_encoding(name)
    found = @encoding[name.downcase]
    return found if found
    raise EncodingError, "undeclared encoding `#{name}'"
  end

  # Looks up an encoding (case-insensitively), declaring it on first use.
  def touch_encoding(name)
    key = name.downcase
    @encoding[key] ||= EncodingProperty.new(key)
  end

  private :get_encoding, :touch_encoding


  # Declares +newname+ as another name for the already declared +oldname+.
  # Raises EncodingError if +newname+ is taken or +oldname+ is undeclared.
  def alias(newname, oldname)
    key = newname.downcase
    if @encoding.key?(key)
      raise EncodingError, "encoding `#{key}' is already declared"
    end
    @encoding[key] = get_encoding(oldname)
    nil
  end


  # KCODE of the named encoding; KCODE_None when the name is undeclared.
  def kcode(name)
    encoding = @encoding[name.downcase]
    encoding ? encoding.kcode : KCODE_None
  end


  # Assigns a KCODE to the named encoding, declaring it if necessary.
  # A truthy +kcode+ is normalised through Regexp#kcode; anything else is
  # stored as nil.
  def set_kcode(name, kcode)
    normalized = kcode ? Regexp.new('', nil, kcode).kcode : nil
    touch_encoding(name).kcode = normalized
  end


  # Registers a converter from +from+ to +to+ with the given +cost+.
  # Exactly one of +conv_class+ or a block must be supplied; a Proc passed
  # as +conv_class+ is treated like a block. Blocks/Procs are wrapped with
  # SimpleConverter.new_class.
  def add_converter(from, to, cost, conv_class = nil, &block)
    raise ArgumentError, "multiple converters given" if block && conv_class
    raise ArgumentError, "no converter given" unless block || conv_class
    block = conv_class if Proc === conv_class
    conv_class = SimpleConverter.new_class(block) if block
    source = touch_encoding(from)
    target = touch_encoding(to)
    source.add_converter target, cost, conv_class
  end


  # New converter instance for the direct conversion +from+ -> +to+.
  # Raises EncodingError when either name is undeclared or no converter is
  # registered.
  def converter(from, to)
    conv = get_encoding(from).converter(get_encoding(to))
    raise EncodingError, "can't convert `#{from}' to `#{to}'" unless conv
    conv.new_converter
  end


  # Plans a conversion +from+ -> (an encoding with a KCODE) -> +to+ of
  # minimal total cost (+to+ defaults to +from+). Returns
  # [ first_converter, kcode, second_converter ], where the converters are
  # fresh instances (or nil/false when no class is registered for that
  # step). Raises EncodingError when no such route exists.
  def converter3(from, to = nil)
    to ||= from
    fromenc = get_encoding(from)
    toenc = get_encoding(to)
    candidates = fromenc.kcode_map
    if candidates.empty?
      unless fromenc.kcode && fromenc.equal?(toenc)
        raise EncodingError, "can't convert `#{from}' to any KCODE"
      end
      return [ nil, fromenc.kcode, nil ]
    end
    best_cost = best_kcode = best_conv = nil
    candidates.each do |code, first|
      onward = first.to.converter(toenc)
      next unless onward
      total = first.cost + onward.cost
      if !best_cost || total < best_cost
        best_cost, best_kcode, best_conv = total, code, first
      end
    end
    unless best_cost
      raise EncodingError, "can't convert `#{from}' to `#{to}' via any KCODE"
    end
    second = best_conv.to.converter(toenc)
    [ best_conv.new_converter, best_kcode, second && second.new_converter ]
  end

end
288
+
289
+
290
# The shared encoding registry. This assignment sits inside module XMLScan,
# so the constant is XMLScan::Encoding (on Ruby 1.9+ it shadows the core
# ::Encoding class for code nested in XMLScan).
Encoding = EncodingClass.instance

# Built-in KCODE declarations. A nil KCODE declares the encoding without
# associating it with any KCODE.
Encoding.set_kcode 'utf-8', 'U'
Encoding.set_kcode 'utf-16', nil
Encoding.alias 'iso-10646-ucs-2', 'utf-16'
Encoding.set_kcode 'iso-10646-ucs-4', nil
1.upto(9) { |part| Encoding.set_kcode "iso-8859-#{part}", 'N' }
[
  ['iso-2022-jp', nil],
  ['shift_jis',   'S'],
  ['Windows-31J', 'S'],
  ['euc-jp',      'E'],
  ['euc-kr',      'E'],
].each { |name, kcode| Encoding.set_kcode name, kcode }
310
+
311
+ end
@@ -0,0 +1,289 @@
1
+ #
2
+ # xmlscan/htmlscan.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2002
5
+ #
6
+ # $Id: htmlscan.rb,v 1.18 2003/05/01 15:36:50 katsu Exp $
7
+ #
8
+
9
+ require 'xmlscan/scanner'
10
+
11
+
12
+ module XMLScan
13
+
14
# Scanner variant of XMLScanner for HTML input: well-formedness errors are
# ignored, XML-declaration and empty-element callbacks must never fire, and
# comments, processing instructions, start tags and the prolog are scanned
# with the lenient rules implemented below.
class HTMLScanner < XMLScanner

  private

  # Intentionally a no-op: well-formedness errors reported by XMLScanner are
  # ignored. (The original author notes XMLScanner only raises them from
  # scan_stag, which this class overrides, so this is never called.)
  def wellformed_error(msg)
  end

  # The following callbacks must never be invoked by this scanner; reaching
  # one of them raises a RuntimeError flagged as a bug.

  def on_xmldecl
    raise "[BUG] this method must be never called"
  end

  def on_xmldecl_version(str)
    raise "[BUG] this method must be never called"
  end

  def on_xmldecl_encoding(str)
    raise "[BUG] this method must be never called"
  end

  def on_xmldecl_standalone(str)
    raise "[BUG] this method must be never called"
  end

  def on_xmldecl_other(name, value)
    raise "[BUG] this method must be never called"
  end

  def on_xmldecl_end
    raise "[BUG] this method must be never called"
  end

  def on_stag_end_empty(name)
    raise "[BUG] this method must be never called"
  end


  # Scans a comment declaration starting at +s+ (which begins with `<!--')
  # and reports the collected text through on_comment. Further `-- ... --'
  # groups before the closing `>' are folded into the same comment, with a
  # parse_error for each irregularity. More input is pulled with
  # @src.get_plain; hitting EOF reports what has been collected so far.
  # Relies on the regexp globals ($`, $&, $') of the most recent match, so
  # statement order matters.
  def scan_comment(s)
    s[0, 4] = ''                           # remove `<!--'
    comm = ''
    until /--/n =~ s
      comm << s
      s = @src.get_plain
      unless s
        parse_error "unterminated comment meets EOF"
        return on_comment(comm)
      end
    end
    comm << $`
    s = $'
    until (s.empty? || s.strip.empty?) && @src.close_tag   # --> or -- >
      comm << '--'
      if /\A\s*--/n =~ s                   # <!--hoge-- --
        comm << $&
        s = $'
        if s.empty? && @src.close_tag      # <!--hoge-- -->
          parse_error "`-->' is found but comment must not end here"
          comm.chop!.chop!
          break
        end
      else                                 # <!--hoge-- fuga
        parse_error "only whitespace can appear between two comments"
      end
      if (/\A-\s*\z/n =~ s) && @src.close_tag   # <!--hoge--->
        parse_error "`-->' is found but comment must not end here"
        comm.chop!
        break
      end
      until /--/n =~ s                     # same loop as above, kept inline
        comm << s
        s = @src.get_plain
        unless s
          parse_error "unterminated comment meets EOF"
          return on_comment(comm)
        end
      end
      comm << $`
      s = $'
    end
    on_comment comm
  end


  # Keep the inherited XML-style PI scanner reachable under another name
  # before scan_pi is redefined below.    PIO "<?" PIC "?>" -- <? PI ?> --
  alias scan_xml_pi scan_pi


  # Scans an SGML-style processing instruction `<?PI >': everything up to
  # the closing tag is passed to on_pi as the body, with an empty target.
  def scan_pi(s)
    s[0, 2] = ''                           # remove `<?'
    body = s
    until @src.close_tag
      chunk = @src.get_plain
      unless chunk
        parse_error "unterminated PI meets EOF"
        break
      end
      body << chunk
    end
    on_pi '', body
  end


  # Scans a start tag beginning at +s+ and emits on_stag, the attribute
  # callbacks and on_stag_end. Accepts quoted values, unquoted values and
  # bare values without a key; a quoted value that is still open at the end
  # of +s+ is continued with further input from @src.get.
  def scan_stag(s)
    if /(?=[\/\s='"])/n =~ s
      name = $`
      s = $'
      name[0, 1] = ''                      # remove `<'
      if name.empty?                       # `< tag' or `<=`
        parse_error "parse error at `<'"
        s << '>' if @src.close_tag
        return on_chardata('<' << s)
      end
      on_stag name
      begin
        # Set to the remaining text when an open quote had to be continued
        # from further input; that remainder is then scanned in turn.
        more = false
        s.scan(
          /([^\s=\/'"]+)(?:\s*=\s*(?:('[^']*'?|"[^"]*"?)|([^\s='"]+)))?|(\S)/n
        ) do |key, quoted, bare, error|
          if key
            if quoted                      # key="value"
              on_attribute key
              qmark = quoted.slice!(0, 1)
              if quoted[-1] == qmark[0]
                quoted.chop!
                scan_attvalue quoted unless quoted.empty?
              else
                scan_attvalue quoted unless quoted.empty?
                begin
                  s = @src.get
                  unless s
                    parse_error "unterminated attribute `#{key}' meets EOF"
                    break
                  end
                  c = s[0]
                  piece, s = s.split(qmark, 2)
                  scan_attvalue '>' unless (c == ?<) || (c == ?>)
                  scan_attvalue piece if c
                end until s
                more = s
              end
              on_attribute_end key
            elsif bare                     # key=value
              on_attribute key
              on_attr_value bare
              on_attribute_end key
            else                           # value
              on_attribute nil
              on_attr_value key
              on_attribute_end nil
            end
          else
            parse_error "parse error at `#{error}'"
          end
        end
      end while more
    else
      # No attribute text at all: the whole of +s+ is `<name'.
      name = s
      name[0, 1] = ''                      # remove `<'
      if name.empty?                       # <> or <<
        return found_empty_stag if @src.close_tag
        parse_error "parse error at `<'"
        return on_chardata('<')
      end
      on_stag name
    end
    found_unclosed_stag name unless @src.close_tag
    on_stag_end name
  end


  # Returns the raw text up to (not including) the next piece of input that
  # starts with `</', or '' when the input is exhausted or already at `</'.
  # This method should be called only from on_stag_end.
  def get_cdata_content
    s = @src.test
    return '' if !s || (s[0] == ?< && s[1] == ?/)
    dst = @src.get
    until !(s = @src.test) || (s[0] == ?< && s[1] == ?/)
      dst << @src.get_plain
    end
    dst
  end
  public :get_cdata_content


  # `<!>' is reported as an empty comment; any other `<!...' construct is a
  # parse error and is skipped up to its closing tag.
  def scan_bang_tag(s)
    return on_comment('') if s == '<!' && @src.close_tag
    parse_error "parse error at `<!'"
    while s && !@src.close_tag             # skip entire
      s = @src.get_plain
    end
  end


  # An internal DTD subset is reported as an error and skipped.
  def scan_internal_dtd(s)
    parse_error "DTD subset is found but it is not permitted in HTML"
    skip_internal_dtd s
  end


  # Accepts PUBLIC/SYSTEM in any letter case (returning the upper-cased
  # keyword); anything else is deferred to the superclass.
  def found_invalid_pubsys(pubsys)
    keyword = pubsys.upcase
    return keyword if keyword == 'PUBLIC' || keyword == 'SYSTEM'
    super
  end


  # Scans the prolog: comments, doctype declarations (matched
  # case-insensitively; a second one is a parse error), PIs and
  # whitespace-only text. The first piece that is none of these, or the
  # next piece from @src.get once the loop runs out, is handed to
  # scan_content.
  def scan_prolog(s)
    doctype = 0
    while s
      if s[0] == ?<
        c = s[1]
        if c == ?!
          if s[2] == ?- && s[3] == ?-
            scan_comment s
          elsif /\A<!doctype(?=\s)/in =~ s
            doctype += 1
            if doctype > 1
              parse_error "another document type declaration is found"
            end
            scan_doctype $'
          else
            break
          end
        elsif c == ??
          scan_pi s
        else
          break
        end
      elsif s.strip.empty?
        on_prolog_space s
      else
        break
      end
      s = @src.get
    end
    scan_content(s || @src.get)
  end

end
267
+
268
+ end
269
+
270
+
271
+
272
+
273
+
274
# Ad-hoc driver, run only when this file is executed directly: parses the
# files named on the command line (or standard input) with HTMLScanner and
# prints the consumed user CPU time to standard error. Parse errors are
# printed only when $VERBOSE is set.
if $0 == __FILE__ then
  class TestVisitor
    include XMLScan::Visitor
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  $s = scan = XMLScan::HTMLScanner.new(TestVisitor.new)
  src = ARGF
  # ARGF exposes the current file as #filename; alias it to #path here.
  def src.path; filename; end
  # Process.times replaces Time.times, which was obsolete in Ruby 1.8 and
  # removed in 1.9.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end