rgen 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. data/CHANGELOG +20 -1
  2. data/MIT-LICENSE +1 -1
  3. data/README +12 -9
  4. data/lib/instantiators/ea_instantiator.rb +36 -0
  5. data/lib/metamodels/uml13_metamodel.rb +559 -0
  6. data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
  7. data/lib/mmgen/metamodel_generator.rb +5 -5
  8. data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
  9. data/lib/mmgen/mmgen.rb +6 -4
  10. data/lib/mmgen/templates/annotations.tpl +37 -0
  11. data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
  12. data/lib/rgen/ecore/ecore.rb +190 -0
  13. data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
  14. data/lib/rgen/ecore/ecore_transformer.rb +85 -0
  15. data/lib/rgen/environment.rb +9 -24
  16. data/lib/rgen/find_helper.rb +68 -0
  17. data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
  18. data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
  19. data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
  20. data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
  21. data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
  22. data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
  23. data/lib/rgen/metamodel_builder.rb +103 -9
  24. data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
  25. data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
  26. data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
  27. data/lib/rgen/metamodel_builder/data_types.rb +67 -0
  28. data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
  29. data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
  30. data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
  31. data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
  32. data/lib/rgen/model_comparator.rb +56 -0
  33. data/lib/rgen/model_dumper.rb +5 -5
  34. data/lib/rgen/name_helper.rb +17 -1
  35. data/lib/rgen/template_language.rb +148 -28
  36. data/lib/rgen/template_language/directory_template_container.rb +56 -38
  37. data/lib/rgen/template_language/output_handler.rb +93 -77
  38. data/lib/rgen/template_language/template_container.rb +186 -143
  39. data/lib/rgen/transformer.rb +19 -14
  40. data/lib/transformers/uml13_to_ecore.rb +75 -0
  41. data/redist/xmlscan/ChangeLog +1301 -0
  42. data/redist/xmlscan/README +34 -0
  43. data/redist/xmlscan/THANKS +11 -0
  44. data/redist/xmlscan/doc/changes.html +74 -0
  45. data/redist/xmlscan/doc/changes.rd +80 -0
  46. data/redist/xmlscan/doc/en/conformance.html +136 -0
  47. data/redist/xmlscan/doc/en/conformance.rd +152 -0
  48. data/redist/xmlscan/doc/en/manual.html +356 -0
  49. data/redist/xmlscan/doc/en/manual.rd +402 -0
  50. data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
  51. data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
  52. data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
  53. data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
  54. data/redist/xmlscan/doc/src/Makefile +41 -0
  55. data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
  56. data/redist/xmlscan/doc/src/langsplit.rb +110 -0
  57. data/redist/xmlscan/doc/src/manual.rd.src +614 -0
  58. data/redist/xmlscan/install.rb +41 -0
  59. data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
  60. data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
  61. data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
  62. data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
  63. data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
  64. data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
  65. data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
  66. data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
  67. data/redist/xmlscan/memo/CONFORMANCE +1249 -0
  68. data/redist/xmlscan/memo/PRODUCTIONS +195 -0
  69. data/redist/xmlscan/memo/contentspec.ry +335 -0
  70. data/redist/xmlscan/samples/chibixml.rb +105 -0
  71. data/redist/xmlscan/samples/getxmlchar.rb +122 -0
  72. data/redist/xmlscan/samples/rexml.rb +159 -0
  73. data/redist/xmlscan/samples/xmlbench.rb +88 -0
  74. data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
  75. data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
  76. data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
  77. data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
  78. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
  79. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
  80. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
  81. data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
  82. data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
  83. data/redist/xmlscan/samples/xmlconftest.rb +200 -0
  84. data/redist/xmlscan/test.rb +7 -0
  85. data/redist/xmlscan/tests/deftestcase.rb +73 -0
  86. data/redist/xmlscan/tests/runtest.rb +47 -0
  87. data/redist/xmlscan/tests/testall.rb +14 -0
  88. data/redist/xmlscan/tests/testencoding.rb +438 -0
  89. data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
  90. data/redist/xmlscan/tests/testnamespace.rb +457 -0
  91. data/redist/xmlscan/tests/testparser.rb +591 -0
  92. data/redist/xmlscan/tests/testscanner.rb +1749 -0
  93. data/redist/xmlscan/tests/testxmlchar.rb +143 -0
  94. data/redist/xmlscan/tests/visitor.rb +34 -0
  95. data/test/array_extensions_test.rb +2 -2
  96. data/test/ea_instantiator_test.rb +41 -0
  97. data/test/ecore_self_test.rb +53 -0
  98. data/test/environment_test.rb +11 -6
  99. data/test/metamodel_builder_test.rb +404 -245
  100. data/test/metamodel_roundtrip_test.rb +52 -0
  101. data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
  102. data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
  103. data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
  104. data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
  105. data/test/rgen_test.rb +3 -3
  106. data/test/template_language_test.rb +65 -39
  107. data/test/template_language_test/expected_result.txt +24 -3
  108. data/test/template_language_test/templates/code/array.tpl +11 -0
  109. data/test/template_language_test/templates/content/author.tpl +7 -0
  110. data/test/template_language_test/templates/content/chapter.tpl +1 -1
  111. data/test/template_language_test/templates/root.tpl +17 -8
  112. data/test/template_language_test/testout.txt +24 -3
  113. data/test/testmodel/class_model_checker.rb +119 -0
  114. data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
  115. data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
  116. data/test/testmodel/ea_testmodel_partial.xml +317 -0
  117. data/test/testmodel/ecore_model_checker.rb +101 -0
  118. data/test/testmodel/manual_testmodel.xml +22 -0
  119. data/test/testmodel/object_model_checker.rb +67 -0
  120. data/test/transformer_test.rb +18 -10
  121. data/test/xml_instantiator_test.rb +81 -8
  122. data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
  123. data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
  124. data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
  125. data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
  126. metadata +126 -28
  127. data/lib/ea/xmi_class_instantiator.rb +0 -46
  128. data/lib/ea/xmi_helper.rb +0 -26
  129. data/lib/ea/xmi_metamodel.rb +0 -34
  130. data/lib/ea/xmi_object_instantiator.rb +0 -46
  131. data/lib/ea/xmi_to_classmodel.rb +0 -78
  132. data/lib/ea/xmi_to_objectmodel.rb +0 -92
  133. data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
  134. data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
  135. data/lib/rgen/xml_instantiator.rb +0 -132
  136. data/lib/uml/objectmodel_instantiator.rb +0 -53
  137. data/lib/uml/uml_classmodel.rb +0 -92
  138. data/lib/uml/uml_objectmodel.rb +0 -65
  139. data/test/metamodel_generator_test.rb +0 -44
  140. data/test/metamodel_generator_test/TestModel.rb +0 -40
  141. data/test/metamodel_generator_test/expected_result.txt +0 -40
  142. data/test/xmi_class_instantiator_test.rb +0 -24
  143. data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
  144. data/test/xmi_object_instantiator_test.rb +0 -65
  145. data/test/xml_instantiator_test/testmodel.xml +0 -7
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # install.rb
4
+ #
5
+ # $Id: install.rb,v 1.2 2002/12/26 21:09:38 katsu Exp $
6
+
7
+ require 'rbconfig'
8
+ require 'ftools'
9
+ require 'find'
10
+ require 'getoptlong'
11
+
12
+ DEFAULT_DESTDIR = Config::CONFIG['sitelibdir'] || Config::CONFIG['sitedir']
13
+ SRCDIR = File.dirname(__FILE__)
14
+
15
+
16
+ def install_rb(from, to)
17
+ from = SRCDIR + '/' + from
18
+ Find.find(from) { |src|
19
+ next unless File.file? src
20
+ next unless /\.rb\z/ =~ src
21
+ dst = src.sub(/\A#{Regexp.escape(from)}/, to)
22
+ File.makedirs File.dirname(dst), true
23
+ File.install src, dst, 0644, true
24
+ }
25
+ end
26
+
27
+
28
+ destdir = DEFAULT_DESTDIR
29
+ begin
30
+ GetoptLong.new([ "-d", "--destdir", GetoptLong::REQUIRED_ARGUMENT ]
31
+ ).each_option { |opt, arg|
32
+ case opt
33
+ when '-d' then
34
+ destdir = arg
35
+ end
36
+ }
37
+ rescue
38
+ exit 2
39
+ end
40
+
41
+ install_rb "lib", destdir
@@ -0,0 +1,311 @@
1
+ #
2
+ # xmlscan/encoding.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2002
5
+ #
6
+ # $Id: encoding.rb,v 1.3 2003/01/12 04:10:33 katsu Exp $
7
+ #
8
+
9
+ require 'xmlscan/visitor'
10
+
11
+
12
+ module XMLScan
13
+
14
+ class EncodingError < Error ; end
15
+
16
+
17
+ class Converter
18
+
19
+ def initialize
20
+ end
21
+
22
+ def convert(s)
23
+ s
24
+ end
25
+
26
+ def finish
27
+ ''
28
+ end
29
+
30
+ end
31
+
32
+
33
+
34
+ class SimpleConverter < Converter
35
+
36
+ def SimpleConverter.new_class(block)
37
+ Class.new(self).module_eval {
38
+ define_method(:convert, block)
39
+ self
40
+ }
41
+ end
42
+
43
+ # checking for Module#define_method works
44
+ begin
45
+ Class.new.module_eval{define_method(:a){};self}.new.a
46
+ rescue Exception
47
+ class << SimpleConverter
48
+ remove_method :new_class
49
+ end
50
+ def SimpleConverter.new_class(block)
51
+ Class.new(self).module_eval {
52
+ const_set :ConvProc, block
53
+ module_eval "def convert(s) ; ConvProc.call s ; end"
54
+ self
55
+ }
56
+ end
57
+ end
58
+
59
+ end
60
+
61
+
62
+
63
+ class EncodingClass
64
+
65
+ KCODE_None = //n.kcode
66
+
67
+
68
+ class ConverterProperty
69
+
70
+ def inspect
71
+ "#<Conversion #{@from.name}:#{@to.name} #{@cost}>"
72
+ end
73
+
74
+ def initialize(from, to, cost, klass = nil)
75
+ @from, @to, @cost, @klass = from, to, cost, klass
76
+ end
77
+
78
+ def new_converter
79
+ @klass and @klass.new
80
+ end
81
+
82
+ attr_reader :from, :to, :cost
83
+
84
+ end
85
+
86
+
87
+ class EncodingProperty
88
+
89
+ def inspect
90
+ s = "#<Encoding #{@name}/#{@kcode}>"
91
+ end
92
+
93
+ def initialize(name)
94
+ @name = name
95
+ conv = ConverterProperty.new(self, self, 0)
96
+ @converter = { self => conv }
97
+ @convertable_from = { self => true }
98
+ @kcode_map = {}
99
+ end
100
+
101
+ attr_reader :name, :kcode_map
102
+
103
+
104
+ def convertable_from(encoding)
105
+ @convertable_from[encoding] = true
106
+ end
107
+ protected :convertable_from
108
+
109
+ def changed
110
+ @convertable_from.each_key { |i| i.update_kcode_map }
111
+ end
112
+ private :changed
113
+
114
+
115
+ def kcode?
116
+ defined? @kcode
117
+ end
118
+
119
+ def kcode
120
+ if defined? @kcode then
121
+ @kcode
122
+ else
123
+ KCODE_None
124
+ end
125
+ end
126
+
127
+ def kcode=(kcode)
128
+ if defined? @kcode then
129
+ raise EncodingError, "KCODE conflict" unless @kcode == kcode
130
+ else
131
+ @kcode = kcode
132
+ changed
133
+ end
134
+ kcode
135
+ end
136
+
137
+
138
+ def converter(to)
139
+ @converter[to]
140
+ end
141
+
142
+ def add_converter(to, cost, conv_class)
143
+ if equal? to then
144
+ raise EncodingError,"attempt to add a converter to the same encoding"
145
+ end
146
+ oldconv = @converter[to]
147
+ if not oldconv or cost <= oldconv.cost then
148
+ conv = ConverterProperty.new(self, to, cost, conv_class)
149
+ @converter[to] = conv
150
+ to.convertable_from self
151
+ changed
152
+ end
153
+ nil
154
+ end
155
+
156
+
157
+ def update_kcode_map
158
+ @kcode_map.clear
159
+ @converter.each_value { |conv|
160
+ k = conv.to.kcode
161
+ if conv.to.kcode? and k then
162
+ oldconv = @kcode_map[k]
163
+ @kcode_map[k] = conv if not oldconv or conv.cost <= oldconv.cost
164
+ end
165
+ }
166
+ end
167
+ protected :update_kcode_map
168
+
169
+ end
170
+
171
+
172
+
173
+ def initialize
174
+ @encoding = {}
175
+ end
176
+
177
+ class << self
178
+ private :new
179
+ attr_reader :instance
180
+ end
181
+ @instance = new
182
+
183
+
184
+ private
185
+
186
+ def get_encoding(name)
187
+ encoding = @encoding[name.downcase]
188
+ raise EncodingError, "undeclared encoding `#{name}'" unless encoding
189
+ encoding
190
+ end
191
+
192
+ def touch_encoding(name)
193
+ name = name.downcase
194
+ encoding = @encoding[name]
195
+ encoding = @encoding[name] = EncodingProperty.new(name) unless encoding
196
+ encoding
197
+ end
198
+
199
+
200
+ public
201
+
202
+ def alias(newname, oldname)
203
+ newname = newname.downcase
204
+ if @encoding.key? newname then
205
+ raise EncodingError, "encoding `#{newname}' is already declared"
206
+ end
207
+ @encoding[newname] = get_encoding(oldname)
208
+ nil
209
+ end
210
+
211
+
212
+ def kcode(name)
213
+ encoding = @encoding[name.downcase]
214
+ if encoding then
215
+ encoding.kcode
216
+ else
217
+ KCODE_None
218
+ end
219
+ end
220
+
221
+
222
+ def set_kcode(name, kcode)
223
+ if kcode then
224
+ kcode = Regexp.new('', nil, kcode).kcode
225
+ else
226
+ kcode = nil
227
+ end
228
+ touch_encoding(name).kcode = kcode
229
+ end
230
+
231
+
232
+ def add_converter(from, to, cost, conv_class = nil, &block)
233
+ if block and conv_class then
234
+ raise ArgumentError, "multiple converters given"
235
+ elsif not block and not conv_class then
236
+ raise ArgumentError, "no converter given"
237
+ else
238
+ block = conv_class if Proc === conv_class
239
+ conv_class = SimpleConverter.new_class(block) if block
240
+ end
241
+ from = touch_encoding(from)
242
+ to = touch_encoding(to)
243
+ from.add_converter to, cost, conv_class
244
+ end
245
+
246
+
247
+ def converter(from, to)
248
+ fromenc = get_encoding(from)
249
+ toenc = get_encoding(to)
250
+ conv = fromenc.converter(toenc)
251
+ raise EncodingError, "can't convert `#{from}' to `#{to}'" unless conv
252
+ conv.new_converter
253
+ end
254
+
255
+
256
+ def converter3(from, to = nil)
257
+ to = from unless to
258
+ fromenc = get_encoding(from)
259
+ toenc = get_encoding(to)
260
+ kcode_map = fromenc.kcode_map
261
+ if kcode_map.empty? then
262
+ if fromenc.kcode and fromenc.equal? toenc then
263
+ return [ nil, fromenc.kcode, nil ]
264
+ else
265
+ raise EncodingError, "can't convert `#{from}' to any KCODE"
266
+ end
267
+ end
268
+ mincost, minkcode, minconv = nil
269
+ kcode_map.each { |kcode,conv|
270
+ conv2 = conv.to.converter(toenc)
271
+ if conv2 then
272
+ cost = conv.cost + conv2.cost
273
+ if not mincost or cost < mincost then
274
+ mincost, minkcode, minconv = cost, kcode, conv
275
+ end
276
+ end
277
+ }
278
+ unless mincost then
279
+ raise EncodingError, "can't convert `#{from}' to `#{to}' via any KCODE"
280
+ end
281
+ conv = minconv.new_converter
282
+ conv2 = minconv.to.converter(toenc)
283
+ conv2 = conv2 && conv2.new_converter
284
+ [ conv, minkcode, conv2 ]
285
+ end
286
+
287
+ end
288
+
289
+
290
+ Encoding = EncodingClass.instance
291
+
292
+ Encoding.set_kcode 'utf-8', 'U'
293
+ Encoding.set_kcode 'utf-16', nil
294
+ Encoding.alias 'iso-10646-ucs-2', 'utf-16'
295
+ Encoding.set_kcode 'iso-10646-ucs-4', nil
296
+ Encoding.set_kcode 'iso-8859-1', 'N'
297
+ Encoding.set_kcode 'iso-8859-2', 'N'
298
+ Encoding.set_kcode 'iso-8859-3', 'N'
299
+ Encoding.set_kcode 'iso-8859-4', 'N'
300
+ Encoding.set_kcode 'iso-8859-5', 'N'
301
+ Encoding.set_kcode 'iso-8859-6', 'N'
302
+ Encoding.set_kcode 'iso-8859-7', 'N'
303
+ Encoding.set_kcode 'iso-8859-8', 'N'
304
+ Encoding.set_kcode 'iso-8859-9', 'N'
305
+ Encoding.set_kcode 'iso-2022-jp', nil
306
+ Encoding.set_kcode 'shift_jis', 'S'
307
+ Encoding.set_kcode 'Windows-31J', 'S'
308
+ Encoding.set_kcode 'euc-jp', 'E'
309
+ Encoding.set_kcode 'euc-kr', 'E'
310
+
311
+ end
@@ -0,0 +1,289 @@
1
+ #
2
+ # xmlscan/htmlscan.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2002
5
+ #
6
+ # $Id: htmlscan.rb,v 1.18 2003/05/01 15:36:50 katsu Exp $
7
+ #
8
+
9
+ require 'xmlscan/scanner'
10
+
11
+
12
+ module XMLScan
13
+
14
+ class HTMLScanner < XMLScanner
15
+
16
+ private
17
+
18
+ def wellformed_error(msg)
19
+ # All wellformed errors raised by XMLScanner are ignored.
20
+ # XMLScanner only raises wellformed errors in scan_stag, which is a
21
+ # method completely overridden by HTMLScanner, so this method is
22
+ # never called in fact.
23
+ end
24
+
25
+ def on_xmldecl
26
+ raise "[BUG] this method must be never called"
27
+ end
28
+
29
+ def on_xmldecl_version(str)
30
+ raise "[BUG] this method must be never called"
31
+ end
32
+
33
+ def on_xmldecl_encoding(str)
34
+ raise "[BUG] this method must be never called"
35
+ end
36
+
37
+ def on_xmldecl_standalone(str)
38
+ raise "[BUG] this method must be never called"
39
+ end
40
+
41
+ def on_xmldecl_other(name, value)
42
+ raise "[BUG] this method must be never called"
43
+ end
44
+
45
+ def on_xmldecl_end
46
+ raise "[BUG] this method must be never called"
47
+ end
48
+
49
+ def on_stag_end_empty(name)
50
+ raise "[BUG] this method must be never called"
51
+ end
52
+
53
+
54
+ private
55
+
56
+ def scan_comment(s)
57
+ s[0,4] = '' # remove `<!--'
58
+ comm = ''
59
+ until /--/n =~ s
60
+ comm << s
61
+ s = @src.get_plain
62
+ unless s then
63
+ parse_error "unterminated comment meets EOF"
64
+ return on_comment(comm)
65
+ end
66
+ end
67
+ comm << $`
68
+ s = $'
69
+ until s.empty? || s.strip.empty? and @src.close_tag # --> or -- >
70
+ comm << '--'
71
+ if /\A\s*--/n =~ s then # <!--hoge-- --
72
+ comm << $&
73
+ s = $'
74
+ if s.empty? and @src.close_tag then # <!--hoge-- -->
75
+ parse_error "`-->' is found but comment must not end here"
76
+ comm.chop!.chop!
77
+ break
78
+ end
79
+ else # <!--hoge-- fuga
80
+ parse_error "only whitespace can appear between two comments"
81
+ end
82
+ if /\A-\s*\z/n =~ s and @src.close_tag then # <!--hoge--->
83
+ parse_error "`-->' is found but comment must not end here"
84
+ comm.chop!
85
+ break
86
+ end
87
+ until /--/n =~ s # copy & paste for performance
88
+ comm << s
89
+ s = @src.get_plain
90
+ unless s then
91
+ parse_error "unterminated comment meets EOF"
92
+ return on_comment(comm)
93
+ end
94
+ end
95
+ comm << $`
96
+ s = $'
97
+ end
98
+ on_comment comm
99
+ end
100
+
101
+
102
+ alias scan_xml_pi scan_pi # PIO "<?" PIC "?>" -- <? PI ?> --
103
+
104
+
105
+ def scan_pi(s) # <?PI > this is default in SGML.
106
+ s[0,2] = '' # remove `<?'
107
+ pi = s
108
+ until @src.close_tag
109
+ s = @src.get_plain
110
+ unless s then
111
+ parse_error "unterminated PI meets EOF"
112
+ break
113
+ end
114
+ pi << s
115
+ end
116
+ on_pi '', pi
117
+ end
118
+
119
+
120
+ def scan_stag(s)
121
+ unless /(?=[\/\s='"])/n =~ s then
122
+ name = s
123
+ name[0,1] = '' # remove `<'
124
+ if name.empty? then # <> or <<
125
+ if @src.close_tag then
126
+ return found_empty_stag
127
+ else
128
+ parse_error "parse error at `<'"
129
+ return on_chardata('<')
130
+ end
131
+ end
132
+ on_stag name
133
+ found_unclosed_stag name unless @src.close_tag
134
+ on_stag_end name
135
+ else
136
+ name = $`
137
+ s = $'
138
+ name[0,1] = '' # remove `<'
139
+ if name.empty? then # `< tag' or `<=`
140
+ parse_error "parse error at `<'"
141
+ if @src.close_tag then
142
+ s << '>'
143
+ end
144
+ return on_chardata('<' << s)
145
+ end
146
+ on_stag name
147
+ begin
148
+ continue = false
149
+ s.scan(
150
+ /([^\s=\/'"]+)(?:\s*=\s*(?:('[^']*'?|"[^"]*"?)|([^\s='"]+)))?|(\S)/n
151
+ ) { |key,val,val2,error|
152
+ if key then
153
+ if val then # key="value"
154
+ on_attribute key
155
+ qmark = val.slice!(0,1)
156
+ if val[-1] == qmark[0] then
157
+ val.chop!
158
+ scan_attvalue val unless val.empty?
159
+ else
160
+ scan_attvalue val unless val.empty?
161
+ begin
162
+ s = @src.get
163
+ unless s then
164
+ parse_error "unterminated attribute `#{key}' meets EOF"
165
+ break
166
+ end
167
+ c = s[0]
168
+ val, s = s.split(qmark, 2)
169
+ scan_attvalue '>' unless c == ?< or c == ?>
170
+ scan_attvalue val if c
171
+ end until s
172
+ continue = s
173
+ end
174
+ on_attribute_end key
175
+ elsif val2 then # key=value
176
+ on_attribute key
177
+ on_attr_value val2
178
+ on_attribute_end key
179
+ else # value
180
+ on_attribute nil
181
+ on_attr_value key
182
+ on_attribute_end nil
183
+ end
184
+ else
185
+ parse_error "parse error at `#{error}'"
186
+ end
187
+ }
188
+ end while continue
189
+ found_unclosed_stag name unless @src.close_tag
190
+ on_stag_end name
191
+ end
192
+ end
193
+
194
+
195
+ # This method should be called only from on_stag_end.
196
+ def get_cdata_content
197
+ unless not s = @src.test or s[0] == ?< && s[1] == ?/ then
198
+ dst = @src.get
199
+ until not s = @src.test or s[0] == ?< && s[1] == ?/
200
+ dst << @src.get_plain
201
+ end
202
+ dst
203
+ else
204
+ ''
205
+ end
206
+ end
207
+ public :get_cdata_content
208
+
209
+
210
+ def scan_bang_tag(s)
211
+ if s == '<!' and @src.close_tag then # <!>
212
+ on_comment ''
213
+ else
214
+ parse_error "parse error at `<!'"
215
+ while s and not @src.close_tag # skip entire
216
+ s = @src.get_plain
217
+ end
218
+ end
219
+ end
220
+
221
+
222
+ def scan_internal_dtd(s)
223
+ parse_error "DTD subset is found but it is not permitted in HTML"
224
+ skip_internal_dtd s
225
+ end
226
+
227
+
228
+ def found_invalid_pubsys(pubsys)
229
+ s = pubsys.upcase
230
+ return s if s == 'PUBLIC' or s == 'SYSTEM'
231
+ super
232
+ end
233
+
234
+
235
+ def scan_prolog(s)
236
+ doctype = 0
237
+ while s
238
+ if s[0] == ?< then
239
+ if (c = s[1]) == ?! then
240
+ if s[2] == ?- and s[3] == ?- then
241
+ scan_comment s
242
+ elsif /\A<!doctype(?=\s)/in =~ s then
243
+ doctype += 1
244
+ if doctype > 1 then
245
+ parse_error "another document type declaration is found"
246
+ end
247
+ scan_doctype $'
248
+ else
249
+ break
250
+ end
251
+ elsif c == ?? then
252
+ scan_pi s
253
+ else
254
+ break
255
+ end
256
+ elsif s.strip.empty? then
257
+ on_prolog_space s
258
+ else
259
+ break
260
+ end
261
+ s = @src.get
262
+ end
263
+ scan_content(s || @src.get)
264
+ end
265
+
266
+ end
267
+
268
+ end
269
+
270
+
271
+
272
+
273
+
274
+ if $0 == __FILE__ then
275
+ class TestVisitor
276
+ include XMLScan::Visitor
277
+ def parse_error(msg)
278
+ STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
279
+ end
280
+ end
281
+
282
+ $s = scan = XMLScan::HTMLScanner.new(TestVisitor.new)
283
+ src = ARGF
284
+ def src.path; filename; end
285
+ t1 = Time.times.utime
286
+ scan.parse src
287
+ t2 = Time.times.utime
288
+ STDERR.printf "%2.3f sec\n", t2 - t1
289
+ end