rgen 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. data/CHANGELOG +20 -1
  2. data/MIT-LICENSE +1 -1
  3. data/README +12 -9
  4. data/lib/instantiators/ea_instantiator.rb +36 -0
  5. data/lib/metamodels/uml13_metamodel.rb +559 -0
  6. data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
  7. data/lib/mmgen/metamodel_generator.rb +5 -5
  8. data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
  9. data/lib/mmgen/mmgen.rb +6 -4
  10. data/lib/mmgen/templates/annotations.tpl +37 -0
  11. data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
  12. data/lib/rgen/ecore/ecore.rb +190 -0
  13. data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
  14. data/lib/rgen/ecore/ecore_transformer.rb +85 -0
  15. data/lib/rgen/environment.rb +9 -24
  16. data/lib/rgen/find_helper.rb +68 -0
  17. data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
  18. data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
  19. data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
  20. data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
  21. data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
  22. data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
  23. data/lib/rgen/metamodel_builder.rb +103 -9
  24. data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
  25. data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
  26. data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
  27. data/lib/rgen/metamodel_builder/data_types.rb +67 -0
  28. data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
  29. data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
  30. data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
  31. data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
  32. data/lib/rgen/model_comparator.rb +56 -0
  33. data/lib/rgen/model_dumper.rb +5 -5
  34. data/lib/rgen/name_helper.rb +17 -1
  35. data/lib/rgen/template_language.rb +148 -28
  36. data/lib/rgen/template_language/directory_template_container.rb +56 -38
  37. data/lib/rgen/template_language/output_handler.rb +93 -77
  38. data/lib/rgen/template_language/template_container.rb +186 -143
  39. data/lib/rgen/transformer.rb +19 -14
  40. data/lib/transformers/uml13_to_ecore.rb +75 -0
  41. data/redist/xmlscan/ChangeLog +1301 -0
  42. data/redist/xmlscan/README +34 -0
  43. data/redist/xmlscan/THANKS +11 -0
  44. data/redist/xmlscan/doc/changes.html +74 -0
  45. data/redist/xmlscan/doc/changes.rd +80 -0
  46. data/redist/xmlscan/doc/en/conformance.html +136 -0
  47. data/redist/xmlscan/doc/en/conformance.rd +152 -0
  48. data/redist/xmlscan/doc/en/manual.html +356 -0
  49. data/redist/xmlscan/doc/en/manual.rd +402 -0
  50. data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
  51. data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
  52. data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
  53. data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
  54. data/redist/xmlscan/doc/src/Makefile +41 -0
  55. data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
  56. data/redist/xmlscan/doc/src/langsplit.rb +110 -0
  57. data/redist/xmlscan/doc/src/manual.rd.src +614 -0
  58. data/redist/xmlscan/install.rb +41 -0
  59. data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
  60. data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
  61. data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
  62. data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
  63. data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
  64. data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
  65. data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
  66. data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
  67. data/redist/xmlscan/memo/CONFORMANCE +1249 -0
  68. data/redist/xmlscan/memo/PRODUCTIONS +195 -0
  69. data/redist/xmlscan/memo/contentspec.ry +335 -0
  70. data/redist/xmlscan/samples/chibixml.rb +105 -0
  71. data/redist/xmlscan/samples/getxmlchar.rb +122 -0
  72. data/redist/xmlscan/samples/rexml.rb +159 -0
  73. data/redist/xmlscan/samples/xmlbench.rb +88 -0
  74. data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
  75. data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
  76. data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
  77. data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
  78. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
  79. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
  80. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
  81. data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
  82. data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
  83. data/redist/xmlscan/samples/xmlconftest.rb +200 -0
  84. data/redist/xmlscan/test.rb +7 -0
  85. data/redist/xmlscan/tests/deftestcase.rb +73 -0
  86. data/redist/xmlscan/tests/runtest.rb +47 -0
  87. data/redist/xmlscan/tests/testall.rb +14 -0
  88. data/redist/xmlscan/tests/testencoding.rb +438 -0
  89. data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
  90. data/redist/xmlscan/tests/testnamespace.rb +457 -0
  91. data/redist/xmlscan/tests/testparser.rb +591 -0
  92. data/redist/xmlscan/tests/testscanner.rb +1749 -0
  93. data/redist/xmlscan/tests/testxmlchar.rb +143 -0
  94. data/redist/xmlscan/tests/visitor.rb +34 -0
  95. data/test/array_extensions_test.rb +2 -2
  96. data/test/ea_instantiator_test.rb +41 -0
  97. data/test/ecore_self_test.rb +53 -0
  98. data/test/environment_test.rb +11 -6
  99. data/test/metamodel_builder_test.rb +404 -245
  100. data/test/metamodel_roundtrip_test.rb +52 -0
  101. data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
  102. data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
  103. data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
  104. data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
  105. data/test/rgen_test.rb +3 -3
  106. data/test/template_language_test.rb +65 -39
  107. data/test/template_language_test/expected_result.txt +24 -3
  108. data/test/template_language_test/templates/code/array.tpl +11 -0
  109. data/test/template_language_test/templates/content/author.tpl +7 -0
  110. data/test/template_language_test/templates/content/chapter.tpl +1 -1
  111. data/test/template_language_test/templates/root.tpl +17 -8
  112. data/test/template_language_test/testout.txt +24 -3
  113. data/test/testmodel/class_model_checker.rb +119 -0
  114. data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
  115. data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
  116. data/test/testmodel/ea_testmodel_partial.xml +317 -0
  117. data/test/testmodel/ecore_model_checker.rb +101 -0
  118. data/test/testmodel/manual_testmodel.xml +22 -0
  119. data/test/testmodel/object_model_checker.rb +67 -0
  120. data/test/transformer_test.rb +18 -10
  121. data/test/xml_instantiator_test.rb +81 -8
  122. data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
  123. data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
  124. data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
  125. data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
  126. metadata +126 -28
  127. data/lib/ea/xmi_class_instantiator.rb +0 -46
  128. data/lib/ea/xmi_helper.rb +0 -26
  129. data/lib/ea/xmi_metamodel.rb +0 -34
  130. data/lib/ea/xmi_object_instantiator.rb +0 -46
  131. data/lib/ea/xmi_to_classmodel.rb +0 -78
  132. data/lib/ea/xmi_to_objectmodel.rb +0 -92
  133. data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
  134. data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
  135. data/lib/rgen/xml_instantiator.rb +0 -132
  136. data/lib/uml/objectmodel_instantiator.rb +0 -53
  137. data/lib/uml/uml_classmodel.rb +0 -92
  138. data/lib/uml/uml_objectmodel.rb +0 -65
  139. data/test/metamodel_generator_test.rb +0 -44
  140. data/test/metamodel_generator_test/TestModel.rb +0 -40
  141. data/test/metamodel_generator_test/expected_result.txt +0 -40
  142. data/test/xmi_class_instantiator_test.rb +0 -24
  143. data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
  144. data/test/xmi_object_instantiator_test.rb +0 -65
  145. data/test/xml_instantiator_test/testmodel.xml +0 -7
@@ -0,0 +1,1109 @@
1
+ #
2
+ # xmlscan/scanner.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2002
5
+ #
6
+ # $Id: scanner.rb,v 1.83 2003/05/12 14:13:33 katsu Exp $
7
+ #
8
+
9
+ #
10
+ # CONSIDERATIONS FOR CHARACTER ENCODINGS:
11
+ #
12
+ # There are the following common characteristics in character encodings
13
+ # which are supported by Ruby's $KCODE feature (ISO-8859-*, Shift_JIS,
14
+ # EUC, and UTF-8):
15
+ #
16
+ # - Stateless.
17
+ # - ASCII characters are encoded in the same manner as US-ASCII.
18
+ # - The octet sequences corresponding to non-ASCII characters begin
19
+ # with an octet greater than 0x80.
20
+ # - The following characters can be identified by just one octet.
21
+ # That is, every octet corresponding to the following characters in
22
+ # US-ASCII never appear as a part of an octet sequence representing a
23
+ # non-ASCII character.
24
+ #
25
+ # Whitespaces("\t", "\n", "\r", and " ") and
26
+ # ! \ " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
27
+ #
28
+ # Be careful that `[' and `]' are NOT included in the list!
29
+ #
30
+ # If we build a regular expression carefully in accordance with these
31
+ # characteristics, we can get the same match regardless of the value
32
+ # of $KCODE. Moreover, if it can be premised on them, we can detect
33
+ # several delimiters without regular expressions. XMLScanner uses this
34
+ # fact in order to share many regular expressions in all $KCODE modes,
35
+ # and in order to optimize parsing speed.
36
+ #
37
+
38
+ require 'xmlscan/visitor'
39
+
40
+
41
+ module XMLScan
42
+
43
# Uniform reader facade over a parse source.
#
# Every Input answers +gets+, +lineno+ and +path+. Depending on what the
# wrapped object responds to, #initialize installs per-instance (singleton)
# methods that override the class-level defaults defined below:
#
# * an object responding to +gets+ is read through directly;
# * an object responding to +to_ary+ is served one element per +gets+;
# * anything else is handed out whole by the first +gets+, after which
#   +gets+ returns nil.
class Input

  # src:: the object to read from (see the class comment).
  def initialize(src)
    @src = src
    unless src.respond_to?(:gets)
      if src.respond_to?(:to_ary)
        @v = src.to_ary
        @n = -1
        # Next array element, or nil once past the end.
        def self.gets
          @v.at(@n += 1)
        end
        # Number of +gets+ calls made so far on the array.
        def self.lineno
          @n + 1
        end
      else
        @v = @src
        # Return the wrapped object once; nil on every later call.
        def self.gets
          whole = @v
          @v = nil
          whole
        end
      end
    end
    if src.respond_to?(:lineno)
      # Defer to the wrapped object's own line counter.
      def self.lineno
        @src.lineno
      end
    end
    if src.respond_to?(:path)
      # Defer to the wrapped object's own path.
      def self.path
        @src.path
      end
    end
  end

  # The object originally passed to #initialize.
  attr_reader :src

  # Default reader: forwards to the wrapped object.
  def gets
    @src.gets
  end

  # Default line number when the wrapped object cannot report one.
  def lineno
    0
  end

  # Default path when the wrapped object cannot report one.
  def path
    '-'
  end

  # Returns +src+ itself when it already answers +gets+, +lineno+ and
  # +path+; otherwise returns a new Input wrapping it.
  def self.wrap(src)
    complete = [:gets, :lineno, :path].all? { |meth| src.respond_to?(meth) }
    complete ? src : new(src)
  end

  # Inverse of ::wrap: returns the wrapped object for an Input, and any
  # other object unchanged.
  def self.unwrap(obj)
    self === obj ? obj.src : obj
  end

end
89
+
90
+
91
+
92
# An Array whose inherited Array methods are all private.
#
# Every instance method defined directly on the superclass (Array), except
# those also defined directly on Kernel, is made private, so subclasses can
# use the Array machinery internally without exposing it to callers.
class PrivateArray < Array
  private(*(superclass.instance_methods(false) - Kernel.instance_methods(false)))
end
96
+
97
+
98
# Chunked view of an input: lines read from the wrapped source are split
# at `<' and `>' boundaries and handed out one chunk at a time.
#
# The inherited (private) Array holds the chunks already split but not yet
# consumed, stored reversed so that +pop+ yields the next one. @last holds
# the final chunk of the most recently read line, which may still be
# continued by the following line.
class Source < PrivateArray
  # Source inherits Array only for speed.

  # src:: anything accepted by Input.wrap.
  def initialize(src)
    super()
    @src = Input.wrap(src)
    @eof = false
    @last = nil
  end

  # The object this Source was created from (Input wrapper removed).
  def source
    Input.unwrap(@src)
  end


  # True once the input is exhausted and no chunk is left queued.
  def eof?
    @eof and empty?
  end

  # Discards everything still pending and marks the input as finished.
  # Returns self.
  def abort
    @eof = true
    @last = nil
    clear
    self
  end


  # Returns the next chunk, reading further lines from the input when the
  # queue is empty. Returns nil once the input is exhausted.
  def get
    queued = pop
    return queued if queued
    return nil if @eof
    pending = @last
    while true
      line = @src.gets
      unless line
        # End of input: the carried-over chunk (possibly nil) is the only
        # thing left to hand out.
        @eof = true
        unshift pending
        pending = nil
        break
      end
      parts = line.split(/(?=<|>[<>])|>/n, -1)
      if pending
        if /\A[<>]/n =~ parts.first
          # The new line opens with a delimiter, so the carried-over
          # chunk is complete as it stands.
          push pending
        else
          # Otherwise the first piece continues the carried-over chunk.
          parts[0] = pending << (parts.first || '')
        end
      end
      concat parts
      pending = pop
      break unless empty?
    end
    @last = pending
    reverse!
    pop
  end


  # Reads the first chunk and pushes it back; when that chunk is empty the
  # following chunk is taken instead and prefixed with `>'.
  def prepare
    first = get
    if first and first.empty?
      # preserve first `>'
      first = get
      first = '>' << first if first
    end
    first and push(first)
  end


  # True when there is an upcoming chunk and it does not start with `<'.
  def tag_end?
    upcoming = last || @last
    upcoming and upcoming[0] != ?<
  end

  # True when there is an upcoming chunk and it starts with `<'.
  def tag_start?
    upcoming = last || @last
    upcoming and upcoming[0] == ?<
  end

  # tag_end?, and remove a `>'.
  #
  # Returns false when the upcoming chunk is missing or starts with `<',
  # true otherwise.
  def close_tag
    upcoming = last || @last
    return false unless upcoming and upcoming[0] != ?<
    if upcoming == '>' or upcoming.empty?
      first = get
      upcoming = last || @last
      unless upcoming and upcoming[0] == ?<      # for speed up
        taken = [ first ]
        while (upcoming = last || @last) and (upcoming == '>' || upcoming.empty?)
          taken.push get
        end
        taken.pop unless upcoming and upcoming[0] != ?<     # De Morgan
        concat taken
      end
    end
    true
  end


  # get until tag_start?
  #
  # Returns the next chunk only when one is upcoming and it does not start
  # with `<'; otherwise returns nil/false without consuming anything.
  def get_text
    upcoming = last || @last
    upcoming and upcoming[0] != ?< and get
  end

  # get until tag_end?
  #
  # Returns the next chunk only when one is upcoming and it starts with
  # `<'; otherwise returns nil/false without consuming anything.
  def get_tag
    upcoming = last || @last
    upcoming and upcoming[0] == ?< and get
  end

  # Returns the next chunk with a leading `>' restored unless the chunk
  # itself starts with `<' or `>'. Returns nil at end of input.
  def get_plain
    chunk = get
    if chunk
      lead = chunk[0]
      chunk = '>' << chunk unless lead == ?< or lead == ?>
    end
    chunk
  end

  # Line number as reported by the wrapped input.
  def lineno
    @src.lineno
  end

  # Path as reported by the wrapped input.
  def path
    @src.path
  end


  # The following methods are for debug.

  # Shows the queued chunks in consumption order, the carried-over chunk,
  # an `#eof' marker when set, and the underlying source.
  def inspect
    queued = []
    reverse_each { |chunk|
      queued.push ">" unless /\A[<>]/n =~ chunk
      queued.push chunk.inspect
    }
    carried = []
    if @last
      carried.push ">" unless /\A[<>]/n =~ @last
      carried.push @last.inspect
    end
    queued.push '#eof' if @eof
    "((#{queued.join(' ')}) (#{carried.join(' ')}) . #{source.inspect})"
  end

  # Yields every remaining chunk in order. Returns self.
  def each
    prepare
    while chunk = get
      yield chunk
    end
    self
  end

  # Returns the upcoming chunk, fetching one chunk and pushing it back
  # when none is available yet.
  def test
    last or @last or begin
      fetched = get
      fetched and push(fetched) and fetched
    end
  end

end
241
+
242
+
243
+
244
+ class XMLScanner
245
+
246
class << self

  # Names of the options this class itself provides: one for each private
  # instance method defined directly here whose name starts with
  # `apply_option_' (the prefix is stripped).
  def provided_options
    names = []
    private_instance_methods(false).each do |meth|
      names.push($') if /\Aapply_option_/n =~ meth
    end
    names
  end

  # Applies a single option by invoking +apply_option_<option>+ on
  # +instance+.
  def apply_option(instance, option)
    instance.__send__("apply_option_#{option}")
  end

  # Applies each requested option through the first ancestor (in
  # +ancestors+ order) that provides it.
  #
  # Raises ArgumentError when a requested option is provided by no
  # ancestor. Returns +instance+.
  def apply_options(instance, options)
    pending = {}
    options.each { |opt| pending[opt.to_s] = true }
    ancestors.each do |klass|
      next unless klass.respond_to?(:provided_options)
      klass.provided_options.each do |name|
        next unless pending.include?(name)
        pending.delete(name)
        klass.apply_option(instance, name)
      end
    end
    unless pending.empty?
      raise ArgumentError, "undefined option `#{pending.keys[0]}'"
    end
    instance
  end
  private :apply_options

  # Builds a scanner for +visitor+ and applies the given options to it.
  def new(visitor, *options)
    apply_options(super(visitor), options)
  end

end
287
+
288
+
289
+
290
# visitor:: the object that receives every scanning callback.
#
# The decoration, the current source and the kcode all start out unset.
def initialize(visitor)
  @visitor = visitor
  @decoration = @src = @kcode = nil
end
296
+
297
+
298
# Sets the character-encoding code used to select per-kcode regexps.
#
# The value is normalized by building an empty Regexp with +kcode+ (or ''
# when +kcode+ is nil/false) and storing what that Regexp reports as its
# kcode.
def kcode=(kcode)
  probe = Regexp.new('', nil, kcode || '')
  @kcode = probe.kcode
  kcode
end

# The normalized kcode stored by #kcode=, or nil if never set.
attr_reader :kcode
304
+
305
+
306
# Adds +decoration+ to the visitor.
#
# On first use the visitor is wrapped in a Decoration, which then also
# takes the visitor's place; every call expands that same Decoration.
def decorate(decoration)
  @visitor = @decoration = Decoration.new(@visitor) unless @decoration
  @decoration.expand(decoration)
end
private :decorate
313
+
314
+
315
# Current line number of the source being parsed, or nil when no source
# is set.
def lineno
  @src && @src.lineno
end

# Path of the source being parsed, or nil when no source is set.
def path
  @src && @src.path
end

# The object being parsed, or nil when no source is set.
#
# Guarded like #lineno and #path, so that asking for the source when @src
# is nil returns nil instead of raising NoMethodError.
def source
  @src && @src.source
end
326
+
327
+
328
+ private
329
+
330
# Forwards a parse error message to the visitor.
def parse_error(msg)
  @visitor.parse_error(msg)
end

# Forwards a well-formedness error message to the visitor.
def wellformed_error(msg)
  @visitor.wellformed_error(msg)
end

# Forwards a validity error message to the visitor.
def valid_error(msg)
  @visitor.valid_error(msg)
end

# Forwards a warning message to the visitor.
def warning(msg)
  @visitor.warning(msg)
end
345
+
346
+
347
# Each method below forwards one scanning event, unchanged, to the
# visitor method of the same name.

# An XML declaration starts.
def on_xmldecl
  @visitor.on_xmldecl()
end

# str:: value of the `version' pseudo-attribute.
def on_xmldecl_version(str)
  @visitor.on_xmldecl_version(str)
end

# str:: value of the `encoding' pseudo-attribute.
def on_xmldecl_encoding(str)
  @visitor.on_xmldecl_encoding(str)
end

# str:: value of the `standalone' pseudo-attribute.
def on_xmldecl_standalone(str)
  @visitor.on_xmldecl_standalone(str)
end

# name, value:: any other pseudo-attribute found in the declaration.
def on_xmldecl_other(name, value)
  @visitor.on_xmldecl_other(name, value)
end

# The XML declaration is finished.
def on_xmldecl_end
  @visitor.on_xmldecl_end()
end

# root, pubid, sysid:: the parts of a DOCTYPE declaration.
def on_doctype(root, pubid, sysid)
  @visitor.on_doctype(root, pubid, sysid)
end

# str:: whitespace found in the prolog.
def on_prolog_space(str)
  @visitor.on_prolog_space(str)
end

# str:: body of a comment.
def on_comment(str)
  @visitor.on_comment(str)
end

# target, pi:: target and content of a processing instruction.
def on_pi(target, pi)
  @visitor.on_pi(target, pi)
end

# str:: a run of character data.
def on_chardata(str)
  @visitor.on_chardata(str)
end

# str:: content of a CDATA section.
def on_cdata(str)
  @visitor.on_cdata(str)
end

# name:: name of an end tag.
def on_etag(name)
  @visitor.on_etag(name)
end

# ref:: name of an entity reference in content.
def on_entityref(ref)
  @visitor.on_entityref(ref)
end

# code:: number given by a decimal character reference in content.
def on_charref(code)
  @visitor.on_charref(code)
end

# code:: number given by a hexadecimal character reference in content.
def on_charref_hex(code)
  @visitor.on_charref_hex(code)
end

# Scanning of a document begins.
def on_start_document
  @visitor.on_start_document()
end

# Scanning of a document is complete.
def on_end_document
  @visitor.on_end_document()
end
418
+
419
+
420
+ # <hoge fuga="foo&bar;&#38;&#x26;foo" />HOGE
421
+ # ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
422
+ # 1 2 3 4 5 6 7 8 9 A
423
+ #
424
+ # The following methods will be called with the following arguments
425
+ # when the parser reaches the above point;
426
+ #
427
+ # 1: on_stag ('hoge')
428
+ # 2: on_attribute ('fuga')
429
+ # 3: on_attr_value ('foo')
430
+ # 4: on_attr_entityref ('bar')
431
+ # 5: on_attr_charref (38)
432
+ # 6: on_attr_charref_hex (38)
433
+ # 7: on_attr_value ('foo')
434
+ # 8: on_attribute_end ('fuga')
435
+ # 9: on_stag_end_empty ('hoge')
436
+ # or
437
+ # on_stag_end ('hoge')
438
+ #
439
+ # A: on_chardata ('HOGE')
440
+
441
# Each method below forwards one start-tag event, unchanged, to the
# visitor method of the same name.

# name:: name of the start tag being opened.
def on_stag(name)
  @visitor.on_stag(name)
end

# name:: name of an attribute whose value follows.
def on_attribute(name)
  @visitor.on_attribute(name)
end

# str:: a literal piece of the current attribute value.
def on_attr_value(str)
  @visitor.on_attr_value(str)
end

# ref:: name of an entity reference inside an attribute value.
def on_attr_entityref(ref)
  @visitor.on_attr_entityref(ref)
end

# code:: number given by a decimal character reference inside an
#        attribute value.
def on_attr_charref(code)
  @visitor.on_attr_charref(code)
end

# code:: number given by a hexadecimal character reference inside an
#        attribute value.
def on_attr_charref_hex(code)
  @visitor.on_attr_charref_hex(code)
end

# name:: name of the attribute whose value is complete.
def on_attribute_end(name)
  @visitor.on_attribute_end(name)
end

# name:: name of an empty-element tag that has been closed.
def on_stag_end_empty(name)
  @visitor.on_stag_end_empty(name)
end

# name:: name of a start tag that has been closed.
def on_stag_end(name)
  @visitor.on_stag_end(name)
end
476
+
477
+
478
+
479
+ private
480
+
481
# Helper for building one regexp per supported kcode.
module KcodeRegexp
  private
  # The kcode values reported by regexp literals carrying the n, e, s and
  # u flags.
  Kcodes = [ //n.kcode, //e.kcode, //s.kcode, //u.kcode ]

  # re:: regexp source string.
  #
  # Returns a Hash mapping each entry of Kcodes to +re+ compiled with that
  # kcode; any other key yields +re+ compiled without an explicit kcode.
  def kcode_regexp(re)
    table = {}
    Kcodes.each { |code| table[code] = Regexp.new(re, nil, code) }
    table.default = Regexp.new(re)
    table
  end
end
491
+ extend KcodeRegexp
492
+
493
+
494
+ InvalidEntityRef = kcode_regexp('(?=[^#\d\w]|\z)')
495
+
496
# Reports character data to the visitor, splitting out `&' references.
#
# s:: the first chunk of text.
#
# A chunk without `&' goes to on_chardata unchanged. Otherwise the text
# before the first `&' is reported, and every piece following an `&' is
# examined: a terminated reference is reported through on_entityref,
# on_charref or on_charref_hex, a malformed one through parse_error, and
# any text after the reference through on_chardata. Further chunks are
# then pulled from @src.get_text until it yields nil/false.
def scan_chardata(s)
  while true
    if /&/n =~ s
      head = $`
      tail = $'
      on_chardata(head) unless head.empty?
      tail.split('&', -1).each { |piece|
        terminated = (/(?!\A);|(?=[ \t\r\n])/n =~ piece and not $&.empty?)
        unless terminated
          if InvalidEntityRef[@kcode] =~ piece and not (ref = $`).strip.empty?
            parse_error "reference to `#{ref}' doesn't end with `;'"
          else
            parse_error "`&' is not used for entity/character references"
            on_chardata('&' << piece)
            next
          end
        end
        # Both values come from whichever match above succeeded last.
        ref = $`
        rest = $'
        if /\A[^#]/n =~ ref
          on_entityref(ref)
        elsif /\A#(\d+)\z/n =~ ref
          on_charref($1.to_i)
        elsif /\A#x([\dA-Fa-f]+)\z/n =~ ref
          on_charref_hex($1.hex)
        else
          parse_error "invalid character reference `#{ref}'"
        end
        on_chardata(rest) unless rest.empty?
      }
    else
      on_chardata(s)
    end
    s = @src.get_text
    break unless s
    # Put back the `>' in front of the continued text.
    s = '>' << s unless s == '>'
  end
end
534
+
535
+
536
# Reports one piece of an attribute value to the visitor, splitting out
# `&' references. (Largely a copy of scan_chardata.)
#
# s:: the attribute value text.
#
# Text without `&' goes to on_attr_value unchanged. Otherwise the text
# before the first `&' is reported, and every piece following an `&' is
# examined: a terminated reference is reported through on_attr_entityref,
# on_attr_charref or on_attr_charref_hex, a malformed one through
# parse_error, and any text after the reference through on_attr_value.
def scan_attvalue(s)
  if /&/n =~ s
    head = $`
    tail = $'
    on_attr_value(head) unless head.empty?
    tail.split('&', -1).each { |piece|
      terminated = (/(?!\A);|(?=[ \t\r\n])/n =~ piece and not $&.empty?)
      unless terminated
        if InvalidEntityRef[@kcode] =~ piece and not (ref = $`).strip.empty?
          parse_error "reference to `#{ref}' doesn't end with `;'"
        else
          parse_error "`&' is not used for entity/character references"
          on_attr_value('&' << piece)
          next
        end
      end
      # Both values come from whichever match above succeeded last.
      ref = $`
      rest = $'
      if /\A[^#]/n =~ ref
        on_attr_entityref(ref)
      elsif /\A#(\d+)\z/n =~ ref
        on_attr_charref($1.to_i)
      elsif /\A#x([\dA-Fa-f]+)\z/n =~ ref
        on_attr_charref_hex($1.hex)
      else
        parse_error "invalid character reference `#{ref}'"
      end
      on_attr_value(rest) unless rest.empty?
    }
  else
    on_attr_value(s)
  end
end
569
+
570
+
571
# Scans a comment and reports its body through on_comment.
#
# s:: the chunk starting with `<!--'; its first four characters are
#     removed in place.
#
# More text is pulled with @src.get_plain until a `--' that is directly
# followed by the closing `>' is found. A `--' inside the body and a
# `--->' ending are reported through parse_error but kept in the body.
# Reaching end of input first is reported too, and whatever was collected
# is still passed to on_comment.
def scan_comment(s)
  s.slice!(0, 4)   # remove `<!--'
  body = ''
  until /--/n =~ s
    body << s
    s = @src.get_plain
    unless s
      parse_error "unterminated comment meets EOF"
      return on_comment(body)
    end
  end
  body << $`
  # $' is what followed the `--' matched most recently.
  until (s = $').empty? and @src.close_tag
    if s == '-' and @src.close_tag   # --->
      parse_error "comment ending in `--->' is not allowed"
      body << s
      break
    end
    parse_error "comment includes `--'"
    body << '--'
    until /--/n =~ s    # copy & paste for performance
      body << s
      s = @src.get_plain
      unless s
        parse_error "unterminated comment meets EOF"
        return on_comment(body)
      end
    end
    body << $`
  end
  on_comment(body)
end
603
+
604
+
605
# Scans a processing instruction and reports it through on_pi.
#
# s:: the chunk starting with `<?'.
#
# When no target name follows `<?', a parse error is reported and the
# chunk is passed on as character data (with `>' appended if the tag can
# be closed). Otherwise text is accumulated with @src.get_plain until it
# ends in `?' directly followed by the closing `>'; that final `?' is
# dropped. End of input before that point is reported through
# parse_error and the content collected so far is still reported.
def scan_pi(s)
  if /\A<\?([^ \t\n\r?]+)(?:[ \t\n\r]+|(?=\?\z))/n =~ s
    target = $1
    content = $'
    until content[-1] == ?? and @src.close_tag
      more = @src.get_plain
      unless more
        parse_error "unterminated PI meets EOF"
        return on_pi(target, content)
      end
      content << more
    end
    content.chop!     # remove last `?'
    on_pi(target, content)
  else
    parse_error "parse error at `<?'"
    s << '>' if @src.close_tag
    on_chardata(s)
  end
end
625
+
626
+
627
+ CDATAPattern = kcode_regexp('\]\]\z')
628
+
629
# Scans a CDATA section and reports its content through on_cdata.
#
# s:: the text following `<![CDATA['; it is extended in place.
#
# Text is accumulated with @src.get_plain until it ends in `]]' directly
# followed by the closing `>'; the trailing `]]' is then removed. End of
# input before that point is reported through parse_error and the text
# collected so far is still reported.
def scan_cdata(s)
  terminator = CDATAPattern[@kcode]
  until terminator =~ s and @src.close_tag
    more = @src.get_plain
    unless more
      parse_error "unterminated CDATA section meets EOF"
      return on_cdata(s)
    end
    s << more
  end
  2.times { s.chop! }    # remove ']]'
  on_cdata(s)
end
643
+
644
+
645
# Reports an end tag that is not closed by `>', distinguishing whether
# another tag follows or not.
def found_unclosed_etag(name)
  message =
    if @src.tag_start?
      "unclosed end tag `#{name}' meets another tag"
    else
      "unclosed end tag `#{name}' meets EOF"
    end
  parse_error(message)
end

# Reports a bare `</>' and passes it on as character data.
def found_empty_etag
  parse_error("parse error at `</'")
  on_chardata('</>')
end
657
+
658
+
659
# Scans an end tag and reports it through on_etag.
#
# s:: the chunk starting with `</'; its first two characters are removed
#     in place.
#
# Malformed forms (`</>', `</' with nothing after it, `</ name') are
# reported through parse_error and passed on as character data instead.
# Text after whitespace inside the tag is reported as an error and the
# part before the whitespace is used as the name. A missing closing `>'
# is reported through found_unclosed_etag.
def scan_etag(s)
  s.slice!(0, 2)  # remove '</'
  if s.empty?
    return found_empty_etag if @src.close_tag   # </>
    # </< or </[EOF]
    parse_error "parse error at `</'"
    s << '>' if @src.close_tag
    return on_chardata('</' << s)
  elsif /[ \t\n\r]+/n =~ s
    before, after = $`, $'
    if before.empty?    # </ tag
      parse_error "parse error at `</'"
      s << '>' if @src.close_tag
      return on_chardata('</' + s)
    elsif not after.empty?   # </ta g
      parse_error "illegal whitespace is found within end tag `#{before}'"
      # Consume chunks for as long as get_tag keeps returning one.
      nil while @src.get_tag
    end
    s = before
  end
  found_unclosed_etag(s) unless @src.close_tag  # </tag< or </tag[EOF]
  on_etag(s)
end
685
+
686
+
687
# Reports a bare `<>' and passes it on as character data.
def found_empty_stag
  parse_error("parse error at `<'")
  on_chardata('<>')
end

# Reports a start tag that is not closed by `>', distinguishing whether
# another tag follows or not.
def found_unclosed_stag(name)
  message =
    if @src.tag_start?
      "unclosed start tag `#{name}' meets another tag"
    else
      "unclosed start tag `#{name}' meets EOF"
    end
  parse_error(message)
end

# Reports an empty-element tag that is not closed by `>', distinguishing
# whether another tag follows or not.
def found_unclosed_emptyelem(name)
  message =
    if @src.tag_start?
      "unclosed empty element tag `#{name}' meets another tag"
    else
      "unclosed empty element tag `#{name}' meets EOF"
    end
  parse_error(message)
end
707
+
708
+
709
# Reports the offending token at the start of +s+ and returns the text
# that follows it (nil when the whole of +s+ is the token).
#
# The token is a single `/', `=', `'' or `"' when +s+ starts with one;
# otherwise everything up to the next whitespace or one of those
# characters.
def found_stag_error(s)
  bad, rest =
    if /\A[\/='"]/n =~ s
      [$&, $']
    elsif /(?=[ \t\n\r\/='"])/n =~ s
      [$`, $']
    else
      [s, nil]
    end
  parse_error "parse error at `#{bad}'"
  rest
end
720
+
721
+
722
# Scans a start tag (or empty-element tag) with its attributes and
# reports it through the on_stag / on_attribute* / on_stag_end* callbacks.
#
# s:: the chunk starting with `<'.
#
# A chunk holding only a name is reported directly. Otherwise the rest of
# the chunk is scanned for `name = "value"' pairs; an attribute value that
# is not terminated within the chunk is continued from the following
# chunks until the closing quote is found. A trailing `/' marks an
# empty-element tag. Anything else is reported through found_stag_error
# and scanning resumes after the offending token.
def scan_stag(s)
  if /(?=[\/ \t\n\r='"])/n =~ s
    name = $`
    s = $'
    name[0,1] = ''        # remove `<'
    if name.empty?        # `< tag' or `<=`
      parse_error "parse error at `<'"
      s << '>' if @src.close_tag
      return on_chardata('<' << s)
    end
    on_stag(name)
    self_closing = false
    quote = lead = nil
    begin
      more = false
      s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/n
             ) { |key, val, error|
        if key      # key="value"
          on_attribute(key)
          quote = val.slice!(0, 1)
          if val[-1] == quote[0]
            val.chop!
            scan_attvalue(val) unless val.empty?
          else
            # The closing quote is missing from this chunk: keep reading
            # chunks until one contains it.
            scan_attvalue(val) unless val.empty?
            begin
              s = @src.get
              unless s
                parse_error "unterminated attribute `#{key}' meets EOF"
                break
              end
              lead = s[0]
              val, s = s.split(quote, 2)
              if lead == ?<
                wellformed_error "`<' is found in attribute `#{key}'"
              elsif lead != ?>
                scan_attvalue('>')
              end
              scan_attvalue(val) if lead
            end until s
            more = s      # if eof then more is false, else true.
          end
          on_attribute_end(key)
        elsif error
          more = s = found_stag_error(error)
        else
          self_closing = true
        end
      }
    end while more
    unless @src.close_tag
      if self_closing
        found_unclosed_emptyelem(name)
      else
        found_unclosed_stag(name)
      end
    end
    if self_closing
      on_stag_end_empty(name)
    else
      on_stag_end(name)
    end
  else
    name = s
    name[0,1] = ''        # remove `<'
    if name.empty?
      return found_empty_stag if @src.close_tag   # <>
      # << or <[EOF]
      parse_error "parse error at `<'"
      return on_chardata('<')
    end
    on_stag(name)
    found_unclosed_stag(name) unless @src.close_tag
    on_stag_end(name)
  end
end
800
+
801
+
802
# Handles a `<!' construct that scan_content does not recognize: reports
# a parse error and passes the chunk on as character data, with `>'
# appended when the tag can be closed.
def scan_bang_tag(s)
  parse_error("parse error at `<!'")
  s << '>' if @src.close_tag
  on_chardata(s)
end
807
+
808
+
809
# Main content loop: dispatches each chunk to the matching scan_* method
# until the source yields no more chunks.
#
# s:: the first chunk, or nil/false when there is nothing to scan.
def scan_content(s)
  src = @src    # for speed
  while s
    if s[0] != ?<
      scan_chardata(s)
    else
      marker = s[1]
      if marker == ?/
        scan_etag(s)
      elsif marker == ?!
        if s[2] == ?- and s[3] == ?-
          scan_comment(s)
        elsif /\A<!\[CDATA\[/n =~ s
          scan_cdata($')
        else
          scan_bang_tag(s)
        end
      elsif marker == ??
        scan_pi(s)
      else
        scan_stag(s)
      end
    end
    s = src.get
  end
end
834
+
835
+
836
# Appends following chunks to +str+ until one contains +qmark+.
#
# str::   string extended in place with the text found before +qmark+.
# qmark:: the quotation mark to look for.
#
# A `>' is re-inserted in front of every chunk that does not itself start
# with `<' or `>'. Returns the text following +qmark+ in the chunk that
# contained it, or nil when the input ends first.
def get_until_qmark(str, qmark)
  rest = nil
  while true
    chunk = @src.get
    break unless chunk
    lead = chunk[0]
    piece, rest = chunk.split(qmark, 2)
    str << '>' unless lead == ?< or lead == ?>
    str << piece if lead
    break if rest
  end
  rest
end
848
+
849
+
850
+ XMLDeclPattern = kcode_regexp(%q{[ \t\n\r]([\-_\d\w]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|(\?\z)|([\-_.\d\w]+|[^ \t\n\r])}) #'
851
+
852
# Scans the pseudo-attributes of an XML declaration and reports them
# through the on_xmldecl* callbacks.
#
# s:: the text following `<?xml'.
#
# `version', `encoding' and `standalone' are expected in that order; a
# pseudo-attribute found out of order or with an unknown name is reported
# through parse_error and still passed to the visitor. Scanning stops at a
# `?' that ends the chunk and is followed by the closing `>'. Reaching end
# of input first is reported through parse_error.
def scan_xmldecl(s)
  endmark = nil
  state = 0       # 0: nothing seen, 1: version, 2: encoding, 3: done/other
  on_xmldecl
  begin
    continue = false
    s.scan(XMLDeclPattern[@kcode]) { |key, val, mark, error|
      # Copy the matched end mark into the method-level flag explicitly.
      # The loop condition below reads that flag, so it must not depend on
      # a block parameter aliasing the outer variable (block parameters
      # are block-local in Ruby 1.9 and later).
      endmark = mark
      if key then
        qmark = val.slice!(0,1)    # remove quotation marks
        if val[-1] == qmark[0] then
          val.chop!
        else
          # Value not terminated in this chunk: read on to the closing
          # quotation mark.
          continue = s = get_until_qmark(val, qmark)
          unless s then
            parse_error "unterminated XML declaration meets EOF"
            endmark = true
          end
        end
        if state == 0 and key == 'version' then
          on_xmldecl_version val
          state = 1
        elsif state == 1 and key == 'encoding' then
          on_xmldecl_encoding val
          state = 2
        elsif state >= 1 and key == 'standalone' then
          on_xmldecl_standalone val
          state = 3
        else
          state = 3
          if key == 'version' then
            parse_error "version declaration must not be here"
            on_xmldecl_version val
          elsif key == 'encoding' then
            parse_error "encoding declaration must not be here"
            on_xmldecl_encoding val
            state = 2 if state < 2
          elsif key == 'standalone' then
            parse_error "standalone declaration must not be here"
            on_xmldecl_standalone val
          else
            parse_error "unknown declaration `#{key}' in XML declaration"
            on_xmldecl_other key, val
          end
        end
      elsif endmark then
        unless @src.close_tag then
          parse_error "unexpected `#{endmark}' found in XML declaration"
          endmark = nil
        end
        # here always exit the loop.
      else
        parse_error "parse error at `#{error}'"
      end
    }
  end while !endmark and continue || s = @src.get_plain
  parse_error "unterminated XML declaration meets EOF" unless s or endmark
  parse_error "no declaration found in XML declaration" if state == 0
  on_xmldecl_end
end
911
+
912
+
913
+ SkipDTD = kcode_regexp(%q{(['"]|\A<!--|\A<\?|--\z|\?\z)|\]\s*\z}) #'
914
+
915
# Skips an internal DTD subset up to its closing `]>'.
#
# s:: the text following the `[' that opened the subset.
#
# Quoted literals, comments and processing instructions are tracked so
# that a `]' inside them does not end the subset. Reaching end of input
# first is reported through parse_error.
def skip_internal_dtd(s)
  quote = nil       # terminator of the construct currently open, if any
  searching = true
  begin    # skip until `]>'
    s.scan(SkipDTD[@kcode]) { |q,|     #'
      if quote
        quote = nil if quote == q and (quote.size == 1 || @src.tag_end?)
      elsif q
        case q
        when '<!--' then quote = '--'
        when '<?' then quote = '?'
        when '"', "'" then quote = q
        end
      elsif @src.close_tag
        searching = false
      end
    }
  end while searching and s = @src.get
  parse_error "unterminated internal DTD subset meets EOF" unless s
end
937
+
938
+
939
# Handles an internal DTD subset: it is not interpreted, so a warning is
# issued and the subset is skipped.
def scan_internal_dtd(s)
  warning("internal DTD subset is not supported")
  skip_internal_dtd(s)
end


# Reports a DOCTYPE keyword that is neither `PUBLIC' nor `SYSTEM' and
# returns the keyword to use in its place. The offending +pubsys+ is not
# used here.
def found_invalid_pubsys(pubsys)
  parse_error("`PUBLIC' or `SYSTEM' should be here")
  'SYSTEM'
end
949
+
950
+
951
+ DoctypePattern = kcode_regexp(%q{[ \t\n\r](?:([^ \t\n\r\/'"=\[]+)|('[^']*'?|"[^"]*"?))|([\-_.\d\w]+|[^ \t\n\r])}) #"
952
+
953
# Scans a DOCTYPE declaration and reports it through on_doctype.
#
# s:: the text following `<!DOCTYPE'.
#
# The first name found is the root element, the second must be `PUBLIC'
# or `SYSTEM', and up to two quoted literals give the external ID. A `['
# starts an internal subset, which is handed to scan_internal_dtd after
# on_doctype has been called. Malformed input is reported through
# parse_error.
def scan_doctype(s)
  root = syspub = sysid = pubid = nil
  internal_dtd = false
  pattern = DoctypePattern[@kcode]
  begin
    if pattern =~ s
      name, literal, delim, s = $1, $2, $3, $'
      if name
        if !root
          root = name
        elsif !syspub
          name = found_invalid_pubsys(name) unless name == 'PUBLIC' or name == 'SYSTEM'
          syspub = name
        else
          parse_error "parse error at `#{name}'"
        end
      elsif literal
        quote = literal.slice!(0, 1)   # remove quotation marks
        if syspub
          if literal[-1] == quote[0]
            literal.chop!
          else
            # Literal continues in later chunks; '' stands in at EOF.
            s = get_until_qmark(literal, quote) || ''
          end
          if !sysid
            sysid = literal
          elsif !pubid and syspub == 'PUBLIC'
            # The second literal after PUBLIC is the system ID, which
            # makes the first one the public ID.
            pubid = sysid
            sysid = literal
          else
            parse_error "too many external ID literals in DOCTYPE"
          end
        else
          parse_error "parse error at `#{quote}'"
          s = literal << s
        end
      elsif delim == '['
        internal_dtd = true
        break
      else
        parse_error "parse error at `#{delim}'"
      end
    else
      s = ''
    end
    if s.empty?
      break if @src.close_tag
      s = @src.get_plain
    end
  end while s
  parse_error "unterminated DOCTYPE declaration meets EOF" unless s
  parse_error "no root element is specified in DOCTYPE" unless root
  if syspub and not sysid
    parse_error "too few external ID literals in DOCTYPE"
  end
  if syspub == 'PUBLIC' and not pubid
    # A single literal after PUBLIC is the public ID.
    pubid, sysid = sysid, nil
  end
  on_doctype(root, pubid, sysid)
  scan_internal_dtd(s) if internal_dtd
end
1018
+
1019
+
1020
# Scans the document prolog, then hands over to scan_content.
#
# s:: the first chunk of the document.
#
# An XML declaration is accepted only as the very first chunk. After it,
# comments, processing instructions, whitespace and at most one DOCTYPE
# declaration are consumed; the first chunk that is none of these starts
# the content.
def scan_prolog(s)
  if /\A<\?xml(?=[ \t\n\r])/n =~ s
    scan_xmldecl($')
    s = @src.get
  end
  doctype_allowed = true
  src = @src    # for speed
  while s
    if s[0] == ?<
      marker = s[1]
      if marker == ?!
        if s[2] == ?- and s[3] == ?-
          scan_comment(s)
        elsif /\A<!DOCTYPE(?=[ \t\n\r])/n =~ s and doctype_allowed
          doctype_allowed = false
          scan_doctype($')
        else
          break
        end
      elsif marker == ??
        scan_pi(s)
      else
        break
      end
      s = src.get
    elsif /[^ \t\r\n]/ !~ s
      # Whitespace-only chunk.
      on_prolog_space(s) unless s.empty?
      s = src.get_plain
    else
      break
    end
  end
  scan_content(s || src.get)
end
1053
+
1054
+
1055
# Scans one whole document from @src, bracketed by the
# on_start_document / on_end_document callbacks.
def scan_document
  on_start_document()
  @src.prepare
  scan_prolog(@src.get)
  on_end_document()
end


# Builds the chunk source used for parsing +src+.
def make_source(src)
  Source.new(src)
end
1066
+
1067
+
1068
+ public
1069
+
1070
# Parses the document read from +src+, driving the visitor callbacks.
#
# src:: anything accepted by make_source.
#
# @src is set only for the duration of the scan and is reset to nil
# afterwards, also when scanning raises. Returns self.
def parse_document(src)
  @src = make_source(src)
  begin
    scan_document()
  ensure
    @src = nil
  end
  self
end

# parse is another name for parse_document.
alias parse parse_document
1081
+
1082
+ end
1083
+
1084
+
1085
+ end
1086
+
1087
+
1088
+
1089
+
1090
+
1091
# Self-test / benchmark, run only when this file is executed directly:
# parses the files named on the command line (or standard input) and
# prints the user CPU time spent to STDERR.
if $0 == __FILE__ then
  # Visitor that prints parse and well-formedness errors to STDERR, but
  # only in verbose mode.
  class TestVisitor
    include XMLScan::Visitor
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  # $s gives the visitor access to the scanner's current path and line.
  $s = scan = XMLScan::XMLScanner.new(TestVisitor.new)
  src = ARGF
  # Give ARGF a +path+ so that error messages name the current file.
  def src.path; filename; end
  # Process.times replaces the obsolete Time.times.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end