rgen 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. data/CHANGELOG +20 -1
  2. data/MIT-LICENSE +1 -1
  3. data/README +12 -9
  4. data/lib/instantiators/ea_instantiator.rb +36 -0
  5. data/lib/metamodels/uml13_metamodel.rb +559 -0
  6. data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
  7. data/lib/mmgen/metamodel_generator.rb +5 -5
  8. data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
  9. data/lib/mmgen/mmgen.rb +6 -4
  10. data/lib/mmgen/templates/annotations.tpl +37 -0
  11. data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
  12. data/lib/rgen/ecore/ecore.rb +190 -0
  13. data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
  14. data/lib/rgen/ecore/ecore_transformer.rb +85 -0
  15. data/lib/rgen/environment.rb +9 -24
  16. data/lib/rgen/find_helper.rb +68 -0
  17. data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
  18. data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
  19. data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
  20. data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
  21. data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
  22. data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
  23. data/lib/rgen/metamodel_builder.rb +103 -9
  24. data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
  25. data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
  26. data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
  27. data/lib/rgen/metamodel_builder/data_types.rb +67 -0
  28. data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
  29. data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
  30. data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
  31. data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
  32. data/lib/rgen/model_comparator.rb +56 -0
  33. data/lib/rgen/model_dumper.rb +5 -5
  34. data/lib/rgen/name_helper.rb +17 -1
  35. data/lib/rgen/template_language.rb +148 -28
  36. data/lib/rgen/template_language/directory_template_container.rb +56 -38
  37. data/lib/rgen/template_language/output_handler.rb +93 -77
  38. data/lib/rgen/template_language/template_container.rb +186 -143
  39. data/lib/rgen/transformer.rb +19 -14
  40. data/lib/transformers/uml13_to_ecore.rb +75 -0
  41. data/redist/xmlscan/ChangeLog +1301 -0
  42. data/redist/xmlscan/README +34 -0
  43. data/redist/xmlscan/THANKS +11 -0
  44. data/redist/xmlscan/doc/changes.html +74 -0
  45. data/redist/xmlscan/doc/changes.rd +80 -0
  46. data/redist/xmlscan/doc/en/conformance.html +136 -0
  47. data/redist/xmlscan/doc/en/conformance.rd +152 -0
  48. data/redist/xmlscan/doc/en/manual.html +356 -0
  49. data/redist/xmlscan/doc/en/manual.rd +402 -0
  50. data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
  51. data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
  52. data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
  53. data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
  54. data/redist/xmlscan/doc/src/Makefile +41 -0
  55. data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
  56. data/redist/xmlscan/doc/src/langsplit.rb +110 -0
  57. data/redist/xmlscan/doc/src/manual.rd.src +614 -0
  58. data/redist/xmlscan/install.rb +41 -0
  59. data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
  60. data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
  61. data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
  62. data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
  63. data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
  64. data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
  65. data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
  66. data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
  67. data/redist/xmlscan/memo/CONFORMANCE +1249 -0
  68. data/redist/xmlscan/memo/PRODUCTIONS +195 -0
  69. data/redist/xmlscan/memo/contentspec.ry +335 -0
  70. data/redist/xmlscan/samples/chibixml.rb +105 -0
  71. data/redist/xmlscan/samples/getxmlchar.rb +122 -0
  72. data/redist/xmlscan/samples/rexml.rb +159 -0
  73. data/redist/xmlscan/samples/xmlbench.rb +88 -0
  74. data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
  75. data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
  76. data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
  77. data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
  78. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
  79. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
  80. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
  81. data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
  82. data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
  83. data/redist/xmlscan/samples/xmlconftest.rb +200 -0
  84. data/redist/xmlscan/test.rb +7 -0
  85. data/redist/xmlscan/tests/deftestcase.rb +73 -0
  86. data/redist/xmlscan/tests/runtest.rb +47 -0
  87. data/redist/xmlscan/tests/testall.rb +14 -0
  88. data/redist/xmlscan/tests/testencoding.rb +438 -0
  89. data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
  90. data/redist/xmlscan/tests/testnamespace.rb +457 -0
  91. data/redist/xmlscan/tests/testparser.rb +591 -0
  92. data/redist/xmlscan/tests/testscanner.rb +1749 -0
  93. data/redist/xmlscan/tests/testxmlchar.rb +143 -0
  94. data/redist/xmlscan/tests/visitor.rb +34 -0
  95. data/test/array_extensions_test.rb +2 -2
  96. data/test/ea_instantiator_test.rb +41 -0
  97. data/test/ecore_self_test.rb +53 -0
  98. data/test/environment_test.rb +11 -6
  99. data/test/metamodel_builder_test.rb +404 -245
  100. data/test/metamodel_roundtrip_test.rb +52 -0
  101. data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
  102. data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
  103. data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
  104. data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
  105. data/test/rgen_test.rb +3 -3
  106. data/test/template_language_test.rb +65 -39
  107. data/test/template_language_test/expected_result.txt +24 -3
  108. data/test/template_language_test/templates/code/array.tpl +11 -0
  109. data/test/template_language_test/templates/content/author.tpl +7 -0
  110. data/test/template_language_test/templates/content/chapter.tpl +1 -1
  111. data/test/template_language_test/templates/root.tpl +17 -8
  112. data/test/template_language_test/testout.txt +24 -3
  113. data/test/testmodel/class_model_checker.rb +119 -0
  114. data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
  115. data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
  116. data/test/testmodel/ea_testmodel_partial.xml +317 -0
  117. data/test/testmodel/ecore_model_checker.rb +101 -0
  118. data/test/testmodel/manual_testmodel.xml +22 -0
  119. data/test/testmodel/object_model_checker.rb +67 -0
  120. data/test/transformer_test.rb +18 -10
  121. data/test/xml_instantiator_test.rb +81 -8
  122. data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
  123. data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
  124. data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
  125. data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
  126. metadata +126 -28
  127. data/lib/ea/xmi_class_instantiator.rb +0 -46
  128. data/lib/ea/xmi_helper.rb +0 -26
  129. data/lib/ea/xmi_metamodel.rb +0 -34
  130. data/lib/ea/xmi_object_instantiator.rb +0 -46
  131. data/lib/ea/xmi_to_classmodel.rb +0 -78
  132. data/lib/ea/xmi_to_objectmodel.rb +0 -92
  133. data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
  134. data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
  135. data/lib/rgen/xml_instantiator.rb +0 -132
  136. data/lib/uml/objectmodel_instantiator.rb +0 -53
  137. data/lib/uml/uml_classmodel.rb +0 -92
  138. data/lib/uml/uml_objectmodel.rb +0 -65
  139. data/test/metamodel_generator_test.rb +0 -44
  140. data/test/metamodel_generator_test/TestModel.rb +0 -40
  141. data/test/metamodel_generator_test/expected_result.txt +0 -40
  142. data/test/xmi_class_instantiator_test.rb +0 -24
  143. data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
  144. data/test/xmi_object_instantiator_test.rb +0 -65
  145. data/test/xml_instantiator_test/testmodel.xml +0 -7
@@ -0,0 +1,1109 @@
1
+ #
2
+ # xmlscan/scanner.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2002
5
+ #
6
+ # $Id: scanner.rb,v 1.83 2003/05/12 14:13:33 katsu Exp $
7
+ #
8
+
9
+ #
10
+ # CONSIDERATIONS FOR CHARACTER ENCODINGS:
11
+ #
12
+ # There are the following common characteristics in character encodings
13
+ # which are supported by Ruby's $KCODE feature (ISO-8859-*, Shift_JIS,
14
+ # EUC, and UTF-8):
15
+ #
16
+ # - Stateless.
17
+ # - ASCII characters are encoded in the same manner as US-ASCII.
18
+ # - The octet sequences corresponding to non-ASCII characters begin
19
+ # with an octet greater than 0x80.
20
+ # - The following characters can be identified by just one octet.
21
+ # That is, every octet corresponding to the following characters in
22
+ # US-ASCII never appear as a part of an octet sequence representing a
23
+ # non-ASCII character.
24
+ #
25
+ # Whitespaces("\t", "\n", "\r", and " ") and
26
+ # ! \ " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
27
+ #
28
+ # Be careful that `[' and `]' are NOT included in the list!
29
+ #
30
+ # If we build a regular expression carefully in accordance with these
31
+ # characteristics, we can get the same match regardless of the value
32
+ # of $KCODE. Moreover, if it can be premised on them, we can detect
33
+ # several delimiters without regular expressions. XMLScanner uses this
34
+ # fact in order to share many regular expressions in all $KCODE modes,
35
+ # and in order to optimize parsing speed.
36
+ #
37
+
38
+ require 'xmlscan/visitor'
39
+
40
+
41
+ module XMLScan
42
+
43
class Input

  # Adapter that gives any source object a uniform `gets' / `lineno' /
  # `path' interface.
  #
  # * An object responding to `gets' is read through directly.
  # * An object responding to `to_ary' is served one element per `gets'.
  # * Anything else is handed out whole by the first `gets'; later calls
  #   return nil.
  #
  # `lineno' and `path' are forwarded to the source when it provides them;
  # otherwise the defaults defined below (0 and '-') apply.
  def initialize(src)
    @src = src
    if not src.respond_to?(:gets)
      if src.respond_to?(:to_ary)
        @v = src.to_ary
        @n = -1
        # Per-instance overrides: walk the array one element at a time.
        def self.gets
          @n += 1
          @v.at(@n)
        end
        def self.lineno
          @n + 1
        end
      else
        @v = @src
        # Per-instance override: return the value once, then nil.
        def self.gets
          once, @v = @v, nil
          once
        end
      end
    end
    if src.respond_to?(:lineno)
      def self.lineno
        @src.lineno
      end
    end
    if src.respond_to?(:path)
      def self.path
        @src.path
      end
    end
  end

  attr_reader :src

  # Defaults, used unless initialize installed a per-instance override.
  def gets
    @src.gets
  end

  def lineno
    0
  end

  def path
    '-'
  end

  # Returns +src+ itself when it already offers gets, lineno and path;
  # otherwise returns a new Input around it.
  def self.wrap(src)
    complete = src.respond_to?(:gets) &&
               src.respond_to?(:lineno) &&
               src.respond_to?(:path)
    complete ? src : new(src)
  end

  # Inverse of wrap: returns the wrapped source for an Input, and any other
  # object unchanged.
  def self.unwrap(obj)
    self === obj ? obj.src : obj
  end

end
89
+
90
+
91
+
92
# An Array whose own instance methods (those Array defines itself, minus
# anything Kernel also defines) are made private, so subclasses can use the
# array machinery internally without exposing it to callers.
class PrivateArray < Array
  hidden = Array.instance_methods(false) - Kernel.instance_methods(false)
  private(*hidden)
end
96
+
97
+
98
class Source < PrivateArray
  # Source inherits Array only for speed.
  #
  # The array part is a queue of fragments produced by splitting input
  # lines around `<' and `>'.  It is kept reversed, so the next fragment to
  # hand out is always the last element.  @last holds the final fragment of
  # the most recently read line; it is kept apart because the next line may
  # still have to be appended to it.

  def initialize(src)
    super()
    @src = Input.wrap(src)
    @eof = false
    @last = nil
  end

  # The object originally given to the constructor.
  def source
    Input.unwrap @src
  end


  # True once the input is exhausted and no queued fragment remains.
  def eof?
    @eof && empty?
  end

  # Discards everything that is pending and marks the input as finished.
  def abort
    @eof = true
    @last = nil
    clear
    self
  end


  # Returns the next fragment, reading further lines from the input as
  # needed, or nil when nothing is left.
  def get
    piece = pop
    return piece if piece
    return nil if @eof
    carry = @last
    begin
      line = @src.gets
      unless line
        # End of input: the held-back fragment becomes the only queue entry.
        @eof = true
        unshift carry
        carry = nil
        break
      end
      parts = line.split(/(?=<|>[<>])|>/n, -1)
      if carry
        if /\A[<>]/n =~ parts.first
          push carry
        else
          # The line continues the fragment held back from the previous one.
          parts[0] = carry << (parts.first || '')
        end
      end
      concat parts
      carry = pop
    end while empty?
    @last = carry
    reverse!
    pop
  end


  # Primes the queue.  When the very first fragment is empty, the following
  # fragment gets a `>' prefixed so that the first `>' is preserved.
  def prepare
    first = get
    if first and first.empty?
      first = get
      first = '>' << first if first
    end
    first and push(first)
  end


  # Truthy when the upcoming fragment does not begin with `<'.
  def tag_end?
    upcoming = last || @last
    upcoming and upcoming[0] != ?<
  end

  # Truthy when the upcoming fragment begins with `<'.
  def tag_start?
    upcoming = last || @last
    upcoming and upcoming[0] == ?<
  end

  # tag_end?, and remove a `>'.
  def close_tag
    upcoming = last || @last
    return false unless upcoming and upcoming[0] != ?<
    if upcoming == '>' or upcoming.empty?
      first = get
      upcoming = last || @last
      unless upcoming and upcoming[0] == ?<        # for speed up
        held = [ first ]
        while (upcoming = last || @last) and
              (upcoming == '>' || upcoming.empty?)
          held.push get
        end
        held.pop unless upcoming and upcoming[0] != ?<
        concat held
      end
    end
    true
  end


  # Returns the next fragment only if it does not begin with `<'.
  def get_text
    upcoming = last || @last
    upcoming and upcoming[0] != ?< and get
  end

  # Returns the next fragment only if it begins with `<'.
  def get_tag
    upcoming = last || @last
    upcoming and upcoming[0] == ?< and get
  end

  # Returns the next fragment; one that starts with neither `<' nor `>'
  # gets a `>' prefixed.
  def get_plain
    piece = get
    if piece
      lead = piece[0]
      piece = '>' << piece if lead != ?< and lead != ?>
    end
    piece
  end

  def lineno
    @src.lineno
  end

  def path
    @src.path
  end


  # The following methods are for debug.

  def inspect
    queued = []
    reverse_each do |frag|
      queued.push ">" unless /\A[<>]/n =~ frag
      queued.push frag.inspect
    end
    tail = []
    if @last
      tail.push ">" unless /\A[<>]/n =~ @last
      tail.push @last.inspect
    end
    queued.push '#eof' if @eof
    "((#{queued.join(' ')}) (#{tail.join(' ')}) . #{source.inspect})"
  end

  def each
    prepare
    while frag = get
      yield frag
    end
    self
  end

  def test
    last or @last or (peeked = get and push peeked and peeked)
  end

end
241
+
242
+
243
+
244
+ class XMLScanner
245
+
246
class << self

  # Option names this class supports: one for every private instance
  # method it defines itself whose name starts with `apply_option_'.
  def provided_options
    names = []
    private_instance_methods(false).each do |meth|
      names << $' if /\Aapply_option_/n =~ meth
    end
    names
  end

  # Switches +option+ on for +instance+ by calling its apply_option_* method.
  def apply_option(instance, option)
    instance.__send__ "apply_option_#{option}"
  end

  # Applies every requested option through the ancestor that provides it.
  # Raises ArgumentError when an option is provided by no ancestor.
  def apply_options(instance, options)
    pending = {}
    options.each { |opt| pending[opt.to_s] = true }
    ancestors.each do |klass|
      next unless klass.respond_to? :provided_options
      klass.provided_options.each do |opt|
        next unless pending.include? opt
        pending.delete opt
        klass.apply_option instance, opt
      end
    end
    unless pending.empty?
      raise ArgumentError, "undefined option `#{pending.keys[0]}'"
    end
    instance
  end
  private :apply_options

  # Builds a scanner for +visitor+ and then applies the given options.
  def new(visitor, *options)
    scanner = super(visitor)
    apply_options scanner, options
  end

end
287
+
288
+
289
+
290
+ def initialize(visitor)
291
+ @visitor = visitor
292
+ @decoration = nil
293
+ @src = nil
294
+ @kcode = nil
295
+ end
296
+
297
+
298
+ def kcode=(kcode)
299
+ @kcode = Regexp.new('', nil, kcode || '').kcode
300
+ kcode
301
+ end
302
+
303
+ attr_reader :kcode
304
+
305
+
306
+ def decorate(decoration)
307
+ unless @decoration then
308
+ @visitor = @decoration = Decoration.new(@visitor)
309
+ end
310
+ @decoration.expand decoration
311
+ end
312
+ private :decorate
313
+
314
+
315
+ def lineno
316
+ @src && @src.lineno
317
+ end
318
+
319
+ def path
320
+ @src && @src.path
321
+ end
322
+
323
# The object currently being parsed, or nil when no parse is in progress.
# Guarded the same way as lineno and path, so asking for it outside of
# parse_document no longer raises NoMethodError on nil.
def source
  @src && @src.source
end
326
+
327
+
328
+ private
329
+
330
+ def parse_error(msg)
331
+ @visitor.parse_error msg
332
+ end
333
+
334
+ def wellformed_error(msg)
335
+ @visitor.wellformed_error msg
336
+ end
337
+
338
+ def valid_error(msg)
339
+ @visitor.valid_error msg
340
+ end
341
+
342
+ def warning(msg)
343
+ @visitor.warning msg
344
+ end
345
+
346
+
347
+ def on_xmldecl
348
+ @visitor.on_xmldecl
349
+ end
350
+
351
+ def on_xmldecl_version(str)
352
+ @visitor.on_xmldecl_version str
353
+ end
354
+
355
+ def on_xmldecl_encoding(str)
356
+ @visitor.on_xmldecl_encoding str
357
+ end
358
+
359
+ def on_xmldecl_standalone(str)
360
+ @visitor.on_xmldecl_standalone str
361
+ end
362
+
363
+ def on_xmldecl_other(name, value)
364
+ @visitor.on_xmldecl_other name, value
365
+ end
366
+
367
+ def on_xmldecl_end
368
+ @visitor.on_xmldecl_end
369
+ end
370
+
371
+ def on_doctype(root, pubid, sysid)
372
+ @visitor.on_doctype root, pubid, sysid
373
+ end
374
+
375
+ def on_prolog_space(str)
376
+ @visitor.on_prolog_space str
377
+ end
378
+
379
+ def on_comment(str)
380
+ @visitor.on_comment str
381
+ end
382
+
383
+ def on_pi(target, pi)
384
+ @visitor.on_pi target, pi
385
+ end
386
+
387
+ def on_chardata(str)
388
+ @visitor.on_chardata str
389
+ end
390
+
391
+ def on_cdata(str)
392
+ @visitor.on_cdata str
393
+ end
394
+
395
+ def on_etag(name)
396
+ @visitor.on_etag name
397
+ end
398
+
399
+ def on_entityref(ref)
400
+ @visitor.on_entityref ref
401
+ end
402
+
403
+ def on_charref(code)
404
+ @visitor.on_charref code
405
+ end
406
+
407
+ def on_charref_hex(code)
408
+ @visitor.on_charref_hex code
409
+ end
410
+
411
+ def on_start_document
412
+ @visitor.on_start_document
413
+ end
414
+
415
+ def on_end_document
416
+ @visitor.on_end_document
417
+ end
418
+
419
+
420
+ # <hoge fuga="foo&bar;&#38;&#x26;foo" />HOGE
421
+ # ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
422
+ # 1 2 3 4 5 6 7 8 9 A
423
+ #
424
+ # The following method will be called with the following arguments
425
+ # when the parser reaches the above point;
426
+ #
427
+ # 1: on_stag ('hoge')
428
+ # 2: on_attribute ('fuga')
429
+ # 3: on_attr_value ('foo')
430
+ # 4: on_attr_entityref ('bar')
431
+ # 5: on_attr_charref (38)
432
+ # 6: on_attr_charref_hex (38)
433
+ # 7: on_attr_value ('foo')
434
+ # 8: on_attribute_end ('fuga')
435
+ # 9: on_stag_end_empty ('hoge')
436
+ # or
437
+ # on_stag_end ('hoge')
438
+ #
439
+ # A: on_chardata ('HOGE')
440
+
441
+ def on_stag(name)
442
+ @visitor.on_stag name
443
+ end
444
+
445
+ def on_attribute(name)
446
+ @visitor.on_attribute name
447
+ end
448
+
449
+ def on_attr_value(str)
450
+ @visitor.on_attr_value str
451
+ end
452
+
453
+ def on_attr_entityref(ref)
454
+ @visitor.on_attr_entityref ref
455
+ end
456
+
457
+ def on_attr_charref(code)
458
+ @visitor.on_attr_charref code
459
+ end
460
+
461
+ def on_attr_charref_hex(code)
462
+ @visitor.on_attr_charref_hex code
463
+ end
464
+
465
+ def on_attribute_end(name)
466
+ @visitor.on_attribute_end name
467
+ end
468
+
469
+ def on_stag_end_empty(name)
470
+ @visitor.on_stag_end_empty name
471
+ end
472
+
473
+ def on_stag_end(name)
474
+ @visitor.on_stag_end name
475
+ end
476
+
477
+
478
+
479
+ private
480
+
481
+ module KcodeRegexp
482
+ private
483
+ Kcodes = [ //n.kcode, //e.kcode, //s.kcode, //u.kcode ]
484
+ def kcode_regexp(re)
485
+ h = {}
486
+ Kcodes.each { |i| h[i] = Regexp.new(re, nil, i) }
487
+ h.default = Regexp.new(re)
488
+ h
489
+ end
490
+ end
491
+ extend KcodeRegexp
492
+
493
+
494
+ InvalidEntityRef = kcode_regexp('(?=[^#\d\w]|\z)')
495
+
496
+ def scan_chardata(s)
497
+ while true
498
+ unless /&/n =~ s then
499
+ on_chardata s
500
+ else
501
+ s = $`
502
+ on_chardata s unless s.empty?
503
+ ref = nil
504
+ $'.split('&', -1).each { |s|
505
+ unless /(?!\A);|(?=[ \t\r\n])/n =~ s and not $&.empty? then
506
+ if InvalidEntityRef[@kcode] =~ s and not (ref = $`).strip.empty?
507
+ then
508
+ parse_error "reference to `#{ref}' doesn't end with `;'"
509
+ else
510
+ parse_error "`&' is not used for entity/character references"
511
+ on_chardata('&' << s)
512
+ next
513
+ end
514
+ end
515
+ ref = $`
516
+ s = $'
517
+ if /\A[^#]/n =~ ref then
518
+ on_entityref ref
519
+ elsif /\A#(\d+)\z/n =~ ref then
520
+ on_charref $1.to_i
521
+ elsif /\A#x([\dA-Fa-f]+)\z/n =~ ref then
522
+ on_charref_hex $1.hex
523
+ else
524
+ parse_error "invalid character reference `#{ref}'"
525
+ end
526
+ on_chardata s unless s.empty?
527
+ }
528
+ end
529
+ s = @src.get_text
530
+ break unless s
531
+ s = '>' << s unless s == '>'
532
+ end
533
+ end
534
+
535
+
536
+ def scan_attvalue(s) # almostly copy & paste from scan_chardata
537
+ unless /&/n =~ s then
538
+ on_attr_value s
539
+ else
540
+ s = $`
541
+ on_attr_value s unless s.empty?
542
+ ref = nil
543
+ $'.split('&', -1).each { |s|
544
+ unless /(?!\A);|(?=[ \t\r\n])/n =~ s and not $&.empty? then
545
+ if InvalidEntityRef[@kcode] =~ s and not (ref = $`).strip.empty?
546
+ then
547
+ parse_error "reference to `#{ref}' doesn't end with `;'"
548
+ else
549
+ parse_error "`&' is not used for entity/character references"
550
+ on_attr_value('&' << s)
551
+ next
552
+ end
553
+ end
554
+ ref = $`
555
+ s = $'
556
+ if /\A[^#]/n =~ ref then
557
+ on_attr_entityref ref
558
+ elsif /\A#(\d+)\z/n =~ ref then
559
+ on_attr_charref $1.to_i
560
+ elsif /\A#x([\dA-Fa-f]+)\z/n =~ ref then
561
+ on_attr_charref_hex $1.hex
562
+ else
563
+ parse_error "invalid character reference `#{ref}'"
564
+ end
565
+ on_attr_value s unless s.empty?
566
+ }
567
+ end
568
+ end
569
+
570
+
571
+ def scan_comment(s)
572
+ s[0,4] = '' # remove `<!--'
573
+ comm = ''
574
+ until /--/n =~ s
575
+ comm << s
576
+ s = @src.get_plain
577
+ unless s then
578
+ parse_error "unterminated comment meets EOF"
579
+ return on_comment(comm)
580
+ end
581
+ end
582
+ comm << $`
583
+ until (s = $').empty? and @src.close_tag
584
+ if s == '-' and @src.close_tag then # --->
585
+ parse_error "comment ending in `--->' is not allowed"
586
+ comm << s
587
+ break
588
+ end
589
+ parse_error "comment includes `--'"
590
+ comm << '--'
591
+ until /--/n =~ s # copy & paste for performance
592
+ comm << s
593
+ s = @src.get_plain
594
+ unless s then
595
+ parse_error "unterminated comment meets EOF"
596
+ return on_comment(comm)
597
+ end
598
+ end
599
+ comm << $`
600
+ end
601
+ on_comment comm
602
+ end
603
+
604
+
605
# Scans a processing instruction beginning at +s+ (which starts with `<?')
# and reports it through on_pi.  Text that does not form a valid PI start
# is reported as a parse error and passed on as character data.
def scan_pi(s)
  if /\A<\?([^ \t\n\r?]+)(?:[ \t\n\r]+|(?=\?\z))/n =~ s
    target, body = $1, $'
    # Keep appending fragments until one ends in `?' right before a `>'.
    until body[-1] == ?? and @src.close_tag
      chunk = @src.get_plain
      unless chunk
        parse_error "unterminated PI meets EOF"
        return on_pi(target, body)
      end
      body << chunk
    end
    body.chop!                  # remove last `?'
    on_pi target, body
  else
    parse_error "parse error at `<?'"
    s << '>' if @src.close_tag
    on_chardata s
  end
end
625
+
626
+
627
+ CDATAPattern = kcode_regexp('\]\]\z')
628
+
629
# Collects a CDATA section.  +s+ is the text following `<![CDATA[' and is
# extended in place until it ends with `]]' directly before a `>'.
def scan_cdata(s)
  terminator = CDATAPattern[@kcode]
  until terminator =~ s and @src.close_tag
    more = @src.get_plain
    if more
      s << more
    else
      parse_error "unterminated CDATA section meets EOF"
      return on_cdata(s)
    end
  end
  s.chop!.chop!                 # remove ']]'
  on_cdata s
end
643
+
644
+
645
+ def found_unclosed_etag(name)
646
+ if @src.tag_start? then
647
+ parse_error "unclosed end tag `#{name}' meets another tag"
648
+ else
649
+ parse_error "unclosed end tag `#{name}' meets EOF"
650
+ end
651
+ end
652
+
653
+ def found_empty_etag
654
+ parse_error "parse error at `</'"
655
+ on_chardata '</>'
656
+ end
657
+
658
+
659
# Scans an end tag.  +s+ starts with `</'.  Malformed starts are reported
# as parse errors and passed on as character data; otherwise on_etag is
# called with the tag name.
def scan_etag(s)
  s[0, 2] = ''                  # remove '</'
  if s.empty?
    return found_empty_etag if @src.close_tag       # </>
    # </< or </[EOF]
    parse_error "parse error at `</'"
    s << '>' if @src.close_tag
    return on_chardata('</' << s)
  end
  if /[ \t\n\r]+/n =~ s
    name, rest = $`, $'
    if name.empty?                                  # </ tag
      parse_error "parse error at `</'"
      s << '>' if @src.close_tag
      return on_chardata('</' + s)
    end
    unless rest.empty?                              # </ta g
      parse_error "illegal whitespace is found within end tag `#{name}'"
      # Throw away the remaining fragments of this tag.
      while @src.get_tag
      end
    end
    s = name
  end
  found_unclosed_etag s unless @src.close_tag       # </tag< or </tag[EOF]
  on_etag s
end
685
+
686
+
687
+ def found_empty_stag
688
+ parse_error "parse error at `<'"
689
+ on_chardata '<>'
690
+ end
691
+
692
+ def found_unclosed_stag(name)
693
+ if @src.tag_start? then
694
+ parse_error "unclosed start tag `#{name}' meets another tag"
695
+ else
696
+ parse_error "unclosed start tag `#{name}' meets EOF"
697
+ end
698
+ end
699
+
700
+ def found_unclosed_emptyelem(name)
701
+ if @src.tag_start? then
702
+ parse_error "unclosed empty element tag `#{name}' meets another tag"
703
+ else
704
+ parse_error "unclosed empty element tag `#{name}' meets EOF"
705
+ end
706
+ end
707
+
708
+
709
# Reports the offending token at the start of +s+ as a parse error and
# returns the text after it (nil when the whole of +s+ was the token).
def found_stag_error(s)
  case s
  when /\A[\/='"]/n
    # A single delimiter character is the token.
    bad, rest = $&, $'
  when /(?=[ \t\n\r\/='"])/n
    # Everything up to the next whitespace or delimiter is the token.
    bad, rest = $`, $'
  else
    bad, rest = s, nil
  end
  parse_error "parse error at `#{bad}'"
  rest
end
720
+
721
+
722
+ def scan_stag(s)
723
+ unless /(?=[\/ \t\n\r='"])/n =~ s then
724
+ name = s
725
+ name[0,1] = '' # remove `<'
726
+ if name.empty? then
727
+ if @src.close_tag then # <>
728
+ return found_empty_stag
729
+ else # << or <[EOF]
730
+ parse_error "parse error at `<'"
731
+ return on_chardata('<')
732
+ end
733
+ end
734
+ on_stag name
735
+ found_unclosed_stag name unless @src.close_tag
736
+ on_stag_end name
737
+ else
738
+ name = $`
739
+ s = $'
740
+ name[0,1] = '' # remove `<'
741
+ if name.empty? then # `< tag' or `<=`
742
+ parse_error "parse error at `<'"
743
+ s << '>' if @src.close_tag
744
+ return on_chardata('<' << s)
745
+ end
746
+ on_stag name
747
+ emptyelem = false
748
+ key,val,error,qmark,c = nil
749
+ begin
750
+ continue = false
751
+ s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/n
752
+ ) { |key,val,error|
753
+ if key then # key="value"
754
+ on_attribute key
755
+ qmark = val.slice!(0,1)
756
+ if val[-1] == qmark[0] then
757
+ val.chop!
758
+ scan_attvalue val unless val.empty?
759
+ else
760
+ scan_attvalue val unless val.empty?
761
+ begin
762
+ s = @src.get
763
+ unless s then
764
+ parse_error "unterminated attribute `#{key}' meets EOF"
765
+ break
766
+ end
767
+ c = s[0]
768
+ val, s = s.split(qmark, 2)
769
+ if c == ?< then
770
+ wellformed_error "`<' is found in attribute `#{key}'"
771
+ elsif c != ?> then
772
+ scan_attvalue '>'
773
+ end
774
+ scan_attvalue val if c
775
+ end until s
776
+ continue = s # if eof then continue is false, else true.
777
+ end
778
+ on_attribute_end key
779
+ elsif error then
780
+ continue = s = found_stag_error(error)
781
+ else
782
+ emptyelem = true
783
+ end
784
+ }
785
+ end while continue
786
+ unless @src.close_tag then
787
+ if emptyelem then
788
+ found_unclosed_emptyelem name
789
+ else
790
+ found_unclosed_stag name
791
+ end
792
+ end
793
+ if emptyelem then
794
+ on_stag_end_empty name
795
+ else
796
+ on_stag_end name
797
+ end
798
+ end
799
+ end
800
+
801
+
802
+ def scan_bang_tag(s)
803
+ parse_error "parse error at `<!'"
804
+ s << '>' if @src.close_tag
805
+ on_chardata s
806
+ end
807
+
808
+
809
# Main content loop: passes each fragment to the scanner selected by its
# first characters, then fetches the next fragment, until the source
# returns nothing more.
def scan_content(s)
  input = @src                  # for speed
  while s
    if s[0] != ?<
      scan_chardata s
    else
      case s[1]
      when ?/
        scan_etag s
      when ??
        scan_pi s
      when ?!
        if s[2] == ?- and s[3] == ?-
          scan_comment s
        elsif /\A<!\[CDATA\[/n =~ s
          scan_cdata $'
        else
          scan_bang_tag s
        end
      else
        scan_stag s
      end
    end
    s = input.get
  end
end
834
+
835
+
836
# Appends source fragments to +str+ until one contains +qmark+.  The part
# before the mark is appended as well; the part after it is returned.
# Returns nil when the source runs out first.  A `>' is put in front of
# every fragment that starts with neither `<' nor `>'.
def get_until_qmark(str, qmark)
  loop do
    piece = @src.get
    return piece unless piece
    lead = piece[0]
    value, rest = piece.split(qmark, 2)
    str << '>' if lead != ?< and lead != ?>
    str << value if lead
    return rest if rest
  end
end
848
+
849
+
850
+ XMLDeclPattern = kcode_regexp(%q{[ \t\n\r]([\-_\d\w]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|(\?\z)|([\-_.\d\w]+|[^ \t\n\r])}) #'
851
+
852
# Scans the XML declaration.  +s+ is the text following `<?xml'.
#
# Reports each pseudo-attribute through on_xmldecl_version / _encoding /
# _standalone / _other, framed by on_xmldecl and on_xmldecl_end.  +state+
# tracks how far the expected order version -> encoding -> standalone has
# progressed, so misplaced declarations can be reported.
#
# The third capture is received as +mark+ and copied to +endmark+ by hand.
# The original named the block parameter +endmark+ itself, which only
# updates the outer variable under Ruby 1.8; with block-local parameters
# (Ruby 1.9 and later) the loop never saw the closing `?>' and went on
# consuming the document.
def scan_xmldecl(s)
  endmark = nil
  state = 0
  on_xmldecl
  begin
    continue = false
    s.scan(XMLDeclPattern[@kcode]) { |key, val, mark, error|
      endmark = mark
      if key then
        qmark = val.slice!(0,1)         # remove quotation marks
        if val[-1] == qmark[0] then
          val.chop!
        else
          # The value runs on into later fragments.
          continue = s = get_until_qmark(val, qmark)
          unless s then
            parse_error "unterminated XML declaration meets EOF"
            endmark = true
          end
        end
        if state == 0 and key == 'version' then
          on_xmldecl_version val
          state = 1
        elsif state == 1 and key == 'encoding' then
          on_xmldecl_encoding val
          state = 2
        elsif state >= 1 and key == 'standalone' then
          on_xmldecl_standalone val
          state = 3
        else
          state = 3
          if key == 'version' then
            parse_error "version declaration must not be here"
            on_xmldecl_version val
          elsif key == 'encoding' then
            parse_error "encoding declaration must not be here"
            on_xmldecl_encoding val
            state = 2 if state < 2
          elsif key == 'standalone' then
            parse_error "standalone declaration must not be here"
            on_xmldecl_standalone val
          else
            parse_error "unknown declaration `#{key}' in XML declaration"
            on_xmldecl_other key, val
          end
        end
      elsif endmark then
        unless @src.close_tag then
          parse_error "unexpected `#{endmark}' found in XML declaration"
          endmark = nil
        end
        # here always exit the loop.
      else
        parse_error "parse error at `#{error}'"
      end
    }
  end while !endmark and continue || s = @src.get_plain
  parse_error "unterminated XML declaration meets EOF" unless s or endmark
  parse_error "no declaration found in XML declaration" if state == 0
  on_xmldecl_end
end
911
+
912
+
913
+ SkipDTD = kcode_regexp(%q{(['"]|\A<!--|\A<\?|--\z|\?\z)|\]\s*\z}) #'
914
+
915
+ def skip_internal_dtd(s)
916
+ quote = nil
917
+ continue = true
918
+ begin # skip until `]>'
919
+ s.scan(SkipDTD[@kcode]) { |q,| #'
920
+ if quote then
921
+ quote = nil if quote == q and quote.size == 1 || @src.tag_end?
922
+ elsif q then
923
+ if q == '<!--' then
924
+ quote = '--'
925
+ elsif q == '<?' then
926
+ quote = '?'
927
+ elsif q == '"' or q == "'" then
928
+ quote = q
929
+ end
930
+ elsif @src.close_tag then
931
+ continue = false
932
+ end
933
+ }
934
+ end while continue and s = @src.get
935
+ parse_error "unterminated internal DTD subset meets EOF" unless s
936
+ end
937
+
938
+
939
+ def scan_internal_dtd(s)
940
+ warning "internal DTD subset is not supported"
941
+ skip_internal_dtd s
942
+ end
943
+
944
+
945
+ def found_invalid_pubsys(pubsys)
946
+ parse_error "`PUBLIC' or `SYSTEM' should be here"
947
+ 'SYSTEM'
948
+ end
949
+
950
+
951
+ DoctypePattern = kcode_regexp(%q{[ \t\n\r](?:([^ \t\n\r\/'"=\[]+)|('[^']*'?|"[^"]*"?))|([\-_.\d\w]+|[^ \t\n\r])}) #"
952
+
953
+ def scan_doctype(s)
954
+ root = syspub = sysid = pubid = nil
955
+ internal_dtd = false
956
+ re = DoctypePattern[@kcode]
957
+ begin
958
+ if re =~ s then
959
+ name, str, delim, s = $1, $2, $3, $'
960
+ if name then
961
+ if not root then
962
+ root = name
963
+ elsif not syspub then
964
+ unless name == 'PUBLIC' or name == 'SYSTEM' then
965
+ name = found_invalid_pubsys(name)
966
+ end
967
+ syspub = name
968
+ else
969
+ parse_error "parse error at `#{name}'"
970
+ end
971
+ elsif str then
972
+ qmark = str.slice!(0,1) # remove quotation marks
973
+ unless syspub then
974
+ parse_error "parse error at `#{qmark}'"
975
+ s = str << s
976
+ else
977
+ if str[-1] == qmark[0] then
978
+ str.chop!
979
+ else
980
+ s = get_until_qmark(str, qmark) || ''
981
+ end
982
+ if not sysid then
983
+ sysid = str
984
+ elsif not pubid and syspub == 'PUBLIC' then
985
+ pubid = sysid
986
+ sysid = str
987
+ else
988
+ parse_error "too many external ID literals in DOCTYPE"
989
+ end
990
+ end
991
+ elsif delim == '[' then
992
+ internal_dtd = true
993
+ break
994
+ else
995
+ parse_error "parse error at `#{delim}'"
996
+ end
997
+ else
998
+ s = ''
999
+ end
1000
+ if s.empty? then
1001
+ break if @src.close_tag
1002
+ s = @src.get_plain
1003
+ end
1004
+ end while s
1005
+ parse_error "unterminated DOCTYPE declaration meets EOF" unless s
1006
+ unless root then
1007
+ parse_error "no root element is specified in DOCTYPE"
1008
+ end
1009
+ if syspub and not sysid then
1010
+ parse_error "too few external ID literals in DOCTYPE"
1011
+ end
1012
+ if syspub == 'PUBLIC' and not pubid then
1013
+ pubid, sysid = sysid, nil
1014
+ end
1015
+ on_doctype root, pubid, sysid
1016
+ scan_internal_dtd s if internal_dtd
1017
+ end
1018
+
1019
+
1020
# Scans the prolog: an optional XML declaration, then any mix of comments,
# processing instructions, whitespace and at most one DOCTYPE.  The first
# fragment that is none of these is handed to scan_content.
def scan_prolog(s)
  if /\A<\?xml(?=[ \t\n\r])/n =~ s
    scan_xmldecl $'
    s = @src.get
  end
  input = @src                  # for speed
  doctype_allowed = true
  while s
    if s[0] == ?<
      marker = s[1]
      if marker == ??
        scan_pi s
      elsif marker != ?!
        break
      elsif s[2] == ?- and s[3] == ?-
        scan_comment s
      elsif /\A<!DOCTYPE(?=[ \t\n\r])/n =~ s and doctype_allowed
        doctype_allowed = false
        scan_doctype $'
      else
        break
      end
      s = input.get
    elsif /[^ \t\r\n]/ !~ s
      # Whitespace-only fragment.
      on_prolog_space s unless s.empty?
      s = input.get_plain
    else
      break
    end
  end
  scan_content(s || input.get)
end
1053
+
1054
+
1055
+ def scan_document
1056
+ on_start_document
1057
+ @src.prepare
1058
+ scan_prolog @src.get
1059
+ on_end_document
1060
+ end
1061
+
1062
+
1063
+ def make_source(src)
1064
+ Source.new src
1065
+ end
1066
+
1067
+
1068
+ public
1069
+
1070
# Parses +src+ as a whole document, reporting events to the visitor.
# The internal source is dropped again afterwards, even when scanning
# raises.  Returns self.
def parse_document(src)
  @src = make_source(src)
  begin
    scan_document
    self
  ensure
    @src = nil
  end
end
1079
+
1080
+ alias parse parse_document
1081
+
1082
+ end
1083
+
1084
+
1085
+ end
1086
+
1087
+
1088
+
1089
+
1090
+
1091
# Self-test: when run as a script, parse the files named on the command
# line (or standard input) and print the user CPU time taken.  Errors are
# printed only when $VERBOSE is set.
if $0 == __FILE__ then
  class TestVisitor
    include XMLScan::Visitor
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  $s = scan = XMLScan::XMLScanner.new(TestVisitor.new)
  src = ARGF
  def src.path; filename; end
  # Process.times replaces Time.times, which newer Rubies no longer have.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end