rgen 0.5.4 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. data/CHANGELOG +28 -0
  2. data/Rakefile +3 -4
  3. data/lib/ea_support/uml13_ea_metamodel.rb +3 -3
  4. data/lib/ea_support/uml13_ea_to_uml13.rb +33 -2
  5. data/lib/ea_support/uml13_to_uml13_ea.rb +7 -0
  6. data/lib/mmgen/mm_ext/ecore_mmgen_ext.rb +4 -4
  7. data/lib/mmgen/templates/metamodel_generator.tpl +143 -143
  8. data/lib/rgen/ecore/ecore.rb +11 -1
  9. data/lib/rgen/ecore/ecore_interface.rb +47 -0
  10. data/lib/rgen/ecore/ecore_to_ruby.rb +166 -0
  11. data/lib/rgen/ecore/{ecore_transformer.rb → ruby_to_ecore.rb} +11 -11
  12. data/lib/rgen/environment.rb +15 -2
  13. data/lib/rgen/fragment/dump_file_cache.rb +63 -0
  14. data/lib/rgen/fragment/fragmented_model.rb +139 -0
  15. data/lib/rgen/fragment/model_fragment.rb +268 -0
  16. data/lib/rgen/instantiator/abstract_xml_instantiator.rb +44 -72
  17. data/lib/rgen/instantiator/default_xml_instantiator.rb +2 -2
  18. data/lib/rgen/instantiator/ecore_xml_instantiator.rb +16 -1
  19. data/lib/rgen/instantiator/json_instantiator.rb +16 -2
  20. data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +118 -138
  21. data/lib/rgen/instantiator/qualified_name_resolver.rb +5 -1
  22. data/lib/rgen/instantiator/reference_resolver.rb +126 -24
  23. data/lib/rgen/instantiator/xmi11_instantiator.rb +6 -2
  24. data/lib/rgen/metamodel_builder.rb +18 -6
  25. data/lib/rgen/metamodel_builder/builder_extensions.rb +431 -407
  26. data/lib/rgen/metamodel_builder/builder_runtime.rb +8 -8
  27. data/lib/rgen/metamodel_builder/constant_order_helper.rb +4 -4
  28. data/lib/rgen/metamodel_builder/data_types.rb +5 -1
  29. data/lib/rgen/metamodel_builder/intermediate/feature.rb +167 -0
  30. data/lib/rgen/metamodel_builder/module_extension.rb +2 -2
  31. data/lib/rgen/model_builder.rb +10 -5
  32. data/lib/rgen/model_builder/builder_context.rb +17 -1
  33. data/lib/rgen/serializer/opposite_reference_filter.rb +18 -0
  34. data/lib/rgen/serializer/qualified_name_provider.rb +45 -0
  35. data/lib/rgen/template_language/template_container.rb +3 -1
  36. data/lib/rgen/{auto_class_creator.rb → util/auto_class_creator.rb} +6 -1
  37. data/lib/rgen/util/cached_glob.rb +67 -0
  38. data/lib/rgen/util/file_cache_map.rb +104 -0
  39. data/lib/rgen/util/file_change_detector.rb +78 -0
  40. data/lib/rgen/{method_delegation.rb → util/method_delegation.rb} +18 -3
  41. data/lib/rgen/{model_comparator.rb → util/model_comparator.rb} +17 -5
  42. data/lib/rgen/{model_comparator_base.rb → util/model_comparator_base.rb} +6 -1
  43. data/lib/rgen/{model_dumper.rb → util/model_dumper.rb} +6 -1
  44. data/lib/rgen/{name_helper.rb → util/name_helper.rb} +6 -1
  45. data/lib/rgen/util/pattern_matcher.rb +329 -0
  46. data/lib/transformers/uml13_to_ecore.rb +103 -60
  47. data/test/ecore_self_test.rb +43 -42
  48. data/test/json_test.rb +15 -0
  49. data/test/metamodel_builder_test.rb +361 -206
  50. data/test/metamodel_from_ecore_test.rb +45 -0
  51. data/test/metamodel_order_test.rb +10 -4
  52. data/test/metamodel_roundtrip_test.rb +2 -2
  53. data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +1 -1
  54. data/test/metamodel_roundtrip_test/houseMetamodel_Regenerated.ecore +50 -50
  55. data/test/method_delegation_test.rb +9 -9
  56. data/test/model_builder/ecore_internal.rb +19 -9
  57. data/test/model_builder/serializer_test.rb +1 -1
  58. data/test/reference_resolver_test.rb +79 -12
  59. data/test/rgen_test.rb +2 -0
  60. data/test/template_language_test.rb +7 -0
  61. data/test/template_language_test/templates/callback_indent_test/a.tpl +12 -0
  62. data/test/template_language_test/templates/callback_indent_test/b.tpl +5 -0
  63. data/test/testmodel/ea_testmodel_regenerated.xml +588 -583
  64. data/test/transformer_test.rb +3 -3
  65. data/test/util/file_cache_map_test.rb +91 -0
  66. data/test/util/file_cache_map_test/testdir/fileA +1 -0
  67. data/test/util_test.rb +4 -0
  68. data/test/xml_instantiator_test.rb +139 -135
  69. metadata +49 -104
  70. data/lib/rgen/ecore/ecore_instantiator.rb +0 -31
  71. data/lib/rgen/metamodel_builder/metamodel_description.rb +0 -232
  72. data/redist/xmlscan/ChangeLog +0 -1301
  73. data/redist/xmlscan/README +0 -34
  74. data/redist/xmlscan/THANKS +0 -11
  75. data/redist/xmlscan/doc/changes.html +0 -74
  76. data/redist/xmlscan/doc/changes.rd +0 -80
  77. data/redist/xmlscan/doc/en/conformance.html +0 -136
  78. data/redist/xmlscan/doc/en/conformance.rd +0 -152
  79. data/redist/xmlscan/doc/en/manual.html +0 -356
  80. data/redist/xmlscan/doc/en/manual.rd +0 -402
  81. data/redist/xmlscan/doc/ja/conformance.ja.html +0 -118
  82. data/redist/xmlscan/doc/ja/conformance.ja.rd +0 -134
  83. data/redist/xmlscan/doc/ja/manual.ja.html +0 -325
  84. data/redist/xmlscan/doc/ja/manual.ja.rd +0 -370
  85. data/redist/xmlscan/doc/src/Makefile +0 -41
  86. data/redist/xmlscan/doc/src/conformance.rd.src +0 -256
  87. data/redist/xmlscan/doc/src/langsplit.rb +0 -110
  88. data/redist/xmlscan/doc/src/manual.rd.src +0 -614
  89. data/redist/xmlscan/install.rb +0 -41
  90. data/redist/xmlscan/lib/xmlscan/encoding.rb +0 -311
  91. data/redist/xmlscan/lib/xmlscan/htmlscan.rb +0 -289
  92. data/redist/xmlscan/lib/xmlscan/namespace.rb +0 -352
  93. data/redist/xmlscan/lib/xmlscan/parser.rb +0 -299
  94. data/redist/xmlscan/lib/xmlscan/scanner.rb +0 -1109
  95. data/redist/xmlscan/lib/xmlscan/version.rb +0 -22
  96. data/redist/xmlscan/lib/xmlscan/visitor.rb +0 -158
  97. data/redist/xmlscan/lib/xmlscan/xmlchar.rb +0 -441
  98. data/redist/xmlscan/memo/CONFORMANCE +0 -1249
  99. data/redist/xmlscan/memo/PRODUCTIONS +0 -195
  100. data/redist/xmlscan/memo/contentspec.ry +0 -335
  101. data/redist/xmlscan/samples/chibixml.rb +0 -105
  102. data/redist/xmlscan/samples/getxmlchar.rb +0 -122
  103. data/redist/xmlscan/samples/rexml.rb +0 -159
  104. data/redist/xmlscan/samples/xmlbench.rb +0 -88
  105. data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +0 -22
  106. data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +0 -29
  107. data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +0 -62
  108. data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +0 -22
  109. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +0 -62
  110. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +0 -22
  111. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +0 -22
  112. data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +0 -99
  113. data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +0 -116
  114. data/redist/xmlscan/samples/xmlconftest.rb +0 -200
  115. data/redist/xmlscan/test.rb +0 -7
  116. data/redist/xmlscan/tests/deftestcase.rb +0 -73
  117. data/redist/xmlscan/tests/runtest.rb +0 -47
  118. data/redist/xmlscan/tests/testall.rb +0 -14
  119. data/redist/xmlscan/tests/testencoding.rb +0 -438
  120. data/redist/xmlscan/tests/testhtmlscan.rb +0 -752
  121. data/redist/xmlscan/tests/testnamespace.rb +0 -457
  122. data/redist/xmlscan/tests/testparser.rb +0 -591
  123. data/redist/xmlscan/tests/testscanner.rb +0 -1749
  124. data/redist/xmlscan/tests/testxmlchar.rb +0 -143
  125. data/redist/xmlscan/tests/visitor.rb +0 -34
@@ -1,1109 +0,0 @@
1
- #
2
- # xmlscan/scanner.rb
3
- #
4
- # Copyright (C) Ueno Katsuhiro 2002
5
- #
6
- # $Id: scanner.rb,v 1.83 2003/05/12 14:13:33 katsu Exp $
7
- #
8
-
9
- #
10
- # CONSIDERATIONS FOR CHARACTER ENCODINGS:
11
- #
12
- # There are the following common characteristics in character encodings
13
- # which are supported by Ruby's $KCODE feature (ISO-8859-*, Shift_JIS,
14
- # EUC, and UTF-8):
15
- #
16
- # - Stateless.
17
- # - ASCII characters are encoded in the same manner as US-ASCII.
18
- # - The octet sequences corresponding to non-ASCII characters begin
19
- # with an octet greater than 0x80.
20
- # - The following characters can be identified by just one octet.
21
- # That is, every octets corresponding to the following characters in
22
- # US-ASCII never appear as a part of an octet sequence representing a
23
- # non-ASCII character.
24
- #
25
- # Whitespaces("\t", "\n", "\r", and " ") and
26
- # ! \ " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
27
- #
28
- # Be careful that `[' and `]' are NOT included in the list!
29
- #
30
- # If we build a regular expression carefully in accordance with these
31
- # characteristics, we can get the same match regardless of the value
32
- # of $KCODE. Moreover, if it can be premised on them, we can detect
33
- # several delimiters without regular expressions. XMLScanner uses this
34
- # fact in order to share many regular expressions in all $KCODE modes,
35
- # and in order to optimize parsing speed.
36
- #
37
-
38
- require 'xmlscan/visitor'
39
-
40
-
41
- module XMLScan
42
-
43
- class Input
44
-
45
- def initialize(src)
46
- @src = src
47
- unless src.respond_to? :gets then
48
- if src.respond_to? :to_ary then
49
- @v = src.to_ary
50
- @n = -1
51
- def self.gets ; @v.at(@n += 1) ; end
52
- def self.lineno ; @n + 1 ; end
53
- else
54
- @v = @src
55
- def self.gets ; s = @v ; @v = nil ; s ; end
56
- end
57
- end
58
- if src.respond_to? :lineno then
59
- def self.lineno ; @src.lineno ; end
60
- end
61
- if src.respond_to? :path then
62
- def self.path ; @src.path ; end
63
- end
64
- end
65
-
66
- attr_reader :src
67
-
68
- def gets ; @src.gets ; end
69
- def lineno ; 0 ; end
70
- def path ; '-' ; end
71
-
72
- def self.wrap(src)
73
- unless src.respond_to? :gets and src.respond_to? :lineno and
74
- src.respond_to? :path then
75
- src = new(src)
76
- end
77
- src
78
- end
79
-
80
- def self.unwrap(obj)
81
- if self === obj then
82
- obj.src
83
- else
84
- obj
85
- end
86
- end
87
-
88
- end
89
-
90
-
91
-
92
- class PrivateArray < Array
93
- m = superclass.instance_methods(false) - Kernel.instance_methods(false)
94
- private(*m)
95
- end
96
-
97
-
98
- class Source < PrivateArray
99
- # Source inherits Array only for speed.
100
-
101
- def initialize(src)
102
- super()
103
- @src = Input.wrap(src)
104
- @eof = false
105
- @last = nil
106
- end
107
-
108
- def source
109
- Input.unwrap @src
110
- end
111
-
112
-
113
- def eof?
114
- @eof and empty?
115
- end
116
-
117
- def abort
118
- @eof = true
119
- @last = nil
120
- clear
121
- self
122
- end
123
-
124
-
125
- def get
126
- pop or
127
- unless @eof then
128
- last = @last
129
- begin
130
- src = @src.gets
131
- unless src then
132
- @eof = true
133
- unshift last
134
- last = nil
135
- break
136
- end
137
- a = src.split(/(?=<|>[<>])|>/n, -1)
138
- if last then
139
- unless /\A[<>]/n =~ a.first then
140
- a[0] = last << (a.first || '')
141
- else
142
- push last
143
- end
144
- end
145
- concat a
146
- last = pop
147
- end while empty?
148
- @last = last
149
- reverse!
150
- pop
151
- end
152
- end
153
-
154
-
155
- def prepare
156
- s = get
157
- s = get and s = '>' << s if s and s.empty? # preserve first `>'
158
- s and push s
159
- end
160
-
161
-
162
- def tag_end?
163
- s = last || @last and s[0] != ?<
164
- end
165
-
166
- def tag_start?
167
- s = last || @last and s[0] == ?<
168
- end
169
-
170
- def close_tag # tag_end?, and remove a `>'.
171
- unless s = last || @last and s[0] != ?< then
172
- false
173
- else
174
- if s == '>' or s.empty? then
175
- s1 = get
176
- unless s = last || @last and s[0] == ?< then # for speed up
177
- out = [ s1 ]
178
- out.push get while s = last || @last and s == '>' || s.empty?
179
- out.pop unless s and s[0] != ?< # De Morgan
180
- concat out
181
- end
182
- end
183
- true
184
- end
185
- end
186
-
187
-
188
- def get_text # get until tag_start?
189
- s = last || @last and s[0] != ?< and get
190
- end
191
-
192
- def get_tag # get until tag_end?
193
- s = last || @last and s[0] == ?< and get
194
- end
195
-
196
- def get_plain
197
- s = get
198
- s = '>' << s unless not s or (c = s[0]) == ?< or c == ?> # De Morgan
199
- s
200
- end
201
-
202
- def lineno
203
- @src.lineno
204
- end
205
-
206
- def path
207
- @src.path
208
- end
209
-
210
-
211
- # The following methods are for debug.
212
-
213
- def inspect
214
- a = []
215
- reverse_each { |i|
216
- a.push ">" unless /\A[<>]/n =~ i
217
- a.push i.inspect
218
- }
219
- last = []
220
- if @last then
221
- last.push ">" unless /\A[<>]/n =~ @last
222
- last.push @last.inspect
223
- end
224
- a.push '#eof' if @eof
225
- "((#{a.join(' ')}) (#{last.join(' ')}) . #{source.inspect})"
226
- end
227
-
228
- def each
229
- prepare
230
- while s = get
231
- yield s
232
- end
233
- self
234
- end
235
-
236
- def test
237
- last or @last or (s = get and push s and s)
238
- end
239
-
240
- end
241
-
242
-
243
-
244
- class XMLScanner
245
-
246
- class << self
247
-
248
- def provided_options
249
- options = []
250
- private_instance_methods(false).each { |i|
251
- options.push $' if /\Aapply_option_/n =~ i
252
- }
253
- options
254
- end
255
-
256
- def apply_option(instance, option)
257
- instance.__send__ "apply_option_#{option}"
258
- end
259
-
260
- def apply_options(instance, options)
261
- h = {}
262
- options.each { |i| h[i.to_s] = true }
263
- options = h
264
- ancestors.each { |klass|
265
- if klass.respond_to? :provided_options then
266
- klass.provided_options.each { |i|
267
- if options.include? i then
268
- options.delete i
269
- klass.apply_option instance, i
270
- end
271
- }
272
- end
273
- }
274
- unless options.empty? then
275
- raise ArgumentError, "undefined option `#{options.keys[0]}'"
276
- end
277
- instance
278
- end
279
- private :apply_options
280
-
281
- def new(visitor, *options)
282
- instance = super(visitor)
283
- apply_options instance, options
284
- end
285
-
286
- end
287
-
288
-
289
-
290
- def initialize(visitor)
291
- @visitor = visitor
292
- @decoration = nil
293
- @src = nil
294
- @kcode = nil
295
- end
296
-
297
-
298
- def kcode=(kcode)
299
- @kcode = Regexp.new('', nil, kcode || '').kcode
300
- kcode
301
- end
302
-
303
- attr_reader :kcode
304
-
305
-
306
- def decorate(decoration)
307
- unless @decoration then
308
- @visitor = @decoration = Decoration.new(@visitor)
309
- end
310
- @decoration.expand decoration
311
- end
312
- private :decorate
313
-
314
-
315
- def lineno
316
- @src && @src.lineno
317
- end
318
-
319
- def path
320
- @src && @src.path
321
- end
322
-
323
- def source
324
- @src.source
325
- end
326
-
327
-
328
- private
329
-
330
- def parse_error(msg)
331
- @visitor.parse_error msg
332
- end
333
-
334
- def wellformed_error(msg)
335
- @visitor.wellformed_error msg
336
- end
337
-
338
- def valid_error(msg)
339
- @visitor.valid_error msg
340
- end
341
-
342
- def warning(msg)
343
- @visitor.warning msg
344
- end
345
-
346
-
347
- def on_xmldecl
348
- @visitor.on_xmldecl
349
- end
350
-
351
- def on_xmldecl_version(str)
352
- @visitor.on_xmldecl_version str
353
- end
354
-
355
- def on_xmldecl_encoding(str)
356
- @visitor.on_xmldecl_encoding str
357
- end
358
-
359
- def on_xmldecl_standalone(str)
360
- @visitor.on_xmldecl_standalone str
361
- end
362
-
363
- def on_xmldecl_other(name, value)
364
- @visitor.on_xmldecl_other name, value
365
- end
366
-
367
- def on_xmldecl_end
368
- @visitor.on_xmldecl_end
369
- end
370
-
371
- def on_doctype(root, pubid, sysid)
372
- @visitor.on_doctype root, pubid, sysid
373
- end
374
-
375
- def on_prolog_space(str)
376
- @visitor.on_prolog_space str
377
- end
378
-
379
- def on_comment(str)
380
- @visitor.on_comment str
381
- end
382
-
383
- def on_pi(target, pi)
384
- @visitor.on_pi target, pi
385
- end
386
-
387
- def on_chardata(str)
388
- @visitor.on_chardata str
389
- end
390
-
391
- def on_cdata(str)
392
- @visitor.on_cdata str
393
- end
394
-
395
- def on_etag(name)
396
- @visitor.on_etag name
397
- end
398
-
399
- def on_entityref(ref)
400
- @visitor.on_entityref ref
401
- end
402
-
403
- def on_charref(code)
404
- @visitor.on_charref code
405
- end
406
-
407
- def on_charref_hex(code)
408
- @visitor.on_charref_hex code
409
- end
410
-
411
- def on_start_document
412
- @visitor.on_start_document
413
- end
414
-
415
- def on_end_document
416
- @visitor.on_end_document
417
- end
418
-
419
-
420
- # <hoge fuga="foo&bar;&#38;&#x26;foo" />HOGE
421
- # ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
422
- # 1 2 3 4 5 6 7 8 9 A
423
- #
424
- # The following method will be called with the following arguments
425
- # when the parser reaches the above point;
426
- #
427
- # 1: on_stag ('hoge')
428
- # 2: on_attribute ('fuga')
429
- # 3: on_attr_value ('foo')
430
- # 4: on_attr_entityref ('bar')
431
- # 5: on_attr_charref (38)
432
- # 6: on_attr_charref_hex (38)
433
- # 7: on_attr_value ('foo')
434
- # 8: on_attribute_end ('fuga')
435
- # 9: on_stag_end_empty ('hoge')
436
- # or
437
- # on_stag_end ('hoge')
438
- #
439
- # A: on_chardata ('HOGE')
440
-
441
- def on_stag(name)
442
- @visitor.on_stag name
443
- end
444
-
445
- def on_attribute(name)
446
- @visitor.on_attribute name
447
- end
448
-
449
- def on_attr_value(str)
450
- @visitor.on_attr_value str
451
- end
452
-
453
- def on_attr_entityref(ref)
454
- @visitor.on_attr_entityref ref
455
- end
456
-
457
- def on_attr_charref(code)
458
- @visitor.on_attr_charref code
459
- end
460
-
461
- def on_attr_charref_hex(code)
462
- @visitor.on_attr_charref_hex code
463
- end
464
-
465
- def on_attribute_end(name)
466
- @visitor.on_attribute_end name
467
- end
468
-
469
- def on_stag_end_empty(name)
470
- @visitor.on_stag_end_empty name
471
- end
472
-
473
- def on_stag_end(name)
474
- @visitor.on_stag_end name
475
- end
476
-
477
-
478
-
479
- private
480
-
481
- module KcodeRegexp
482
- private
483
- Kcodes = [ //n.kcode, //e.kcode, //s.kcode, //u.kcode ]
484
- def kcode_regexp(re)
485
- h = {}
486
- Kcodes.each { |i| h[i] = Regexp.new(re, nil, i) }
487
- h.default = Regexp.new(re)
488
- h
489
- end
490
- end
491
- extend KcodeRegexp
492
-
493
-
494
- InvalidEntityRef = kcode_regexp('(?=[^#\d\w]|\z)')
495
-
496
- def scan_chardata(s)
497
- while true
498
- unless /&/n =~ s then
499
- on_chardata s
500
- else
501
- s = $`
502
- on_chardata s unless s.empty?
503
- ref = nil
504
- $'.split('&', -1).each { |s|
505
- unless /(?!\A);|(?=[ \t\r\n])/n =~ s and not $&.empty? then
506
- if InvalidEntityRef[@kcode] =~ s and not (ref = $`).strip.empty?
507
- then
508
- parse_error "reference to `#{ref}' doesn't end with `;'"
509
- else
510
- parse_error "`&' is not used for entity/character references"
511
- on_chardata('&' << s)
512
- next
513
- end
514
- end
515
- ref = $`
516
- s = $'
517
- if /\A[^#]/n =~ ref then
518
- on_entityref ref
519
- elsif /\A#(\d+)\z/n =~ ref then
520
- on_charref $1.to_i
521
- elsif /\A#x([\dA-Fa-f]+)\z/n =~ ref then
522
- on_charref_hex $1.hex
523
- else
524
- parse_error "invalid character reference `#{ref}'"
525
- end
526
- on_chardata s unless s.empty?
527
- }
528
- end
529
- s = @src.get_text
530
- break unless s
531
- s = '>' << s unless s == '>'
532
- end
533
- end
534
-
535
-
536
- def scan_attvalue(s) # almostly copy & paste from scan_chardata
537
- unless /&/n =~ s then
538
- on_attr_value s
539
- else
540
- s = $`
541
- on_attr_value s unless s.empty?
542
- ref = nil
543
- $'.split('&', -1).each { |s|
544
- unless /(?!\A);|(?=[ \t\r\n])/n =~ s and not $&.empty? then
545
- if InvalidEntityRef[@kcode] =~ s and not (ref = $`).strip.empty?
546
- then
547
- parse_error "reference to `#{ref}' doesn't end with `;'"
548
- else
549
- parse_error "`&' is not used for entity/character references"
550
- on_attr_value('&' << s)
551
- next
552
- end
553
- end
554
- ref = $`
555
- s = $'
556
- if /\A[^#]/n =~ ref then
557
- on_attr_entityref ref
558
- elsif /\A#(\d+)\z/n =~ ref then
559
- on_attr_charref $1.to_i
560
- elsif /\A#x([\dA-Fa-f]+)\z/n =~ ref then
561
- on_attr_charref_hex $1.hex
562
- else
563
- parse_error "invalid character reference `#{ref}'"
564
- end
565
- on_attr_value s unless s.empty?
566
- }
567
- end
568
- end
569
-
570
-
571
- def scan_comment(s)
572
- s[0,4] = '' # remove `<!--'
573
- comm = ''
574
- until /--/n =~ s
575
- comm << s
576
- s = @src.get_plain
577
- unless s then
578
- parse_error "unterminated comment meets EOF"
579
- return on_comment(comm)
580
- end
581
- end
582
- comm << $`
583
- until (s = $').empty? and @src.close_tag
584
- if s == '-' and @src.close_tag then # --->
585
- parse_error "comment ending in `--->' is not allowed"
586
- comm << s
587
- break
588
- end
589
- parse_error "comment includes `--'"
590
- comm << '--'
591
- until /--/n =~ s # copy & paste for performance
592
- comm << s
593
- s = @src.get_plain
594
- unless s then
595
- parse_error "unterminated comment meets EOF"
596
- return on_comment(comm)
597
- end
598
- end
599
- comm << $`
600
- end
601
- on_comment comm
602
- end
603
-
604
-
605
- def scan_pi(s)
606
- unless /\A<\?([^ \t\n\r?]+)(?:[ \t\n\r]+|(?=\?\z))/n =~ s then
607
- parse_error "parse error at `<?'"
608
- s << '>' if @src.close_tag
609
- on_chardata s
610
- else
611
- target = $1
612
- pi = $'
613
- until pi[-1] == ?? and @src.close_tag
614
- s = @src.get_plain
615
- unless s then
616
- parse_error "unterminated PI meets EOF"
617
- return on_pi(target, pi)
618
- end
619
- pi << s
620
- end
621
- pi.chop! # remove last `?'
622
- on_pi target, pi
623
- end
624
- end
625
-
626
-
627
- CDATAPattern = kcode_regexp('\]\]\z')
628
-
629
- def scan_cdata(s)
630
- cdata = s
631
- re = CDATAPattern[@kcode]
632
- until re =~ cdata and @src.close_tag
633
- s = @src.get_plain
634
- unless s then
635
- parse_error "unterminated CDATA section meets EOF"
636
- return on_cdata(cdata)
637
- end
638
- cdata << s
639
- end
640
- cdata.chop!.chop! # remove ']]'
641
- on_cdata cdata
642
- end
643
-
644
-
645
- def found_unclosed_etag(name)
646
- if @src.tag_start? then
647
- parse_error "unclosed end tag `#{name}' meets another tag"
648
- else
649
- parse_error "unclosed end tag `#{name}' meets EOF"
650
- end
651
- end
652
-
653
- def found_empty_etag
654
- parse_error "parse error at `</'"
655
- on_chardata '</>'
656
- end
657
-
658
-
659
- def scan_etag(s)
660
- s[0,2] = '' # remove '</'
661
- if s.empty? then
662
- if @src.close_tag then # </>
663
- return found_empty_etag
664
- else # </< or </[EOF]
665
- parse_error "parse error at `</'"
666
- s << '>' if @src.close_tag
667
- return on_chardata('</' << s)
668
- end
669
- elsif /[ \t\n\r]+/n =~ s then
670
- s1, s2 = $`, $'
671
- if s1.empty? then # </ tag
672
- parse_error "parse error at `</'"
673
- s << '>' if @src.close_tag
674
- return on_chardata('</' + s)
675
- elsif not s2.empty? then # </ta g
676
- parse_error "illegal whitespace is found within end tag `#{s1}'"
677
- while @src.get_tag
678
- end
679
- end
680
- s = s1
681
- end
682
- found_unclosed_etag s unless @src.close_tag # </tag< or </tag[EOF]
683
- on_etag s
684
- end
685
-
686
-
687
- def found_empty_stag
688
- parse_error "parse error at `<'"
689
- on_chardata '<>'
690
- end
691
-
692
- def found_unclosed_stag(name)
693
- if @src.tag_start? then
694
- parse_error "unclosed start tag `#{name}' meets another tag"
695
- else
696
- parse_error "unclosed start tag `#{name}' meets EOF"
697
- end
698
- end
699
-
700
- def found_unclosed_emptyelem(name)
701
- if @src.tag_start? then
702
- parse_error "unclosed empty element tag `#{name}' meets another tag"
703
- else
704
- parse_error "unclosed empty element tag `#{name}' meets EOF"
705
- end
706
- end
707
-
708
-
709
- def found_stag_error(s)
710
- if /\A[\/='"]/n =~ s then
711
- tok, s = $&, $'
712
- elsif /(?=[ \t\n\r\/='"])/n =~ s then
713
- tok, s = $`, $'
714
- else
715
- tok, s = s, nil
716
- end
717
- parse_error "parse error at `#{tok}'"
718
- s
719
- end
720
-
721
-
722
- def scan_stag(s)
723
- unless /(?=[\/ \t\n\r='"])/n =~ s then
724
- name = s
725
- name[0,1] = '' # remove `<'
726
- if name.empty? then
727
- if @src.close_tag then # <>
728
- return found_empty_stag
729
- else # << or <[EOF]
730
- parse_error "parse error at `<'"
731
- return on_chardata('<')
732
- end
733
- end
734
- on_stag name
735
- found_unclosed_stag name unless @src.close_tag
736
- on_stag_end name
737
- else
738
- name = $`
739
- s = $'
740
- name[0,1] = '' # remove `<'
741
- if name.empty? then # `< tag' or `<=`
742
- parse_error "parse error at `<'"
743
- s << '>' if @src.close_tag
744
- return on_chardata('<' << s)
745
- end
746
- on_stag name
747
- emptyelem = false
748
- key,val,error,qmark,c = nil
749
- begin
750
- continue = false
751
- s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/n
752
- ) { |key,val,error|
753
- if key then # key="value"
754
- on_attribute key
755
- qmark = val.slice!(0,1)
756
- if val[-1] == qmark[0] then
757
- val.chop!
758
- scan_attvalue val unless val.empty?
759
- else
760
- scan_attvalue val unless val.empty?
761
- begin
762
- s = @src.get
763
- unless s then
764
- parse_error "unterminated attribute `#{key}' meets EOF"
765
- break
766
- end
767
- c = s[0]
768
- val, s = s.split(qmark, 2)
769
- if c == ?< then
770
- wellformed_error "`<' is found in attribute `#{key}'"
771
- elsif c != ?> then
772
- scan_attvalue '>'
773
- end
774
- scan_attvalue val if c
775
- end until s
776
- continue = s # if eof then continue is false, else true.
777
- end
778
- on_attribute_end key
779
- elsif error then
780
- continue = s = found_stag_error(error)
781
- else
782
- emptyelem = true
783
- end
784
- }
785
- end while continue
786
- unless @src.close_tag then
787
- if emptyelem then
788
- found_unclosed_emptyelem name
789
- else
790
- found_unclosed_stag name
791
- end
792
- end
793
- if emptyelem then
794
- on_stag_end_empty name
795
- else
796
- on_stag_end name
797
- end
798
- end
799
- end
800
-
801
-
802
- def scan_bang_tag(s)
803
- parse_error "parse error at `<!'"
804
- s << '>' if @src.close_tag
805
- on_chardata s
806
- end
807
-
808
-
809
- def scan_content(s)
810
- src = @src # for speed
811
- while s
812
- if (c = s[0]) == ?< then
813
- if (c = s[1]) == ?/ then
814
- scan_etag s
815
- elsif c == ?! then
816
- if s[2] == ?- and s[3] == ?- then
817
- scan_comment s
818
- elsif /\A<!\[CDATA\[/n =~ s then
819
- scan_cdata $'
820
- else
821
- scan_bang_tag s
822
- end
823
- elsif c == ?? then
824
- scan_pi s
825
- else
826
- scan_stag s
827
- end
828
- else
829
- scan_chardata s
830
- end
831
- s = src.get
832
- end
833
- end
834
-
835
-
836
- def get_until_qmark(str, qmark)
837
- begin
838
- #s = @src.get_plain
839
- s = @src.get
840
- break unless s
841
- c = s[0]
842
- v, s = s.split(qmark, 2)
843
- str << '>' unless c == ?< or c == ?> # De Morgan
844
- str << v if c
845
- end until s
846
- s
847
- end
848
-
849
-
850
- XMLDeclPattern = kcode_regexp(%q{[ \t\n\r]([\-_\d\w]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|(\?\z)|([\-_.\d\w]+|[^ \t\n\r])}) #'
851
-
852
- def scan_xmldecl(s)
853
- endmark = nil
854
- state = 0
855
- on_xmldecl
856
- begin
857
- continue = false
858
- s.scan(XMLDeclPattern[@kcode]) { |key,val,endmark,error|
859
- if key then
860
- qmark = val.slice!(0,1) # remove quotation marks
861
- if val[-1] == qmark[0] then
862
- val.chop!
863
- else
864
- continue = s = get_until_qmark(val, qmark)
865
- unless s then
866
- parse_error "unterminated XML declaration meets EOF"
867
- endmark = true
868
- end
869
- end
870
- if state == 0 and key == 'version' then
871
- on_xmldecl_version val
872
- state = 1
873
- elsif state == 1 and key == 'encoding' then
874
- on_xmldecl_encoding val
875
- state = 2
876
- elsif state >= 1 and key == 'standalone' then
877
- on_xmldecl_standalone val
878
- state = 3
879
- else
880
- state = 3
881
- if key == 'version' then
882
- parse_error "version declaration must not be here"
883
- on_xmldecl_version val
884
- elsif key == 'encoding' then
885
- parse_error "encoding declaration must not be here"
886
- on_xmldecl_encoding val
887
- state = 2 if state < 2
888
- elsif key == 'standalone' then
889
- parse_error "standalone declaration must not be here"
890
- on_xmldecl_standalone val
891
- else
892
- parse_error "unknown declaration `#{key}' in XML declaration"
893
- on_xmldecl_other key, val
894
- end
895
- end
896
- elsif endmark then
897
- unless @src.close_tag then
898
- parse_error "unexpected `#{endmark}' found in XML declaration"
899
- endmark = nil
900
- end
901
- # here always exit the loop.
902
- else
903
- parse_error "parse error at `#{error}'"
904
- end
905
- }
906
- end while !endmark and continue || s = @src.get_plain
907
- parse_error "unterminated XML declaration meets EOF" unless s or endmark
908
- parse_error "no declaration found in XML declaration" if state == 0
909
- on_xmldecl_end
910
- end
911
-
912
-
913
- SkipDTD = kcode_regexp(%q{(['"]|\A<!--|\A<\?|--\z|\?\z)|\]\s*\z}) #'
914
-
915
- def skip_internal_dtd(s)
916
- quote = nil
917
- continue = true
918
- begin # skip until `]>'
919
- s.scan(SkipDTD[@kcode]) { |q,| #'
920
- if quote then
921
- quote = nil if quote == q and quote.size == 1 || @src.tag_end?
922
- elsif q then
923
- if q == '<!--' then
924
- quote = '--'
925
- elsif q == '<?' then
926
- quote = '?'
927
- elsif q == '"' or q == "'" then
928
- quote = q
929
- end
930
- elsif @src.close_tag then
931
- continue = false
932
- end
933
- }
934
- end while continue and s = @src.get
935
- parse_error "unterminated internal DTD subset meets EOF" unless s
936
- end
937
-
938
-
939
- def scan_internal_dtd(s)
940
- warning "internal DTD subset is not supported"
941
- skip_internal_dtd s
942
- end
943
-
944
-
945
- def found_invalid_pubsys(pubsys)
946
- parse_error "`PUBLIC' or `SYSTEM' should be here"
947
- 'SYSTEM'
948
- end
949
-
950
-
951
- DoctypePattern = kcode_regexp(%q{[ \t\n\r](?:([^ \t\n\r\/'"=\[]+)|('[^']*'?|"[^"]*"?))|([\-_.\d\w]+|[^ \t\n\r])}) #"
952
-
953
- def scan_doctype(s)
954
- root = syspub = sysid = pubid = nil
955
- internal_dtd = false
956
- re = DoctypePattern[@kcode]
957
- begin
958
- if re =~ s then
959
- name, str, delim, s = $1, $2, $3, $'
960
- if name then
961
- if not root then
962
- root = name
963
- elsif not syspub then
964
- unless name == 'PUBLIC' or name == 'SYSTEM' then
965
- name = found_invalid_pubsys(name)
966
- end
967
- syspub = name
968
- else
969
- parse_error "parse error at `#{name}'"
970
- end
971
- elsif str then
972
- qmark = str.slice!(0,1) # remove quotation marks
973
- unless syspub then
974
- parse_error "parse error at `#{qmark}'"
975
- s = str << s
976
- else
977
- if str[-1] == qmark[0] then
978
- str.chop!
979
- else
980
- s = get_until_qmark(str, qmark) || ''
981
- end
982
- if not sysid then
983
- sysid = str
984
- elsif not pubid and syspub == 'PUBLIC' then
985
- pubid = sysid
986
- sysid = str
987
- else
988
- parse_error "too many external ID literals in DOCTYPE"
989
- end
990
- end
991
- elsif delim == '[' then
992
- internal_dtd = true
993
- break
994
- else
995
- parse_error "parse error at `#{delim}'"
996
- end
997
- else
998
- s = ''
999
- end
1000
- if s.empty? then
1001
- break if @src.close_tag
1002
- s = @src.get_plain
1003
- end
1004
- end while s
1005
- parse_error "unterminated DOCTYPE declaration meets EOF" unless s
1006
- unless root then
1007
- parse_error "no root element is specified in DOCTYPE"
1008
- end
1009
- if syspub and not sysid then
1010
- parse_error "too few external ID literals in DOCTYPE"
1011
- end
1012
- if syspub == 'PUBLIC' and not pubid then
1013
- pubid, sysid = sysid, nil
1014
- end
1015
- on_doctype root, pubid, sysid
1016
- scan_internal_dtd s if internal_dtd
1017
- end
1018
-
1019
-
1020
- def scan_prolog(s)
1021
- if /\A<\?xml(?=[ \t\n\r])/n =~ s then
1022
- scan_xmldecl $'
1023
- s = @src.get
1024
- end
1025
- doctype = true
1026
- src = @src # for speed
1027
- while s
1028
- if s[0] == ?< then
1029
- if (c = s[1]) == ?! then
1030
- if s[2] == ?- and s[3] == ?- then
1031
- scan_comment s
1032
- elsif /\A<!DOCTYPE(?=[ \t\n\r])/n =~ s and doctype then
1033
- doctype = false
1034
- scan_doctype $'
1035
- else
1036
- break
1037
- end
1038
- elsif c == ?? then
1039
- scan_pi s
1040
- else
1041
- break
1042
- end
1043
- s = src.get
1044
- elsif /[^ \t\r\n]/ !~ s then
1045
- on_prolog_space s unless s.empty?
1046
- s = src.get_plain
1047
- else
1048
- break
1049
- end
1050
- end
1051
- scan_content(s || src.get)
1052
- end
1053
-
1054
-
1055
- def scan_document
1056
- on_start_document
1057
- @src.prepare
1058
- scan_prolog @src.get
1059
- on_end_document
1060
- end
1061
-
1062
-
1063
- def make_source(src)
1064
- Source.new src
1065
- end
1066
-
1067
-
1068
- public
1069
-
1070
- def parse_document(src)
1071
- @src = make_source(src)
1072
- begin
1073
- scan_document
1074
- ensure
1075
- @src = nil
1076
- end
1077
- self
1078
- end
1079
-
1080
- alias parse parse_document
1081
-
1082
- end
1083
-
1084
-
1085
- end
1086
-
1087
-
1088
-
1089
-
1090
-
1091
- if $0 == __FILE__ then
1092
- class TestVisitor
1093
- include XMLScan::Visitor
1094
- def parse_error(msg)
1095
- STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
1096
- end
1097
- def wellformed_error(msg)
1098
- STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
1099
- end
1100
- end
1101
-
1102
- $s = scan = XMLScan::XMLScanner.new(TestVisitor.new)
1103
- src = ARGF
1104
- def src.path; filename; end
1105
- t1 = Time.times.utime
1106
- scan.parse src
1107
- t2 = Time.times.utime
1108
- STDERR.printf "%2.3f sec\n", t2 - t1
1109
- end