rgen 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. data/CHANGELOG +20 -1
  2. data/MIT-LICENSE +1 -1
  3. data/README +12 -9
  4. data/lib/instantiators/ea_instantiator.rb +36 -0
  5. data/lib/metamodels/uml13_metamodel.rb +559 -0
  6. data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
  7. data/lib/mmgen/metamodel_generator.rb +5 -5
  8. data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
  9. data/lib/mmgen/mmgen.rb +6 -4
  10. data/lib/mmgen/templates/annotations.tpl +37 -0
  11. data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
  12. data/lib/rgen/ecore/ecore.rb +190 -0
  13. data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
  14. data/lib/rgen/ecore/ecore_transformer.rb +85 -0
  15. data/lib/rgen/environment.rb +9 -24
  16. data/lib/rgen/find_helper.rb +68 -0
  17. data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
  18. data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
  19. data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
  20. data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
  21. data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
  22. data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
  23. data/lib/rgen/metamodel_builder.rb +103 -9
  24. data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
  25. data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
  26. data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
  27. data/lib/rgen/metamodel_builder/data_types.rb +67 -0
  28. data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
  29. data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
  30. data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
  31. data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
  32. data/lib/rgen/model_comparator.rb +56 -0
  33. data/lib/rgen/model_dumper.rb +5 -5
  34. data/lib/rgen/name_helper.rb +17 -1
  35. data/lib/rgen/template_language.rb +148 -28
  36. data/lib/rgen/template_language/directory_template_container.rb +56 -38
  37. data/lib/rgen/template_language/output_handler.rb +93 -77
  38. data/lib/rgen/template_language/template_container.rb +186 -143
  39. data/lib/rgen/transformer.rb +19 -14
  40. data/lib/transformers/uml13_to_ecore.rb +75 -0
  41. data/redist/xmlscan/ChangeLog +1301 -0
  42. data/redist/xmlscan/README +34 -0
  43. data/redist/xmlscan/THANKS +11 -0
  44. data/redist/xmlscan/doc/changes.html +74 -0
  45. data/redist/xmlscan/doc/changes.rd +80 -0
  46. data/redist/xmlscan/doc/en/conformance.html +136 -0
  47. data/redist/xmlscan/doc/en/conformance.rd +152 -0
  48. data/redist/xmlscan/doc/en/manual.html +356 -0
  49. data/redist/xmlscan/doc/en/manual.rd +402 -0
  50. data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
  51. data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
  52. data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
  53. data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
  54. data/redist/xmlscan/doc/src/Makefile +41 -0
  55. data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
  56. data/redist/xmlscan/doc/src/langsplit.rb +110 -0
  57. data/redist/xmlscan/doc/src/manual.rd.src +614 -0
  58. data/redist/xmlscan/install.rb +41 -0
  59. data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
  60. data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
  61. data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
  62. data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
  63. data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
  64. data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
  65. data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
  66. data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
  67. data/redist/xmlscan/memo/CONFORMANCE +1249 -0
  68. data/redist/xmlscan/memo/PRODUCTIONS +195 -0
  69. data/redist/xmlscan/memo/contentspec.ry +335 -0
  70. data/redist/xmlscan/samples/chibixml.rb +105 -0
  71. data/redist/xmlscan/samples/getxmlchar.rb +122 -0
  72. data/redist/xmlscan/samples/rexml.rb +159 -0
  73. data/redist/xmlscan/samples/xmlbench.rb +88 -0
  74. data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
  75. data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
  76. data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
  77. data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
  78. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
  79. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
  80. data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
  81. data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
  82. data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
  83. data/redist/xmlscan/samples/xmlconftest.rb +200 -0
  84. data/redist/xmlscan/test.rb +7 -0
  85. data/redist/xmlscan/tests/deftestcase.rb +73 -0
  86. data/redist/xmlscan/tests/runtest.rb +47 -0
  87. data/redist/xmlscan/tests/testall.rb +14 -0
  88. data/redist/xmlscan/tests/testencoding.rb +438 -0
  89. data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
  90. data/redist/xmlscan/tests/testnamespace.rb +457 -0
  91. data/redist/xmlscan/tests/testparser.rb +591 -0
  92. data/redist/xmlscan/tests/testscanner.rb +1749 -0
  93. data/redist/xmlscan/tests/testxmlchar.rb +143 -0
  94. data/redist/xmlscan/tests/visitor.rb +34 -0
  95. data/test/array_extensions_test.rb +2 -2
  96. data/test/ea_instantiator_test.rb +41 -0
  97. data/test/ecore_self_test.rb +53 -0
  98. data/test/environment_test.rb +11 -6
  99. data/test/metamodel_builder_test.rb +404 -245
  100. data/test/metamodel_roundtrip_test.rb +52 -0
  101. data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
  102. data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
  103. data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
  104. data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
  105. data/test/rgen_test.rb +3 -3
  106. data/test/template_language_test.rb +65 -39
  107. data/test/template_language_test/expected_result.txt +24 -3
  108. data/test/template_language_test/templates/code/array.tpl +11 -0
  109. data/test/template_language_test/templates/content/author.tpl +7 -0
  110. data/test/template_language_test/templates/content/chapter.tpl +1 -1
  111. data/test/template_language_test/templates/root.tpl +17 -8
  112. data/test/template_language_test/testout.txt +24 -3
  113. data/test/testmodel/class_model_checker.rb +119 -0
  114. data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
  115. data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
  116. data/test/testmodel/ea_testmodel_partial.xml +317 -0
  117. data/test/testmodel/ecore_model_checker.rb +101 -0
  118. data/test/testmodel/manual_testmodel.xml +22 -0
  119. data/test/testmodel/object_model_checker.rb +67 -0
  120. data/test/transformer_test.rb +18 -10
  121. data/test/xml_instantiator_test.rb +81 -8
  122. data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
  123. data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
  124. data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
  125. data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
  126. metadata +126 -28
  127. data/lib/ea/xmi_class_instantiator.rb +0 -46
  128. data/lib/ea/xmi_helper.rb +0 -26
  129. data/lib/ea/xmi_metamodel.rb +0 -34
  130. data/lib/ea/xmi_object_instantiator.rb +0 -46
  131. data/lib/ea/xmi_to_classmodel.rb +0 -78
  132. data/lib/ea/xmi_to_objectmodel.rb +0 -92
  133. data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
  134. data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
  135. data/lib/rgen/xml_instantiator.rb +0 -132
  136. data/lib/uml/objectmodel_instantiator.rb +0 -53
  137. data/lib/uml/uml_classmodel.rb +0 -92
  138. data/lib/uml/uml_objectmodel.rb +0 -65
  139. data/test/metamodel_generator_test.rb +0 -44
  140. data/test/metamodel_generator_test/TestModel.rb +0 -40
  141. data/test/metamodel_generator_test/expected_result.txt +0 -40
  142. data/test/xmi_class_instantiator_test.rb +0 -24
  143. data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
  144. data/test/xmi_object_instantiator_test.rb +0 -65
  145. data/test/xml_instantiator_test/testmodel.xml +0 -7
@@ -0,0 +1,22 @@
1
+ #
2
+ # xmlscan/version.rb
3
+ #
4
+ # Copyright (C) UENO Katsuhiro 2002
5
+ #
6
+ # $Id: version.rb,v 1.9 2003/01/22 17:00:49 katsu Exp $
7
+ #
8
+
9
+ module XMLScan
10
+
11
+ # A version like 'X.X.0' (TENNY is 0) means that this is an unstable
12
+ # release. Incompatible changes will be applied to this version
13
+ # without special notice. This version should be distributed as a
14
+ # snapshot only.
15
+ #
16
+ # A TENNY of 1 or larger (e.g. 'X.X.1' or 'X.X.2') means this
17
+ # release is a stable release.
18
+
19
+ VERSION = '0.3.0'
20
+ RELEASE_DATE = '2003-01-23'
21
+
22
+ end
@@ -0,0 +1,158 @@
1
+ #
2
+ # xmlscan/visitor.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2002
5
+ #
6
+ # $Id: visitor.rb,v 1.3 2003/05/12 14:13:33 katsu Exp $
7
+ #
8
+
9
+ require 'xmlscan/version'
10
+
11
+
12
+ module XMLScan
13
+
14
+ class Error < StandardError
15
+
16
+ def initialize(msg, path = nil, lineno = nil)
17
+ super msg
18
+ @path = path
19
+ @lineno = lineno
20
+ end
21
+
22
+ attr_reader :path, :lineno
23
+
24
+ def to_s
25
+ if @lineno and @path then
26
+ "#{@path}:#{@lineno}:#{super}"
27
+ else
28
+ super
29
+ end
30
+ end
31
+
32
+ end
33
+
34
+ class ParseError < Error ; end
35
+ class NotWellFormedError < Error ; end
36
+ class NotValidError < Error ; end
37
+
38
+
39
+ module Visitor
40
+
41
+ def parse_error(msg)
42
+ raise ParseError.new(msg)
43
+ end
44
+
45
+ def wellformed_error(msg)
46
+ raise NotWellFormedError.new(msg)
47
+ end
48
+
49
+ def valid_error(msg)
50
+ raise NotValidError.new(msg)
51
+ end
52
+
53
+ def warning(msg)
54
+ end
55
+
56
+ def on_xmldecl
57
+ end
58
+
59
+ def on_xmldecl_version(str)
60
+ end
61
+
62
+ def on_xmldecl_encoding(str)
63
+ end
64
+
65
+ def on_xmldecl_standalone(str)
66
+ end
67
+
68
+ def on_xmldecl_other(name, value)
69
+ end
70
+
71
+ def on_xmldecl_end
72
+ end
73
+
74
+ def on_doctype(root, pubid, sysid)
75
+ end
76
+
77
+ def on_prolog_space(str)
78
+ end
79
+
80
+ def on_comment(str)
81
+ end
82
+
83
+ def on_pi(target, pi)
84
+ end
85
+
86
+ def on_chardata(str)
87
+ end
88
+
89
+ def on_cdata(str)
90
+ end
91
+
92
+ def on_etag(name)
93
+ end
94
+
95
+ def on_entityref(ref)
96
+ end
97
+
98
+ def on_charref(code)
99
+ end
100
+
101
+ def on_charref_hex(code)
102
+ end
103
+
104
+ def on_start_document
105
+ end
106
+
107
+ def on_end_document
108
+ end
109
+
110
+ def on_stag(name)
111
+ end
112
+
113
+ def on_attribute(name)
114
+ end
115
+
116
+ def on_attr_value(str)
117
+ end
118
+
119
+ def on_attr_entityref(ref)
120
+ end
121
+
122
+ def on_attr_charref(code)
123
+ end
124
+
125
+ def on_attr_charref_hex(code)
126
+ end
127
+
128
+ def on_attribute_end(name)
129
+ end
130
+
131
+ def on_stag_end_empty(name)
132
+ end
133
+
134
+ def on_stag_end(name)
135
+ end
136
+
137
+ end
138
+
139
+
140
+ class Decoration
141
+
142
+ include Visitor
143
+
144
+ def initialize(visitor)
145
+ @visitor = visitor
146
+ end
147
+
148
+ Visitor.instance_methods(false).each { |i|
149
+ module_eval <<-END, __FILE__, __LINE__ + 1
150
+ def #{i}(*args)
151
+ @visitor.#{i}(*args)
152
+ end
153
+ END
154
+ }
155
+
156
+ end
157
+
158
+ end
@@ -0,0 +1,441 @@
1
+ #
2
+ # xmlscan/xmlchar.rb
3
+ #
4
+ # Copyright (C) Ueno Katsuhiro 2002
5
+ #
6
+ # $Id: xmlchar.rb,v 1.7 2003/04/30 03:03:35 katsu Exp $
7
+ #
8
+
9
+ require 'xmlscan/scanner'
10
+
11
+
12
+ module XMLScan
13
+
14
+ module XMLChar
15
+
16
+ # generated by samples/getxmlchar.rb
17
+ char = [
18
+ 0x0009..0x0009, 0x000A..0x000A, 0x000D..0x000D, 0x0020..0xD7FF,
19
+ 0xE000..0xFFFD, 0x10000..0x10FFFF,
20
+ ]
21
+ base_char = [ # for Letter
22
+ 0x0041..0x005A, 0x0061..0x007A, 0x00C0..0x00D6, 0x00D8..0x00F6,
23
+ 0x00F8..0x00FF, 0x0100..0x0131, 0x0134..0x013E, 0x0141..0x0148,
24
+ 0x014A..0x017E, 0x0180..0x01C3, 0x01CD..0x01F0, 0x01F4..0x01F5,
25
+ 0x01FA..0x0217, 0x0250..0x02A8, 0x02BB..0x02C1, 0x0386..0x0386,
26
+ 0x0388..0x038A, 0x038C..0x038C, 0x038E..0x03A1, 0x03A3..0x03CE,
27
+ 0x03D0..0x03D6, 0x03DA..0x03DA, 0x03DC..0x03DC, 0x03DE..0x03DE,
28
+ 0x03E0..0x03E0, 0x03E2..0x03F3, 0x0401..0x040C, 0x040E..0x044F,
29
+ 0x0451..0x045C, 0x045E..0x0481, 0x0490..0x04C4, 0x04C7..0x04C8,
30
+ 0x04CB..0x04CC, 0x04D0..0x04EB, 0x04EE..0x04F5, 0x04F8..0x04F9,
31
+ 0x0531..0x0556, 0x0559..0x0559, 0x0561..0x0586, 0x05D0..0x05EA,
32
+ 0x05F0..0x05F2, 0x0621..0x063A, 0x0641..0x064A, 0x0671..0x06B7,
33
+ 0x06BA..0x06BE, 0x06C0..0x06CE, 0x06D0..0x06D3, 0x06D5..0x06D5,
34
+ 0x06E5..0x06E6, 0x0905..0x0939, 0x093D..0x093D, 0x0958..0x0961,
35
+ 0x0985..0x098C, 0x098F..0x0990, 0x0993..0x09A8, 0x09AA..0x09B0,
36
+ 0x09B2..0x09B2, 0x09B6..0x09B9, 0x09DC..0x09DD, 0x09DF..0x09E1,
37
+ 0x09F0..0x09F1, 0x0A05..0x0A0A, 0x0A0F..0x0A10, 0x0A13..0x0A28,
38
+ 0x0A2A..0x0A30, 0x0A32..0x0A33, 0x0A35..0x0A36, 0x0A38..0x0A39,
39
+ 0x0A59..0x0A5C, 0x0A5E..0x0A5E, 0x0A72..0x0A74, 0x0A85..0x0A8B,
40
+ 0x0A8D..0x0A8D, 0x0A8F..0x0A91, 0x0A93..0x0AA8, 0x0AAA..0x0AB0,
41
+ 0x0AB2..0x0AB3, 0x0AB5..0x0AB9, 0x0ABD..0x0ABD, 0x0AE0..0x0AE0,
42
+ 0x0B05..0x0B0C, 0x0B0F..0x0B10, 0x0B13..0x0B28, 0x0B2A..0x0B30,
43
+ 0x0B32..0x0B33, 0x0B36..0x0B39, 0x0B3D..0x0B3D, 0x0B5C..0x0B5D,
44
+ 0x0B5F..0x0B61, 0x0B85..0x0B8A, 0x0B8E..0x0B90, 0x0B92..0x0B95,
45
+ 0x0B99..0x0B9A, 0x0B9C..0x0B9C, 0x0B9E..0x0B9F, 0x0BA3..0x0BA4,
46
+ 0x0BA8..0x0BAA, 0x0BAE..0x0BB5, 0x0BB7..0x0BB9, 0x0C05..0x0C0C,
47
+ 0x0C0E..0x0C10, 0x0C12..0x0C28, 0x0C2A..0x0C33, 0x0C35..0x0C39,
48
+ 0x0C60..0x0C61, 0x0C85..0x0C8C, 0x0C8E..0x0C90, 0x0C92..0x0CA8,
49
+ 0x0CAA..0x0CB3, 0x0CB5..0x0CB9, 0x0CDE..0x0CDE, 0x0CE0..0x0CE1,
50
+ 0x0D05..0x0D0C, 0x0D0E..0x0D10, 0x0D12..0x0D28, 0x0D2A..0x0D39,
51
+ 0x0D60..0x0D61, 0x0E01..0x0E2E, 0x0E30..0x0E30, 0x0E32..0x0E33,
52
+ 0x0E40..0x0E45, 0x0E81..0x0E82, 0x0E84..0x0E84, 0x0E87..0x0E88,
53
+ 0x0E8A..0x0E8A, 0x0E8D..0x0E8D, 0x0E94..0x0E97, 0x0E99..0x0E9F,
54
+ 0x0EA1..0x0EA3, 0x0EA5..0x0EA5, 0x0EA7..0x0EA7, 0x0EAA..0x0EAB,
55
+ 0x0EAD..0x0EAE, 0x0EB0..0x0EB0, 0x0EB2..0x0EB3, 0x0EBD..0x0EBD,
56
+ 0x0EC0..0x0EC4, 0x0F40..0x0F47, 0x0F49..0x0F69, 0x10A0..0x10C5,
57
+ 0x10D0..0x10F6, 0x1100..0x1100, 0x1102..0x1103, 0x1105..0x1107,
58
+ 0x1109..0x1109, 0x110B..0x110C, 0x110E..0x1112, 0x113C..0x113C,
59
+ 0x113E..0x113E, 0x1140..0x1140, 0x114C..0x114C, 0x114E..0x114E,
60
+ 0x1150..0x1150, 0x1154..0x1155, 0x1159..0x1159, 0x115F..0x1161,
61
+ 0x1163..0x1163, 0x1165..0x1165, 0x1167..0x1167, 0x1169..0x1169,
62
+ 0x116D..0x116E, 0x1172..0x1173, 0x1175..0x1175, 0x119E..0x119E,
63
+ 0x11A8..0x11A8, 0x11AB..0x11AB, 0x11AE..0x11AF, 0x11B7..0x11B8,
64
+ 0x11BA..0x11BA, 0x11BC..0x11C2, 0x11EB..0x11EB, 0x11F0..0x11F0,
65
+ 0x11F9..0x11F9, 0x1E00..0x1E9B, 0x1EA0..0x1EF9, 0x1F00..0x1F15,
66
+ 0x1F18..0x1F1D, 0x1F20..0x1F45, 0x1F48..0x1F4D, 0x1F50..0x1F57,
67
+ 0x1F59..0x1F59, 0x1F5B..0x1F5B, 0x1F5D..0x1F5D, 0x1F5F..0x1F7D,
68
+ 0x1F80..0x1FB4, 0x1FB6..0x1FBC, 0x1FBE..0x1FBE, 0x1FC2..0x1FC4,
69
+ 0x1FC6..0x1FCC, 0x1FD0..0x1FD3, 0x1FD6..0x1FDB, 0x1FE0..0x1FEC,
70
+ 0x1FF2..0x1FF4, 0x1FF6..0x1FFC, 0x2126..0x2126, 0x212A..0x212B,
71
+ 0x212E..0x212E, 0x2180..0x2182, 0x3041..0x3094, 0x30A1..0x30FA,
72
+ 0x3105..0x312C, 0xAC00..0xD7A3,
73
+ ]
74
+ ideographic = [ # for Letter
75
+ 0x3007..0x3007, 0x3021..0x3029, 0x4E00..0x9FA5,
76
+ ]
77
+ combining_char = [ # for NameChar
78
+ 0x0300..0x0345, 0x0360..0x0361, 0x0483..0x0486, 0x0591..0x05A1,
79
+ 0x05A3..0x05B9, 0x05BB..0x05BD, 0x05BF..0x05BF, 0x05C1..0x05C2,
80
+ 0x05C4..0x05C4, 0x064B..0x0652, 0x0670..0x0670, 0x06D6..0x06DC,
81
+ 0x06DD..0x06DF, 0x06E0..0x06E4, 0x06E7..0x06E8, 0x06EA..0x06ED,
82
+ 0x0901..0x0903, 0x093C..0x093C, 0x093E..0x094C, 0x094D..0x094D,
83
+ 0x0951..0x0954, 0x0962..0x0963, 0x0981..0x0983, 0x09BC..0x09BC,
84
+ 0x09BE..0x09BE, 0x09BF..0x09BF, 0x09C0..0x09C4, 0x09C7..0x09C8,
85
+ 0x09CB..0x09CD, 0x09D7..0x09D7, 0x09E2..0x09E3, 0x0A02..0x0A02,
86
+ 0x0A3C..0x0A3C, 0x0A3E..0x0A3E, 0x0A3F..0x0A3F, 0x0A40..0x0A42,
87
+ 0x0A47..0x0A48, 0x0A4B..0x0A4D, 0x0A70..0x0A71, 0x0A81..0x0A83,
88
+ 0x0ABC..0x0ABC, 0x0ABE..0x0AC5, 0x0AC7..0x0AC9, 0x0ACB..0x0ACD,
89
+ 0x0B01..0x0B03, 0x0B3C..0x0B3C, 0x0B3E..0x0B43, 0x0B47..0x0B48,
90
+ 0x0B4B..0x0B4D, 0x0B56..0x0B57, 0x0B82..0x0B83, 0x0BBE..0x0BC2,
91
+ 0x0BC6..0x0BC8, 0x0BCA..0x0BCD, 0x0BD7..0x0BD7, 0x0C01..0x0C03,
92
+ 0x0C3E..0x0C44, 0x0C46..0x0C48, 0x0C4A..0x0C4D, 0x0C55..0x0C56,
93
+ 0x0C82..0x0C83, 0x0CBE..0x0CC4, 0x0CC6..0x0CC8, 0x0CCA..0x0CCD,
94
+ 0x0CD5..0x0CD6, 0x0D02..0x0D03, 0x0D3E..0x0D43, 0x0D46..0x0D48,
95
+ 0x0D4A..0x0D4D, 0x0D57..0x0D57, 0x0E31..0x0E31, 0x0E34..0x0E3A,
96
+ 0x0E47..0x0E4E, 0x0EB1..0x0EB1, 0x0EB4..0x0EB9, 0x0EBB..0x0EBC,
97
+ 0x0EC8..0x0ECD, 0x0F18..0x0F19, 0x0F35..0x0F35, 0x0F37..0x0F37,
98
+ 0x0F39..0x0F39, 0x0F3E..0x0F3E, 0x0F3F..0x0F3F, 0x0F71..0x0F84,
99
+ 0x0F86..0x0F8B, 0x0F90..0x0F95, 0x0F97..0x0F97, 0x0F99..0x0FAD,
100
+ 0x0FB1..0x0FB7, 0x0FB9..0x0FB9, 0x20D0..0x20DC, 0x20E1..0x20E1,
101
+ 0x302A..0x302F, 0x3099..0x3099, 0x309A..0x309A,
102
+ ]
103
+ digit = [ # for NameChar
104
+ 0x0030..0x0039, 0x0660..0x0669, 0x06F0..0x06F9, 0x0966..0x096F,
105
+ 0x09E6..0x09EF, 0x0A66..0x0A6F, 0x0AE6..0x0AEF, 0x0B66..0x0B6F,
106
+ 0x0BE7..0x0BEF, 0x0C66..0x0C6F, 0x0CE6..0x0CEF, 0x0D66..0x0D6F,
107
+ 0x0E50..0x0E59, 0x0ED0..0x0ED9, 0x0F20..0x0F29,
108
+ ]
109
+ extender = [ # for NameChar
110
+ 0x00B7..0x00B7, 0x02D0..0x02D0, 0x02D1..0x02D1, 0x0387..0x0387,
111
+ 0x0640..0x0640, 0x0E46..0x0E46, 0x0EC6..0x0EC6, 0x3005..0x3005,
112
+ 0x3031..0x3035, 0x309D..0x309E, 0x30FC..0x30FE,
113
+ ]
114
+
115
+ letter = base_char + ideographic
116
+
117
+ Char = char
118
+ NameChar = [ 0x2d..0x2e, 0x3a..0x3a, 0x5f..0x5f ] +
119
+ letter + combining_char + digit + extender
120
+ NameFirstChar = [ 0x3a..0x3a, 0x5f..0x5f ] + letter
121
+
122
+ [ Char, NameChar, NameFirstChar ].each { |i|
123
+ i.sort! { |a,b| a.begin <=> b.begin }
124
+ }
125
+
126
+
127
+ if Regexp.new("[\xc2\x80-\xc4\x80]", nil, 'U') =~ "\xc4\x81" then
128
+ # regexp engine is buggy ;p
129
+ buggy_regexp = true
130
+ else
131
+ buggy_regexp = false
132
+ end
133
+
134
+
135
+ o = Object.new
136
+ class << o
137
+ def charclass(a)
138
+ a.collect { |i|
139
+ b, e = i.begin, i.end
140
+ if b == e then
141
+ [b].pack('U')
142
+ elsif b + 1 == e then
143
+ [b,e].pack('UU')
144
+ elsif b < 0x80 and e >= 0x80 then
145
+ "#{b.chr}-\x7f" + [0x80,?-,e].pack('UCU')
146
+ else
147
+ [b,?-,e].pack('UCU')
148
+ end
149
+ }.join.sub(/\A-/, '\\\\-')
150
+ end
151
+
152
+ def make_regexp(a)
153
+ "[#{charclass(a)}]"
154
+ end
155
+
156
+ def make_neg_regexp(a)
157
+ "[^#{charclass(a)}]"
158
+ end
159
+
160
+ end
161
+
162
+
163
+ if buggy_regexp then
164
+ class << o
165
+ remove_method :make_regexp
166
+ def make_regexp(a)
167
+ b = []
168
+ a.each { |r|
169
+ if r.begin < 0x80 and r.end >= 0x80 then
170
+ b.push r.begin..0x7f
171
+ r = 0x80..r.end
172
+ end
173
+ if r.begin < 0x100 and r.end >= 0x100 then
174
+ b.push r.begin..0xff
175
+ r = 0x100..r.end
176
+ end
177
+ b.push r
178
+ }
179
+ mbc8 = b.select { |r| r.begin >= 0x80 and r.begin <= 0xff }
180
+ a = b.reject { |r| r.begin >= 0x80 and r.begin <= 0xff }
181
+ if mbc8.empty? then
182
+ "[#{charclass(a)}]"
183
+ else
184
+ dst = "(?:[#{charclass(a)}]"
185
+ mbc8.each { |r|
186
+ r.each { |i| dst << [?|, i].pack('CU') }
187
+ }
188
+ dst << ")"
189
+ end
190
+ end
191
+ end
192
+ end
193
+
194
+
195
+ CharPattern = Regexp.new("\\A#{o.make_regexp(Char)}*\\z", nil, 'U')
196
+ NotCharPattern = Regexp.new(o.make_neg_regexp(Char), nil, 'U')
197
+
198
+ NmtokenPattern = Regexp.new("\\A#{o.make_regexp(NameChar)}+\\z", nil, 'U')
199
+ NotNameCharPattern = Regexp.new(o.make_neg_regexp(NameChar), nil, 'U')
200
+
201
+ NamePattern = Regexp.new('\A' +
202
+ o.make_regexp(NameFirstChar) +
203
+ o.make_regexp(NameChar) + '*\z', nil, 'U')
204
+
205
+
206
+ def valid_char?(code)
207
+ NotCharPattern !~ [code].pack('U')
208
+ end
209
+
210
+ def valid_chardata?(str)
211
+ NotCharPattern !~ str
212
+ end
213
+
214
+ def valid_nmtoken?(str)
215
+ NotNameCharPattern !~ str
216
+ end
217
+
218
+ def valid_name?(str)
219
+ not NamePattern !~ str
220
+ end
221
+
222
+ if buggy_regexp then
223
+ remove_method :valid_char?
224
+ remove_method :valid_chardata?
225
+ remove_method :valid_nmtoken?
226
+ def valid_char?(code)
227
+ not CharPattern !~ [code].pack('U')
228
+ end
229
+ def valid_chardata?(str)
230
+ not CharPattern !~ str
231
+ end
232
+ def valid_nmtoken?(str)
233
+ not NmtokenPattern !~ str
234
+ end
235
+ end
236
+
237
+ module_function :valid_char?, :valid_chardata?
238
+ module_function :valid_nmtoken?, :valid_name?
239
+
240
+
241
+ def valid_pubid?(str)
242
+ /[^\- \r\na-zA-Z0-9'()+,.\/:=?;!*#\@$_%]/u !~ str
243
+ end
244
+
245
+
246
+ def valid_version?(str)
247
+ /[^\-a-zA-Z0-9_.:]/u !~ str
248
+ end
249
+ module_function :valid_version?
250
+
251
+
252
+ def valid_encoding?(str)
253
+ if /\A[A-Za-z]([\-A-Za-z0-9._])*\z/u =~ str then
254
+ true
255
+ else
256
+ false
257
+ end
258
+ end
259
+ module_function :valid_encoding?
260
+
261
+ end
262
+
263
+
264
+
265
+
266
+ class XMLScanner
267
+
268
+ module StrictChar
269
+
270
+ include XMLChar
271
+
272
+ private
273
+
274
+ def check_valid_name(name)
275
+ unless valid_name? name then
276
+ parse_error "`#{name}' is not valid for XML name"
277
+ end
278
+ end
279
+
280
+ def check_valid_chardata(str)
281
+ unless valid_chardata? str then
282
+ parse_error "invlalid XML character is found"
283
+ end
284
+ end
285
+
286
+ def check_valid_char(code)
287
+ unless valid_char? code then
288
+ wellformed_error "#{code} is not a valid XML character"
289
+ end
290
+ end
291
+
292
+ def check_valid_version(str)
293
+ unless valid_version? str then
294
+ parse_error "#{str} is not a valid XML version"
295
+ end
296
+ end
297
+
298
+ def check_valid_encoding(str)
299
+ unless valid_encoding? str then
300
+ parse_error "#{str} is not a valid XML encoding name"
301
+ end
302
+ end
303
+
304
+ def check_valid_pubid(str)
305
+ unless valid_pubid? str then
306
+ parse_error "#{str} is not a valid public ID"
307
+ end
308
+ end
309
+
310
+
311
+ def on_xmldecl_version(str)
312
+ check_valid_version str
313
+ super
314
+ end
315
+
316
+ def on_xmldecl_encoding(str)
317
+ check_valid_encoding str
318
+ super
319
+ end
320
+
321
+ def on_xmldecl_standalone(str)
322
+ check_valid_chardata str
323
+ super
324
+ end
325
+
326
+ def on_doctype(root, pubid, sysid)
327
+ check_valid_name root
328
+ check_valid_pubid pubid if pubid
329
+ check_valid_chardata sysid if sysid
330
+ super
331
+ end
332
+
333
+ def on_comment(str)
334
+ check_valid_chardata str
335
+ super
336
+ end
337
+
338
+ def on_pi(target, pi)
339
+ check_valid_name target
340
+ check_valid_chardata pi
341
+ super
342
+ end
343
+
344
+ def on_chardata(str)
345
+ check_valid_chardata str
346
+ super
347
+ end
348
+
349
+ def on_cdata(str)
350
+ check_valid_chardata str
351
+ super
352
+ end
353
+
354
+ def on_etag(name)
355
+ check_valid_name name
356
+ super
357
+ end
358
+
359
+ def on_entityref(ref)
360
+ check_valid_name ref
361
+ super
362
+ end
363
+
364
+ def on_charref(code)
365
+ check_valid_char code
366
+ super
367
+ end
368
+
369
+ def on_charref_hex(code)
370
+ check_valid_char code
371
+ super
372
+ end
373
+
374
+ def on_stag(name)
375
+ check_valid_name name
376
+ super
377
+ end
378
+
379
+ def on_attribute(name)
380
+ check_valid_name name
381
+ super
382
+ end
383
+
384
+ def on_attr_value(str)
385
+ check_valid_chardata str
386
+ super
387
+ end
388
+
389
+ def on_attr_entityref(ref)
390
+ check_valid_name ref
391
+ super
392
+ end
393
+
394
+ def on_attr_charref(code)
395
+ check_valid_char code
396
+ super
397
+ end
398
+
399
+ def on_attr_charref_hex(code)
400
+ check_valid_char code
401
+ super
402
+ end
403
+
404
+ end
405
+
406
+
407
+ private
408
+
409
+ def apply_option_strict_char
410
+ extend StrictChar
411
+ end
412
+
413
+ end
414
+
415
+
416
+ end
417
+
418
+
419
+
420
+
421
+
422
+
423
+ if $0 == __FILE__ then
424
+ class TestVisitor
425
+ include XMLScan::Visitor
426
+ def parse_error(msg)
427
+ STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
428
+ end
429
+ def wellformed_error(msg)
430
+ STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
431
+ end
432
+ end
433
+
434
+ $s = scan = XMLScan::XMLScanner.new(TestVisitor.new, :strict_char)
435
+ src = ARGF
436
+ def src.path; filename; end
437
+ t1 = Time.times.utime
438
+ scan.parse src
439
+ t2 = Time.times.utime
440
+ STDERR.printf "%2.3f sec\n", t2 - t1
441
+ end