rgen 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +20 -1
- data/MIT-LICENSE +1 -1
- data/README +12 -9
- data/lib/instantiators/ea_instantiator.rb +36 -0
- data/lib/metamodels/uml13_metamodel.rb +559 -0
- data/lib/metamodels/uml13_metamodel_ext.rb +26 -0
- data/lib/mmgen/metamodel_generator.rb +5 -5
- data/lib/mmgen/mm_ext/ecore_ext.rb +95 -0
- data/lib/mmgen/mmgen.rb +6 -4
- data/lib/mmgen/templates/annotations.tpl +37 -0
- data/lib/mmgen/templates/metamodel_generator.tpl +171 -0
- data/lib/rgen/ecore/ecore.rb +190 -0
- data/lib/rgen/ecore/ecore_instantiator.rb +25 -0
- data/lib/rgen/ecore/ecore_transformer.rb +85 -0
- data/lib/rgen/environment.rb +9 -24
- data/lib/rgen/find_helper.rb +68 -0
- data/lib/rgen/{instantiator.rb → instantiator/abstract_instantiator.rb} +6 -2
- data/lib/rgen/instantiator/abstract_xml_instantiator.rb +59 -0
- data/lib/rgen/instantiator/default_xml_instantiator.rb +117 -0
- data/lib/rgen/instantiator/ecore_xml_instantiator.rb +144 -0
- data/lib/rgen/instantiator/nodebased_xml_instantiator.rb +157 -0
- data/lib/rgen/instantiator/xmi11_instantiator.rb +164 -0
- data/lib/rgen/metamodel_builder.rb +103 -9
- data/lib/rgen/metamodel_builder/build_helper.rb +26 -4
- data/lib/rgen/metamodel_builder/builder_extensions.rb +285 -88
- data/lib/rgen/metamodel_builder/builder_runtime.rb +7 -1
- data/lib/rgen/metamodel_builder/data_types.rb +67 -0
- data/lib/rgen/metamodel_builder/intermediate/annotation.rb +30 -0
- data/lib/rgen/metamodel_builder/metamodel_description.rb +232 -0
- data/lib/rgen/metamodel_builder/mm_multiple.rb +23 -0
- data/lib/rgen/metamodel_builder/module_extension.rb +33 -0
- data/lib/rgen/model_comparator.rb +56 -0
- data/lib/rgen/model_dumper.rb +5 -5
- data/lib/rgen/name_helper.rb +17 -1
- data/lib/rgen/template_language.rb +148 -28
- data/lib/rgen/template_language/directory_template_container.rb +56 -38
- data/lib/rgen/template_language/output_handler.rb +93 -77
- data/lib/rgen/template_language/template_container.rb +186 -143
- data/lib/rgen/transformer.rb +19 -14
- data/lib/transformers/uml13_to_ecore.rb +75 -0
- data/redist/xmlscan/ChangeLog +1301 -0
- data/redist/xmlscan/README +34 -0
- data/redist/xmlscan/THANKS +11 -0
- data/redist/xmlscan/doc/changes.html +74 -0
- data/redist/xmlscan/doc/changes.rd +80 -0
- data/redist/xmlscan/doc/en/conformance.html +136 -0
- data/redist/xmlscan/doc/en/conformance.rd +152 -0
- data/redist/xmlscan/doc/en/manual.html +356 -0
- data/redist/xmlscan/doc/en/manual.rd +402 -0
- data/redist/xmlscan/doc/ja/conformance.ja.html +118 -0
- data/redist/xmlscan/doc/ja/conformance.ja.rd +134 -0
- data/redist/xmlscan/doc/ja/manual.ja.html +325 -0
- data/redist/xmlscan/doc/ja/manual.ja.rd +370 -0
- data/redist/xmlscan/doc/src/Makefile +41 -0
- data/redist/xmlscan/doc/src/conformance.rd.src +256 -0
- data/redist/xmlscan/doc/src/langsplit.rb +110 -0
- data/redist/xmlscan/doc/src/manual.rd.src +614 -0
- data/redist/xmlscan/install.rb +41 -0
- data/redist/xmlscan/lib/xmlscan/encoding.rb +311 -0
- data/redist/xmlscan/lib/xmlscan/htmlscan.rb +289 -0
- data/redist/xmlscan/lib/xmlscan/namespace.rb +352 -0
- data/redist/xmlscan/lib/xmlscan/parser.rb +299 -0
- data/redist/xmlscan/lib/xmlscan/scanner.rb +1109 -0
- data/redist/xmlscan/lib/xmlscan/version.rb +22 -0
- data/redist/xmlscan/lib/xmlscan/visitor.rb +158 -0
- data/redist/xmlscan/lib/xmlscan/xmlchar.rb +441 -0
- data/redist/xmlscan/memo/CONFORMANCE +1249 -0
- data/redist/xmlscan/memo/PRODUCTIONS +195 -0
- data/redist/xmlscan/memo/contentspec.ry +335 -0
- data/redist/xmlscan/samples/chibixml.rb +105 -0
- data/redist/xmlscan/samples/getxmlchar.rb +122 -0
- data/redist/xmlscan/samples/rexml.rb +159 -0
- data/redist/xmlscan/samples/xmlbench.rb +88 -0
- data/redist/xmlscan/samples/xmlbench/parser/chibixml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/nqxml.rb +29 -0
- data/redist/xmlscan/samples/xmlbench/parser/rexml.rb +62 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlparser.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-0.0.10.rb +62 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-chibixml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan-rexml.rb +22 -0
- data/redist/xmlscan/samples/xmlbench/parser/xmlscan.rb +99 -0
- data/redist/xmlscan/samples/xmlbench/xmlbench-lib.rb +116 -0
- data/redist/xmlscan/samples/xmlconftest.rb +200 -0
- data/redist/xmlscan/test.rb +7 -0
- data/redist/xmlscan/tests/deftestcase.rb +73 -0
- data/redist/xmlscan/tests/runtest.rb +47 -0
- data/redist/xmlscan/tests/testall.rb +14 -0
- data/redist/xmlscan/tests/testencoding.rb +438 -0
- data/redist/xmlscan/tests/testhtmlscan.rb +752 -0
- data/redist/xmlscan/tests/testnamespace.rb +457 -0
- data/redist/xmlscan/tests/testparser.rb +591 -0
- data/redist/xmlscan/tests/testscanner.rb +1749 -0
- data/redist/xmlscan/tests/testxmlchar.rb +143 -0
- data/redist/xmlscan/tests/visitor.rb +34 -0
- data/test/array_extensions_test.rb +2 -2
- data/test/ea_instantiator_test.rb +41 -0
- data/test/ecore_self_test.rb +53 -0
- data/test/environment_test.rb +11 -6
- data/test/metamodel_builder_test.rb +404 -245
- data/test/metamodel_roundtrip_test.rb +52 -0
- data/test/metamodel_roundtrip_test/TestModel.rb +65 -0
- data/test/metamodel_roundtrip_test/TestModel_Regenerated.rb +64 -0
- data/test/metamodel_roundtrip_test/houseMetamodel.ecore +32 -0
- data/test/metamodel_roundtrip_test/houseMetamodel_from_ecore.rb +39 -0
- data/test/rgen_test.rb +3 -3
- data/test/template_language_test.rb +65 -39
- data/test/template_language_test/expected_result.txt +24 -3
- data/test/template_language_test/templates/code/array.tpl +11 -0
- data/test/template_language_test/templates/content/author.tpl +7 -0
- data/test/template_language_test/templates/content/chapter.tpl +1 -1
- data/test/template_language_test/templates/root.tpl +17 -8
- data/test/template_language_test/testout.txt +24 -3
- data/test/testmodel/class_model_checker.rb +119 -0
- data/test/{xmi_instantiator_test/testmodel.eap → testmodel/ea_testmodel.eap} +0 -0
- data/test/{xmi_instantiator_test/testmodel.xml → testmodel/ea_testmodel.xml} +81 -14
- data/test/testmodel/ea_testmodel_partial.xml +317 -0
- data/test/testmodel/ecore_model_checker.rb +101 -0
- data/test/testmodel/manual_testmodel.xml +22 -0
- data/test/testmodel/object_model_checker.rb +67 -0
- data/test/transformer_test.rb +18 -10
- data/test/xml_instantiator_test.rb +81 -8
- data/test/xml_instantiator_test/simple_ecore_model_checker.rb +94 -0
- data/test/xml_instantiator_test/simple_xmi_ecore_instantiator.rb +53 -0
- data/test/xml_instantiator_test/simple_xmi_metamodel.rb +49 -0
- data/test/xml_instantiator_test/simple_xmi_to_ecore.rb +75 -0
- metadata +126 -28
- data/lib/ea/xmi_class_instantiator.rb +0 -46
- data/lib/ea/xmi_helper.rb +0 -26
- data/lib/ea/xmi_metamodel.rb +0 -34
- data/lib/ea/xmi_object_instantiator.rb +0 -46
- data/lib/ea/xmi_to_classmodel.rb +0 -78
- data/lib/ea/xmi_to_objectmodel.rb +0 -92
- data/lib/mmgen/mm_ext/uml_classmodel_ext.rb +0 -71
- data/lib/mmgen/templates/uml_classmodel.tpl +0 -63
- data/lib/rgen/xml_instantiator.rb +0 -132
- data/lib/uml/objectmodel_instantiator.rb +0 -53
- data/lib/uml/uml_classmodel.rb +0 -92
- data/lib/uml/uml_objectmodel.rb +0 -65
- data/test/metamodel_generator_test.rb +0 -44
- data/test/metamodel_generator_test/TestModel.rb +0 -40
- data/test/metamodel_generator_test/expected_result.txt +0 -40
- data/test/xmi_class_instantiator_test.rb +0 -24
- data/test/xmi_instantiator_test/class_model_checker.rb +0 -97
- data/test/xmi_object_instantiator_test.rb +0 -65
- data/test/xml_instantiator_test/testmodel.xml +0 -7
@@ -0,0 +1,22 @@
|
|
1
|
+
#
|
2
|
+
# xmlscan/version.rb
|
3
|
+
#
|
4
|
+
# Copyright (C) UENO Katsuhiro 2002
|
5
|
+
#
|
6
|
+
# $Id: version.rb,v 1.9 2003/01/22 17:00:49 katsu Exp $
|
7
|
+
#
|
8
|
+
|
9
|
+
module XMLScan

  # Release numbering convention. TENNY is the last component of VERSION.
  #
  # * 'X.X.0' (TENNY is 0) marks an unstable release: incompatible changes
  #   may be applied without special notice, and such a version should be
  #   distributed as a snapshot only.
  # * A TENNY of 1 or more (e.g. 'X.X.1' or 'X.X.2') marks a stable release.
  VERSION = '0.3.0'

  # Date of this release, written as 'YYYY-MM-DD'.
  RELEASE_DATE = '2003-01-23'

end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
#
|
2
|
+
# xmlscan/visitor.rb
|
3
|
+
#
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
5
|
+
#
|
6
|
+
# $Id: visitor.rb,v 1.3 2003/05/12 14:13:33 katsu Exp $
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'xmlscan/version'
|
10
|
+
|
11
|
+
|
12
|
+
module XMLScan
|
13
|
+
|
14
|
+
# Base class of the errors raised by this library.  Besides the message it
# can carry the path and line number the error refers to.
class Error < StandardError

  # The optional location passed to the constructor (nil when not given).
  attr_reader :path, :lineno

  # msg    - error message, handed to StandardError.
  # path   - optional path the error refers to.
  # lineno - optional line number the error refers to.
  def initialize(msg, path = nil, lineno = nil)
    super(msg)
    @path, @lineno = path, lineno
  end

  # Returns "path:lineno:message" when both path and line number are set,
  # and the bare message otherwise.
  def to_s
    text = super
    return text unless @lineno && @path
    "#{@path}:#{@lineno}:#{text}"
  end

end
|
33
|
+
|
34
|
+
# Raised by Visitor#parse_error.
class ParseError < Error
end

# Raised by Visitor#wellformed_error.
class NotWellFormedError < Error
end

# Raised by Visitor#valid_error.
class NotValidError < Error
end
|
37
|
+
|
38
|
+
|
39
|
+
# Default implementation of the scanner callbacks.  The three error
# callbacks raise the matching exception; every other callback is a no-op
# meant to be overridden by including classes.
module Visitor

  # --- error reporting ---------------------------------------------------

  def parse_error(msg)
    raise ParseError, msg
  end

  def wellformed_error(msg)
    raise NotWellFormedError, msg
  end

  def valid_error(msg)
    raise NotValidError, msg
  end

  # Warnings are ignored by default.
  def warning(msg); end

  # --- XML declaration ---------------------------------------------------

  def on_xmldecl; end
  def on_xmldecl_version(str); end
  def on_xmldecl_encoding(str); end
  def on_xmldecl_standalone(str); end
  def on_xmldecl_other(name, value); end
  def on_xmldecl_end; end

  # --- prolog and miscellaneous ------------------------------------------

  def on_doctype(root, pubid, sysid); end
  def on_prolog_space(str); end
  def on_comment(str); end
  def on_pi(target, pi); end

  # --- content -----------------------------------------------------------

  def on_chardata(str); end
  def on_cdata(str); end
  def on_etag(name); end
  def on_entityref(ref); end
  def on_charref(code); end
  def on_charref_hex(code); end

  # --- document boundaries -----------------------------------------------

  def on_start_document; end
  def on_end_document; end

  # --- start tags and attributes -----------------------------------------

  def on_stag(name); end
  def on_attribute(name); end
  def on_attr_value(str); end
  def on_attr_entityref(ref); end
  def on_attr_charref(code); end
  def on_attr_charref_hex(code); end
  def on_attribute_end(name); end
  def on_stag_end_empty(name); end
  def on_stag_end(name); end

end
|
138
|
+
|
139
|
+
|
140
|
+
# Wraps another visitor: every callback declared directly on Visitor is
# redefined here to forward its arguments to the wrapped object.
class Decoration

  include Visitor

  # visitor - the object that receives the forwarded callbacks.
  def initialize(visitor)
    @visitor = visitor
  end

  # Generate one forwarding method per Visitor callback.  The file/line
  # arguments make backtraces point at this template.
  Visitor.instance_methods(false).each do |callback|
    class_eval(<<-FORWARDER, __FILE__, __LINE__ + 1)
      def #{callback}(*args)
        @visitor.#{callback}(*args)
      end
    FORWARDER
  end

end
|
157
|
+
|
158
|
+
end
|
@@ -0,0 +1,441 @@
|
|
1
|
+
#
|
2
|
+
# xmlscan/xmlchar.rb
|
3
|
+
#
|
4
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
5
|
+
#
|
6
|
+
# $Id: xmlchar.rb,v 1.7 2003/04/30 03:03:35 katsu Exp $
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'xmlscan/scanner'
|
10
|
+
|
11
|
+
|
12
|
+
module XMLScan
|
13
|
+
|
14
|
+
module XMLChar
|
15
|
+
|
16
|
+
# generated by samples/getxmlchar.rb
|
17
|
+
char = [
|
18
|
+
0x0009..0x0009, 0x000A..0x000A, 0x000D..0x000D, 0x0020..0xD7FF,
|
19
|
+
0xE000..0xFFFD, 0x10000..0x10FFFF,
|
20
|
+
]
|
21
|
+
base_char = [ # for Letter
|
22
|
+
0x0041..0x005A, 0x0061..0x007A, 0x00C0..0x00D6, 0x00D8..0x00F6,
|
23
|
+
0x00F8..0x00FF, 0x0100..0x0131, 0x0134..0x013E, 0x0141..0x0148,
|
24
|
+
0x014A..0x017E, 0x0180..0x01C3, 0x01CD..0x01F0, 0x01F4..0x01F5,
|
25
|
+
0x01FA..0x0217, 0x0250..0x02A8, 0x02BB..0x02C1, 0x0386..0x0386,
|
26
|
+
0x0388..0x038A, 0x038C..0x038C, 0x038E..0x03A1, 0x03A3..0x03CE,
|
27
|
+
0x03D0..0x03D6, 0x03DA..0x03DA, 0x03DC..0x03DC, 0x03DE..0x03DE,
|
28
|
+
0x03E0..0x03E0, 0x03E2..0x03F3, 0x0401..0x040C, 0x040E..0x044F,
|
29
|
+
0x0451..0x045C, 0x045E..0x0481, 0x0490..0x04C4, 0x04C7..0x04C8,
|
30
|
+
0x04CB..0x04CC, 0x04D0..0x04EB, 0x04EE..0x04F5, 0x04F8..0x04F9,
|
31
|
+
0x0531..0x0556, 0x0559..0x0559, 0x0561..0x0586, 0x05D0..0x05EA,
|
32
|
+
0x05F0..0x05F2, 0x0621..0x063A, 0x0641..0x064A, 0x0671..0x06B7,
|
33
|
+
0x06BA..0x06BE, 0x06C0..0x06CE, 0x06D0..0x06D3, 0x06D5..0x06D5,
|
34
|
+
0x06E5..0x06E6, 0x0905..0x0939, 0x093D..0x093D, 0x0958..0x0961,
|
35
|
+
0x0985..0x098C, 0x098F..0x0990, 0x0993..0x09A8, 0x09AA..0x09B0,
|
36
|
+
0x09B2..0x09B2, 0x09B6..0x09B9, 0x09DC..0x09DD, 0x09DF..0x09E1,
|
37
|
+
0x09F0..0x09F1, 0x0A05..0x0A0A, 0x0A0F..0x0A10, 0x0A13..0x0A28,
|
38
|
+
0x0A2A..0x0A30, 0x0A32..0x0A33, 0x0A35..0x0A36, 0x0A38..0x0A39,
|
39
|
+
0x0A59..0x0A5C, 0x0A5E..0x0A5E, 0x0A72..0x0A74, 0x0A85..0x0A8B,
|
40
|
+
0x0A8D..0x0A8D, 0x0A8F..0x0A91, 0x0A93..0x0AA8, 0x0AAA..0x0AB0,
|
41
|
+
0x0AB2..0x0AB3, 0x0AB5..0x0AB9, 0x0ABD..0x0ABD, 0x0AE0..0x0AE0,
|
42
|
+
0x0B05..0x0B0C, 0x0B0F..0x0B10, 0x0B13..0x0B28, 0x0B2A..0x0B30,
|
43
|
+
0x0B32..0x0B33, 0x0B36..0x0B39, 0x0B3D..0x0B3D, 0x0B5C..0x0B5D,
|
44
|
+
0x0B5F..0x0B61, 0x0B85..0x0B8A, 0x0B8E..0x0B90, 0x0B92..0x0B95,
|
45
|
+
0x0B99..0x0B9A, 0x0B9C..0x0B9C, 0x0B9E..0x0B9F, 0x0BA3..0x0BA4,
|
46
|
+
0x0BA8..0x0BAA, 0x0BAE..0x0BB5, 0x0BB7..0x0BB9, 0x0C05..0x0C0C,
|
47
|
+
0x0C0E..0x0C10, 0x0C12..0x0C28, 0x0C2A..0x0C33, 0x0C35..0x0C39,
|
48
|
+
0x0C60..0x0C61, 0x0C85..0x0C8C, 0x0C8E..0x0C90, 0x0C92..0x0CA8,
|
49
|
+
0x0CAA..0x0CB3, 0x0CB5..0x0CB9, 0x0CDE..0x0CDE, 0x0CE0..0x0CE1,
|
50
|
+
0x0D05..0x0D0C, 0x0D0E..0x0D10, 0x0D12..0x0D28, 0x0D2A..0x0D39,
|
51
|
+
0x0D60..0x0D61, 0x0E01..0x0E2E, 0x0E30..0x0E30, 0x0E32..0x0E33,
|
52
|
+
0x0E40..0x0E45, 0x0E81..0x0E82, 0x0E84..0x0E84, 0x0E87..0x0E88,
|
53
|
+
0x0E8A..0x0E8A, 0x0E8D..0x0E8D, 0x0E94..0x0E97, 0x0E99..0x0E9F,
|
54
|
+
0x0EA1..0x0EA3, 0x0EA5..0x0EA5, 0x0EA7..0x0EA7, 0x0EAA..0x0EAB,
|
55
|
+
0x0EAD..0x0EAE, 0x0EB0..0x0EB0, 0x0EB2..0x0EB3, 0x0EBD..0x0EBD,
|
56
|
+
0x0EC0..0x0EC4, 0x0F40..0x0F47, 0x0F49..0x0F69, 0x10A0..0x10C5,
|
57
|
+
0x10D0..0x10F6, 0x1100..0x1100, 0x1102..0x1103, 0x1105..0x1107,
|
58
|
+
0x1109..0x1109, 0x110B..0x110C, 0x110E..0x1112, 0x113C..0x113C,
|
59
|
+
0x113E..0x113E, 0x1140..0x1140, 0x114C..0x114C, 0x114E..0x114E,
|
60
|
+
0x1150..0x1150, 0x1154..0x1155, 0x1159..0x1159, 0x115F..0x1161,
|
61
|
+
0x1163..0x1163, 0x1165..0x1165, 0x1167..0x1167, 0x1169..0x1169,
|
62
|
+
0x116D..0x116E, 0x1172..0x1173, 0x1175..0x1175, 0x119E..0x119E,
|
63
|
+
0x11A8..0x11A8, 0x11AB..0x11AB, 0x11AE..0x11AF, 0x11B7..0x11B8,
|
64
|
+
0x11BA..0x11BA, 0x11BC..0x11C2, 0x11EB..0x11EB, 0x11F0..0x11F0,
|
65
|
+
0x11F9..0x11F9, 0x1E00..0x1E9B, 0x1EA0..0x1EF9, 0x1F00..0x1F15,
|
66
|
+
0x1F18..0x1F1D, 0x1F20..0x1F45, 0x1F48..0x1F4D, 0x1F50..0x1F57,
|
67
|
+
0x1F59..0x1F59, 0x1F5B..0x1F5B, 0x1F5D..0x1F5D, 0x1F5F..0x1F7D,
|
68
|
+
0x1F80..0x1FB4, 0x1FB6..0x1FBC, 0x1FBE..0x1FBE, 0x1FC2..0x1FC4,
|
69
|
+
0x1FC6..0x1FCC, 0x1FD0..0x1FD3, 0x1FD6..0x1FDB, 0x1FE0..0x1FEC,
|
70
|
+
0x1FF2..0x1FF4, 0x1FF6..0x1FFC, 0x2126..0x2126, 0x212A..0x212B,
|
71
|
+
0x212E..0x212E, 0x2180..0x2182, 0x3041..0x3094, 0x30A1..0x30FA,
|
72
|
+
0x3105..0x312C, 0xAC00..0xD7A3,
|
73
|
+
]
|
74
|
+
ideographic = [ # for Letter
|
75
|
+
0x3007..0x3007, 0x3021..0x3029, 0x4E00..0x9FA5,
|
76
|
+
]
|
77
|
+
combining_char = [ # for NameChar
|
78
|
+
0x0300..0x0345, 0x0360..0x0361, 0x0483..0x0486, 0x0591..0x05A1,
|
79
|
+
0x05A3..0x05B9, 0x05BB..0x05BD, 0x05BF..0x05BF, 0x05C1..0x05C2,
|
80
|
+
0x05C4..0x05C4, 0x064B..0x0652, 0x0670..0x0670, 0x06D6..0x06DC,
|
81
|
+
0x06DD..0x06DF, 0x06E0..0x06E4, 0x06E7..0x06E8, 0x06EA..0x06ED,
|
82
|
+
0x0901..0x0903, 0x093C..0x093C, 0x093E..0x094C, 0x094D..0x094D,
|
83
|
+
0x0951..0x0954, 0x0962..0x0963, 0x0981..0x0983, 0x09BC..0x09BC,
|
84
|
+
0x09BE..0x09BE, 0x09BF..0x09BF, 0x09C0..0x09C4, 0x09C7..0x09C8,
|
85
|
+
0x09CB..0x09CD, 0x09D7..0x09D7, 0x09E2..0x09E3, 0x0A02..0x0A02,
|
86
|
+
0x0A3C..0x0A3C, 0x0A3E..0x0A3E, 0x0A3F..0x0A3F, 0x0A40..0x0A42,
|
87
|
+
0x0A47..0x0A48, 0x0A4B..0x0A4D, 0x0A70..0x0A71, 0x0A81..0x0A83,
|
88
|
+
0x0ABC..0x0ABC, 0x0ABE..0x0AC5, 0x0AC7..0x0AC9, 0x0ACB..0x0ACD,
|
89
|
+
0x0B01..0x0B03, 0x0B3C..0x0B3C, 0x0B3E..0x0B43, 0x0B47..0x0B48,
|
90
|
+
0x0B4B..0x0B4D, 0x0B56..0x0B57, 0x0B82..0x0B83, 0x0BBE..0x0BC2,
|
91
|
+
0x0BC6..0x0BC8, 0x0BCA..0x0BCD, 0x0BD7..0x0BD7, 0x0C01..0x0C03,
|
92
|
+
0x0C3E..0x0C44, 0x0C46..0x0C48, 0x0C4A..0x0C4D, 0x0C55..0x0C56,
|
93
|
+
0x0C82..0x0C83, 0x0CBE..0x0CC4, 0x0CC6..0x0CC8, 0x0CCA..0x0CCD,
|
94
|
+
0x0CD5..0x0CD6, 0x0D02..0x0D03, 0x0D3E..0x0D43, 0x0D46..0x0D48,
|
95
|
+
0x0D4A..0x0D4D, 0x0D57..0x0D57, 0x0E31..0x0E31, 0x0E34..0x0E3A,
|
96
|
+
0x0E47..0x0E4E, 0x0EB1..0x0EB1, 0x0EB4..0x0EB9, 0x0EBB..0x0EBC,
|
97
|
+
0x0EC8..0x0ECD, 0x0F18..0x0F19, 0x0F35..0x0F35, 0x0F37..0x0F37,
|
98
|
+
0x0F39..0x0F39, 0x0F3E..0x0F3E, 0x0F3F..0x0F3F, 0x0F71..0x0F84,
|
99
|
+
0x0F86..0x0F8B, 0x0F90..0x0F95, 0x0F97..0x0F97, 0x0F99..0x0FAD,
|
100
|
+
0x0FB1..0x0FB7, 0x0FB9..0x0FB9, 0x20D0..0x20DC, 0x20E1..0x20E1,
|
101
|
+
0x302A..0x302F, 0x3099..0x3099, 0x309A..0x309A,
|
102
|
+
]
|
103
|
+
digit = [ # for NameChar
|
104
|
+
0x0030..0x0039, 0x0660..0x0669, 0x06F0..0x06F9, 0x0966..0x096F,
|
105
|
+
0x09E6..0x09EF, 0x0A66..0x0A6F, 0x0AE6..0x0AEF, 0x0B66..0x0B6F,
|
106
|
+
0x0BE7..0x0BEF, 0x0C66..0x0C6F, 0x0CE6..0x0CEF, 0x0D66..0x0D6F,
|
107
|
+
0x0E50..0x0E59, 0x0ED0..0x0ED9, 0x0F20..0x0F29,
|
108
|
+
]
|
109
|
+
extender = [ # for NameChar
|
110
|
+
0x00B7..0x00B7, 0x02D0..0x02D0, 0x02D1..0x02D1, 0x0387..0x0387,
|
111
|
+
0x0640..0x0640, 0x0E46..0x0E46, 0x0EC6..0x0EC6, 0x3005..0x3005,
|
112
|
+
0x3031..0x3035, 0x309D..0x309E, 0x30FC..0x30FE,
|
113
|
+
]
|
114
|
+
|
115
|
+
# Letter is the union of the BaseChar and Ideographic tables.
letter = base_char + ideographic

# Public range tables.  Char is the very same array object as the local
# `char` table, so the in-place sort below reorders that table as well.
Char = char
NameFirstChar = [ 0x3a..0x3a, 0x5f..0x5f ] + letter
NameChar = [ 0x2d..0x2e, 0x3a..0x3a, 0x5f..0x5f ] +
           letter + combining_char + digit + extender

# Order every table by the first code point of each range.
[ Char, NameChar, NameFirstChar ].each do |ranges|
  ranges.sort! { |x, y| x.begin <=> y.begin }
end
|
125
|
+
|
126
|
+
|
127
|
+
if Regexp.new("[\xc2\x80-\xc4\x80]", nil, 'U') =~ "\xc4\x81" then
|
128
|
+
# regexp engine is buggy ;p
|
129
|
+
buggy_regexp = true
|
130
|
+
else
|
131
|
+
buggy_regexp = false
|
132
|
+
end
|
133
|
+
|
134
|
+
|
135
|
+
o = Object.new
|
136
|
+
class << o
|
137
|
+
def charclass(a)
|
138
|
+
a.collect { |i|
|
139
|
+
b, e = i.begin, i.end
|
140
|
+
if b == e then
|
141
|
+
[b].pack('U')
|
142
|
+
elsif b + 1 == e then
|
143
|
+
[b,e].pack('UU')
|
144
|
+
elsif b < 0x80 and e >= 0x80 then
|
145
|
+
"#{b.chr}-\x7f" + [0x80,?-,e].pack('UCU')
|
146
|
+
else
|
147
|
+
[b,?-,e].pack('UCU')
|
148
|
+
end
|
149
|
+
}.join.sub(/\A-/, '\\\\-')
|
150
|
+
end
|
151
|
+
|
152
|
+
def make_regexp(a)
|
153
|
+
"[#{charclass(a)}]"
|
154
|
+
end
|
155
|
+
|
156
|
+
def make_neg_regexp(a)
|
157
|
+
"[^#{charclass(a)}]"
|
158
|
+
end
|
159
|
+
|
160
|
+
end
|
161
|
+
|
162
|
+
|
163
|
+
# Workaround for the buggy regexp engine detected above: code points
# 0x80..0xff are never placed inside a character class; each one is
# emitted as its own alternative instead.
if buggy_regexp then
  class << o
    remove_method :make_regexp

    # Same contract as the regular make_regexp: regexp source matching one
    # character listed in +a+ (an array of integer ranges).
    def make_regexp(a)
      pieces = []
      a.each { |r|
        # Cut ranges at the 0x7f/0x80 and 0xff/0x100 borders so that every
        # piece lies entirely inside or outside 0x80..0xff.
        if r.begin < 0x80 and r.end >= 0x80 then
          pieces.push r.begin..0x7f
          r = 0x80..r.end
        end
        if r.begin < 0x100 and r.end >= 0x100 then
          pieces.push r.begin..0xff
          r = 0x100..r.end
        end
        pieces.push r
      }
      mbc8, rest = pieces.partition { |r| r.begin >= 0x80 and r.begin <= 0xff }
      if mbc8.empty? then
        "[#{charclass(rest)}]"
      else
        dst = "(?:[#{charclass(rest)}]"
        mbc8.each { |r|
          # '|' is appended as a string rather than packed from ?| because
          # ?| is a String on Ruby >= 1.9, which pack('C') rejects.
          r.each { |i| dst << '|' << [i].pack('U') }
        }
        dst << ")"
      end
    end
  end
end
|
193
|
+
|
194
|
+
|
195
|
+
# Whole-string patterns (anchored with \A and \z) and single-character
# negative patterns built from the range tables.

# A string consisting only of characters from Char (may be empty).
CharPattern = Regexp.new('\\A' + o.make_regexp(Char) + '*\\z', nil, 'U')
# One character that is not in Char.
NotCharPattern = Regexp.new(o.make_neg_regexp(Char), nil, 'U')

# One or more characters from NameChar.
NmtokenPattern = Regexp.new('\\A' + o.make_regexp(NameChar) + '+\\z', nil, 'U')
# One character that is not in NameChar.
NotNameCharPattern = Regexp.new(o.make_neg_regexp(NameChar), nil, 'U')

# One character from NameFirstChar followed by any number from NameChar.
NamePattern = Regexp.new(
  "\\A#{o.make_regexp(NameFirstChar)}#{o.make_regexp(NameChar)}*\\z",
  nil, 'U')
|
204
|
+
|
205
|
+
|
206
|
+
# True when the code point +code+ (an integer) is not matched by
# NotCharPattern once encoded as UTF-8.
def valid_char?(code)
  encoded = [code].pack('U')
  (NotCharPattern =~ encoded).nil?
end
|
209
|
+
|
210
|
+
# True when +str+ contains nothing that NotCharPattern matches.
def valid_chardata?(str)
  (NotCharPattern =~ str).nil?
end
|
213
|
+
|
214
|
+
# True when +str+ is a non-empty string made only of name characters.
#
# An Nmtoken needs at least one character (XML: Nmtoken ::= (NameChar)+),
# so nil and "" are rejected up front; the negative pattern alone would
# accept them.  This matches the NmtokenPattern-based variant used for
# buggy regexp engines, which also requires one or more characters.
def valid_nmtoken?(str)
  return false if str.nil? || str.empty?
  NotNameCharPattern !~ str
end
|
217
|
+
|
218
|
+
# True when the whole of +str+ matches NamePattern.
def valid_name?(str)
  (NamePattern =~ str) ? true : false
end
|
221
|
+
|
222
|
+
# With a buggy regexp engine the negative patterns are not used: the three
# predicates are redefined on top of the positive, fully anchored patterns.
if buggy_regexp
  [ :valid_char?, :valid_chardata?, :valid_nmtoken? ].each do |predicate|
    remove_method predicate
  end

  # True when the code point +code+, encoded as UTF-8, matches CharPattern.
  def valid_char?(code)
    (CharPattern =~ [code].pack('U')) ? true : false
  end

  # True when the whole of +str+ matches CharPattern.
  def valid_chardata?(str)
    (CharPattern =~ str) ? true : false
  end

  # True when the whole of +str+ matches NmtokenPattern.
  def valid_nmtoken?(str)
    (NmtokenPattern =~ str) ? true : false
  end
end
|
236
|
+
|
237
|
+
module_function :valid_char?, :valid_chardata?
|
238
|
+
module_function :valid_nmtoken?, :valid_name?
|
239
|
+
|
240
|
+
|
241
|
+
# True when +str+ contains only characters allowed in a public identifier
# (space, CR, LF, ASCII letters and digits, and -'()+,./:=?;!*#@$_%).
# NOTE(review): unlike valid_version? and valid_encoding? this predicate is
# not followed by a module_function declaration - confirm that is intended.
def valid_pubid?(str)
  forbidden = /[^\- \r\na-zA-Z0-9'()+,.\/:=?;!*#\@$_%]/u
  (forbidden =~ str).nil?
end
|
244
|
+
|
245
|
+
|
246
|
+
# True when +str+ is a non-empty string made only of the characters
# allowed in an XML version number (ASCII letters, digits, '_', '.', ':'
# and '-').
#
# nil and "" are rejected explicitly: the XML grammar requires at least
# one character in VersionNum, and the negative character class alone
# cannot express that.
def valid_version?(str)
  return false if str.nil? || str.empty?
  /[^\-a-zA-Z0-9_.:]/u !~ str
end
|
249
|
+
module_function :valid_version?
|
250
|
+
|
251
|
+
|
252
|
+
# True when +str+ is an ASCII letter followed by any number of ASCII
# letters, digits, '.', '_' or '-'.  Always returns true or false.
def valid_encoding?(str)
  (/\A[A-Za-z]([\-A-Za-z0-9._])*\z/u =~ str) ? true : false
end
|
259
|
+
module_function :valid_encoding?
|
260
|
+
|
261
|
+
end
|
262
|
+
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
class XMLScanner
|
267
|
+
|
268
|
+
# Scanner extension that validates names and character data before handing
# each event on to the regular callback via +super+.  Everything defined
# here is private.
module StrictChar

  include XMLChar

  private

  # --- checks: report through parse_error / wellformed_error --------------

  def check_valid_name(name)
    parse_error "`#{name}' is not valid for XML name" unless valid_name?(name)
  end

  def check_valid_chardata(str)
    # NOTE(review): the message text is misspelled ("invlalid"); it is kept
    # verbatim here because code outside this file may match on it.
    parse_error "invlalid XML character is found" unless valid_chardata?(str)
  end

  # Unlike the other checks this one reports a well-formedness error.
  def check_valid_char(code)
    wellformed_error "#{code} is not a valid XML character" unless valid_char?(code)
  end

  def check_valid_version(str)
    parse_error "#{str} is not a valid XML version" unless valid_version?(str)
  end

  def check_valid_encoding(str)
    parse_error "#{str} is not a valid XML encoding name" unless valid_encoding?(str)
  end

  def check_valid_pubid(str)
    parse_error "#{str} is not a valid public ID" unless valid_pubid?(str)
  end

  # --- callbacks: validate, then continue with the inherited handler -----

  def on_xmldecl_version(str)
    check_valid_version(str)
    super
  end

  def on_xmldecl_encoding(str)
    check_valid_encoding(str)
    super
  end

  def on_xmldecl_standalone(str)
    check_valid_chardata(str)
    super
  end

  # pubid and sysid are only checked when present.
  def on_doctype(root, pubid, sysid)
    check_valid_name(root)
    check_valid_pubid(pubid) if pubid
    check_valid_chardata(sysid) if sysid
    super
  end

  def on_comment(str)
    check_valid_chardata(str)
    super
  end

  def on_pi(target, pi)
    check_valid_name(target)
    check_valid_chardata(pi)
    super
  end

  def on_chardata(str)
    check_valid_chardata(str)
    super
  end

  def on_cdata(str)
    check_valid_chardata(str)
    super
  end

  def on_etag(name)
    check_valid_name(name)
    super
  end

  def on_entityref(ref)
    check_valid_name(ref)
    super
  end

  def on_charref(code)
    check_valid_char(code)
    super
  end

  def on_charref_hex(code)
    check_valid_char(code)
    super
  end

  def on_stag(name)
    check_valid_name(name)
    super
  end

  def on_attribute(name)
    check_valid_name(name)
    super
  end

  def on_attr_value(str)
    check_valid_chardata(str)
    super
  end

  def on_attr_entityref(ref)
    check_valid_name(ref)
    super
  end

  def on_attr_charref(code)
    check_valid_char(code)
    super
  end

  def on_attr_charref_hex(code)
    check_valid_char(code)
    super
  end

end
|
405
|
+
|
406
|
+
|
407
|
+
private
|
408
|
+
|
409
|
+
# Mixes the StrictChar checks into this scanner instance only.
def apply_option_strict_char
  self.extend(StrictChar)
end
|
412
|
+
|
413
|
+
end
|
414
|
+
|
415
|
+
|
416
|
+
end
|
417
|
+
|
418
|
+
|
419
|
+
|
420
|
+
|
421
|
+
|
422
|
+
|
423
|
+
# Self-test, executed only when this file is run directly: scans the
# input given through ARGF with the :strict_char option and prints the
# user CPU time spent to STDERR.
if $0 == __FILE__ then
  # Visitor that prints errors instead of raising them, and only when
  # ruby runs in verbose mode.
  class TestVisitor
    include XMLScan::Visitor
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  # $s lets the TestVisitor callbacks above read the scanner's position.
  $s = scan = XMLScan::XMLScanner.new(TestVisitor.new, :strict_char)
  src = ARGF
  # ARGF has no #path; give it one that answers with the current file name.
  def src.path; filename; end
  # Process.times replaces Time.times, which no longer exists in Ruby 1.9
  # and later; Process.times is available on older versions as well.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|