xmlscan 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +1276 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +31 -0
- data/README.rdoc +365 -0
- data/Rakefile +65 -0
- data/THANKS +11 -0
- data/VERSION +1 -0
- data/install.rb +41 -0
- data/lib/xmlscan/htmlscan.rb +290 -0
- data/lib/xmlscan/namespace.rb +353 -0
- data/lib/xmlscan/parser.rb +300 -0
- data/lib/xmlscan/scanner.rb +1123 -0
- data/lib/xmlscan/version.rb +23 -0
- data/lib/xmlscan/visitor.rb +162 -0
- data/lib/xmlscan/xmlchar.rb +248 -0
- data/test.rb +7 -0
- metadata +113 -0
data/THANKS
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
$Id: THANKS,v 1.4.2.1 2003/02/15 13:22:01 katsu Exp $
|
2
|
+
|
3
|
+
Thanks to all of the following for their valuable hints, fixes,
|
4
|
+
discussions, and contributions:
|
5
|
+
|
6
|
+
Yoshida Masato <yoshidam@yoshidam.net>
|
7
|
+
TAKAHASHI Masayoshi <maki@inac.co.jp>
|
8
|
+
NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>
|
9
|
+
James Britt <james@jamesbritt.com>
|
10
|
+
Takaaki Tateishi <ttate@kt.jaist.ac.jp>
|
11
|
+
Tanaka Akira <akr@m17n.org>
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.3
|
data/install.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# install.rb
|
4
|
+
#
|
5
|
+
# $Id: install.rb,v 1.2 2002/12/26 21:09:38 katsu Exp $
|
6
|
+
|
7
|
+
require 'rbconfig'
require 'fileutils'
require 'find'
require 'getoptlong'

# Default installation directory: the interpreter's site library directory.
# RbConfig replaces the old top-level `Config' constant, which no longer
# exists in current Ruby versions.
DEFAULT_DESTDIR = RbConfig::CONFIG['sitelibdir'] || RbConfig::CONFIG['sitedir']

# Directory containing this script; sources are looked up relative to it.
SRCDIR = File.dirname(__FILE__)


# Copies every `*.rb' file found below SRCDIR/<from> into the directory
# <to>, preserving the relative directory layout.
#
# from:: directory name relative to SRCDIR.
# to::   destination directory.
#
# FileUtils is used instead of the `ftools' extension of File
# (File.makedirs / File.install), which was removed from the standard
# library.
def install_rb(from, to)
  from = SRCDIR + '/' + from
  Find.find(from) { |src|
    next unless File.file? src
    next unless /\.rb\z/ =~ src
    # Block form: backslashes in `to' must not be read as back-references.
    dst = src.sub(/\A#{Regexp.escape(from)}/) { to }
    FileUtils.mkdir_p File.dirname(dst), :verbose => true
    FileUtils.install src, dst, :mode => 0644, :verbose => true
  }
end


# Command line: -d DIR / --destdir DIR overrides the destination.
destdir = DEFAULT_DESTDIR
begin
  GetoptLong.new([ "-d", "--destdir", GetoptLong::REQUIRED_ARGUMENT ]
                 ).each_option { |opt, arg|
    case opt
    when '-d' then
      destdir = arg
    end
  }
rescue GetoptLong::Error
  # Invalid option on the command line: exit with status 2.
  exit 2
end

install_rb "lib", destdir
|
@@ -0,0 +1,290 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# xmlscan/htmlscan.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
6
|
+
#
|
7
|
+
# $Id: htmlscan.rb,v 1.16.2.2 2003/05/01 15:43:23 katsu Exp $
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'xmlscan/scanner'
|
11
|
+
|
12
|
+
|
13
|
+
module XMLScan
|
14
|
+
|
15
|
+
class HTMLScanner < XMLScanner
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def wellformed_error(msg)
  # Deliberately empty: every well-formedness error raised by XMLScanner
  # is ignored by the HTML scanner.
  # According to the original author, XMLScanner raises such errors only
  # in scan_stag, which HTMLScanner overrides completely, so this method
  # is in fact never called.
end
|
25
|
+
|
26
|
+
# The handlers below raise unconditionally: reaching any of them is
# treated as an internal bug of the HTML scanner.

def on_xmldecl()
  raise("[BUG] this method must be never called")
end

def on_xmldecl_version(str)
  raise("[BUG] this method must be never called")
end

def on_xmldecl_encoding(str)
  raise("[BUG] this method must be never called")
end

def on_xmldecl_standalone(str)
  raise("[BUG] this method must be never called")
end

def on_xmldecl_other(name, value)
  raise("[BUG] this method must be never called")
end

def on_xmldecl_end()
  raise("[BUG] this method must be never called")
end

def on_stag_end_empty(name)
  raise("[BUG] this method must be never called")
end
|
53
|
+
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
# Scans an SGML-style comment declaration.  `s' is the chunk starting
# with `<!--'; further chunks are pulled from @src as needed.  The
# collected text is reported through on_comment and irregularities
# through parse_error.
def scan_comment(s)
  s[0, 4] = ''                          # remove `<!--'
  text = ''
  # Collect chunks until one of them contains `--'.
  until /--/n =~ s
    text << s
    s = @src.get_plain
    unless s
      parse_error "unterminated comment meets EOF"
      return on_comment(text)
    end
  end
  text << $`
  s = $'
  # The comment is finished once only whitespace follows `--' and the
  # tag is closed:  --> or -- >
  until (s.empty? || s.strip.empty?) && @src.close_tag
    text << '--'
    if /\A\s*--/n =~ s                  # <!--hoge-- --
      text << $&
      s = $'
      if s.empty? && @src.close_tag     # <!--hoge-- -->
        parse_error "`-->' is found but comment must not end here"
        text.chop!.chop!                # drop the `--' just appended
        break
      end
    else                                # <!--hoge-- fuga
      parse_error "only whitespace can appear between two comments"
    end
    if /\A-\s*\z/n =~ s && @src.close_tag   # <!--hoge--->
      parse_error "`-->' is found but comment must not end here"
      text.chop!
      break
    end
    # Same "read up to the next `--'" loop as above; the original author
    # duplicated it on purpose for performance.
    until /--/n =~ s
      text << s
      s = @src.get_plain
      unless s
        parse_error "unterminated comment meets EOF"
        return on_comment(text)
      end
    end
    text << $`
    s = $'
  end
  on_comment text
end
|
101
|
+
|
102
|
+
|
103
|
+
# Keep the inherited implementation reachable under another name.  The
# alias has to come before the redefinition below.
# PIO "<?" PIC "?>"  -- <? PI ?> --
alias scan_xml_pi scan_pi


# Scans a processing instruction closed by a plain `>' (<?PI >), which
# is the default in SGML.  The whole content is passed to on_pi with an
# empty target.
def scan_pi(s)
  s[0, 2] = ''                          # remove `<?'
  body = s
  until @src.close_tag
    chunk = @src.get_plain
    if chunk
      body << chunk
    else
      parse_error "unterminated PI meets EOF"
      break
    end
  end
  on_pi '', body
end
|
119
|
+
|
120
|
+
|
121
|
+
# Scans a start tag.  `s' is the chunk beginning with `<'.  Events are
# emitted through on_stag, on_attribute, on_attr_value / scan_attvalue,
# on_attribute_end and on_stag_end; malformed input is reported through
# parse_error and handled leniently.
def scan_stag(s)
  if /(?=[\/\s='"])/n =~ s
    # Something follows the tag name: split name and attribute text.
    name = $`
    s = $'
    name[0, 1] = ''                     # remove `<'
    if name.empty?                      # `< tag' or `<=`
      parse_error "parse error at `<'"
      s << '>' if @src.close_tag
      return on_chardata('<' << s)
    end
    on_stag name
    begin
      # Set to the rest of the chunk when a quoted value ran across
      # chunk boundaries and the remainder still has to be scanned.
      rescan = false
      s.scan(
        /([^\s=\/'"]+)(?:\s*=\s*(?:('[^']*'?|"[^"]*"?)|([^\s='"]+)))?|(\S)/n
      ) { |attr, quoted, bare, junk|
        unless attr
          parse_error "parse error at `#{junk}'"
          next
        end
        if quoted                       # key="value"
          on_attribute attr
          quote = quoted.slice!(0, 1)
          if quoted[-1] == quote[0]
            quoted.chop!
            scan_attvalue quoted unless quoted.empty?
          else
            # The closing quote is missing from this chunk: keep reading
            # chunks until one contains the quote character.
            scan_attvalue quoted unless quoted.empty?
            begin
              s = @src.get
              unless s
                parse_error "unterminated attribute `#{attr}' meets EOF"
                break
              end
              lead = s[0]
              quoted, s = s.split(quote, 2)
              scan_attvalue '>' unless lead == ?< || lead == ?>
              scan_attvalue quoted if lead
            end until s
            rescan = s
          end
          on_attribute_end attr
        elsif bare                      # key=value
          on_attribute attr
          on_attr_value bare
          on_attribute_end attr
        else                            # value
          on_attribute nil
          on_attr_value attr
          on_attribute_end nil
        end
      }
    end while rescan
    found_unclosed_stag name unless @src.close_tag
    on_stag_end name
  else
    # The chunk holds nothing but the tag name.
    name = s
    name[0, 1] = ''                     # remove `<'
    if name.empty?                      # <> or <<
      return found_empty_stag if @src.close_tag
      parse_error "parse error at `<'"
      return on_chardata('<')
    end
    on_stag name
    found_unclosed_stag name unless @src.close_tag
    on_stag_end name
  end
end
|
194
|
+
|
195
|
+
|
196
|
+
# This method should be called only from on_stag_end.
#
# Reads the source up to (not including) the next chunk that starts
# with `</' and returns the text read.  Returns an empty string when
# the source is exhausted or the very next chunk already starts
# with `</'.
def get_cdata_content
  upcoming = @src.test
  return '' if !upcoming || (upcoming[0] == ?< && upcoming[1] == ?/)
  content = @src.get
  while (upcoming = @src.test) && !(upcoming[0] == ?< && upcoming[1] == ?/)
    content << @src.get_plain
  end
  content
end
public :get_cdata_content
|
209
|
+
|
210
|
+
|
211
|
+
# Handles a `<!' construct that is neither a comment nor a doctype.
# `<!>' is reported as an empty comment; anything else is a parse error
# and is skipped up to the closing `>'.
def scan_bang_tag(s)
  return on_comment('') if s == '<!' && @src.close_tag    # <!>
  parse_error "parse error at `<!'"
  # skip the entire declaration
  s = @src.get_plain while s && !@src.close_tag
end
|
221
|
+
|
222
|
+
|
223
|
+
# An internal DTD subset is reported as a parse error and then skipped
# via skip_internal_dtd.
def scan_internal_dtd(s)
  parse_error("DTD subset is found but it is not permitted in HTML")
  skip_internal_dtd(s)
end
|
227
|
+
|
228
|
+
|
229
|
+
# Accepts the PUBLIC / SYSTEM keywords in any letter case and returns
# the upper-cased keyword; every other value is handed to the inherited
# implementation.
def found_invalid_pubsys(pubsys)
  keyword = pubsys.upcase
  case keyword
  when 'PUBLIC', 'SYSTEM'
    return keyword
  end
  super
end
|
234
|
+
|
235
|
+
|
236
|
+
# Scans the document prolog: comments, doctype declarations, processing
# instructions and whitespace.  The first chunk that is none of these
# ends the prolog and is handed to scan_content (a fresh chunk is
# fetched if the source ran out inside the prolog).
def scan_prolog(s)
  doctypes_seen = 0
  while s
    if s[0] == ?<
      marker = s[1]
      if marker == ?!
        if s[2] == ?- && s[3] == ?-
          scan_comment s
        elsif /\A<!doctype(?=\s)/in =~ s
          rest = $'
          doctypes_seen += 1
          if doctypes_seen > 1
            parse_error "another document type declaration is found"
          end
          scan_doctype rest
        else
          break
        end
      elsif marker == ??
        scan_pi s
      else
        break
      end
    elsif s.strip.empty?
      on_prolog_space s
    else
      break
    end
    s = @src.get
  end
  scan_content(s || @src.get)
end
|
266
|
+
|
267
|
+
end
|
268
|
+
|
269
|
+
end
|
270
|
+
|
271
|
+
|
272
|
+
|
273
|
+
|
274
|
+
|
275
|
+
# Self-test / benchmark: when this file is run directly, scan the input
# given on the command line (ARGF) and print the user CPU time consumed.
if $0 == __FILE__ then
  class TestVisitor
    include XMLScan::Visitor
    # Parse errors are printed only in verbose mode (ruby -v / -w).
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  $s = scan = XMLScan::HTMLScanner.new(TestVisitor.new)
  src = ARGF
  def src.path; filename; end
  # Process.times replaces Time.times, which was removed in Ruby 1.9.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|
@@ -0,0 +1,353 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# xmlscan/namespace.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
6
|
+
#
|
7
|
+
# $Id: namespace.rb,v 1.13 2003/01/22 13:06:18 katsu Exp $
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'xmlscan/parser'
|
11
|
+
|
12
|
+
|
13
|
+
module XMLScan
|
14
|
+
|
15
|
+
# Namespace-specific counterparts of the generic scanner errors.
class NSParseError < ParseError
end

class NSNotWellFormedError < NotWellFormedError
end

class NSNotValidError < NotValidError
end
|
18
|
+
|
19
|
+
|
20
|
+
# Visitor interface for namespace-aware parsing.  The error hooks raise
# the corresponding NS* exception; the event hooks do nothing and are
# meant to be overridden.
module NSVisitor

  include Visitor

  def ns_parse_error(msg)
    raise NSParseError, msg
  end

  def ns_wellformed_error(msg)
    raise NSNotWellFormedError, msg
  end

  def ns_valid_error(msg)
    raise NSNotValidError, msg
  end

  #
  #  <foo:bar hoge:fuga=''  hoge=''  >
  #  <foo     hoge:fuga=''  hoge=''  >
  #  ^        ^          ^  ^     ^  ^
  #  1        2          3  4     5  6
  #
  # The following methods are called with the following arguments
  # when the parser reaches the points marked above:
  #
  #  1: on_stag_ns           ('foo:bar', 'foo', 'bar')
  #       or
  #     on_stag_ns           ('foo', '', 'foo')
  #  2: on_attribute_ns      ('hoge:fuga', 'hoge', 'fuga')
  #  3: on_attribute_end     ('hoge:fuga')
  #  4: on_attribute_ns      ('hoge', nil, 'hoge')
  #  5: on_attribute_end     ('hoge')
  #  6: on_stag_end_ns       ('foo:bar', { 'foo' => '', ... })
  #       or
  #     on_stag_end_empty_ns ('foo:bar', { 'foo' => '', ... })
  #

  def on_stag_ns(qname, prefix, localpart); end

  def on_attribute_ns(qname, prefix, localpart); end

  def on_stag_end_ns(qname, namespaces); end

  def on_stag_end_empty_ns(qname, namespaces); end

end
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
class XMLNamespaceDecoration < Decoration
|
75
|
+
|
76
|
+
# Sanity check executed at load time: storing nil under an existing
# hash key must keep the key.  The error message attributes this
# behaviour to Ruby 1.6 or above.
proc {
  h = {'foo'=>true} ; h['foo'] = nil
  raise "requires Ruby-1.6 or above" unless h.key? 'foo'
}.call

# Prefixes that are bound implicitly, with their namespace names.
# Frozen: these are shared lookup tables and must not be modified.
PredefinedNamespace = {
  'xml'   => 'http://www.w3.org/XML/1998/namespace',
  'xmlns' => 'http://www.w3.org/2000/xmlns/',
}.freeze

# Reverse lookup: namespace name => the only prefix allowed for it.
ReservedNamespace = PredefinedNamespace.invert.freeze
|
87
|
+
|
88
|
+
|
89
|
+
# Namespace errors are always delivered to the visitor saved in
# @orig_visitor, not to whatever @visitor currently points at.

def ns_parse_error(msg)
  @orig_visitor.ns_parse_error(msg)
end

def ns_wellformed_error(msg)
  @orig_visitor.ns_wellformed_error(msg)
end

def ns_valid_error(msg)
  @orig_visitor.ns_valid_error(msg)
end
|
100
|
+
|
101
|
+
|
102
|
+
# Resets all per-document namespace state and forwards the event.
def on_start_document
  @orig_visitor  = @visitor
  @xmlns         = NamespaceDeclaration.new(self)
  @namespace     = {}    # PredefinedNamespace.dup
  @ns_hist       = []
  @ns_undeclared = {}    # for checking undeclared namespace prefixes.
  @prev_prefix   = {}    # for checking doubled attributes.
  @dont_same     = []    # ditto.
  @visitor.on_start_document
end
|
112
|
+
|
113
|
+
|
114
|
+
# Start-tag event: splits the qualified name into prefix and local part,
# records prefixes that are not bound yet and forwards the event as
# on_stag_ns(qname, prefix, localpart).
#
# A new (still empty) history slot is pushed for the element so that
# namespace declarations made on it can be undone when it ends.
def on_stag(name)
  @ns_hist.push nil
  unless /:/n =~ name then
    @visitor.on_stag_ns name, '', name
  else
    prefix, localpart = $`, $'
    if localpart.include? ?: then
      ns_parse_error "localpart `#{localpart}' includes `:'"
    end
    if prefix == 'xmlns' then
      ns_wellformed_error \
        "prefix `xmlns' is not used for namespace prefix declaration"
    end
    # Test the bound value rather than the mere presence of the key:
    # when a declaring element ends, the previous binding is restored,
    # and for a prefix that was unbound before that leaves
    # `prefix => nil' in @namespace.  Such a prefix is out of scope and
    # has to be reported as undeclared.
    unless @namespace[prefix] then
      if uri = PredefinedNamespace[prefix] then
        @namespace[prefix] = uri
      else
        @ns_undeclared[prefix] = true
      end
    end
    @visitor.on_stag_ns name, prefix, localpart
  end
end
|
137
|
+
|
138
|
+
|
139
|
+
# Attribute event.
#
# * `xmlns' and `xmlns:prefix' attributes are namespace declarations:
#   subsequent value events are redirected to @xmlns until the attribute
#   ends.
# * Other attributes are forwarded as
#   on_attribute_ns(qname, prefix, localpart); the prefix is nil for
#   unprefixed attributes.
#
# Prefixed attributes sharing a local part are remembered in @dont_same
# so that fix_namespace can detect doubled attributes.
def on_attribute(name)
  if /:/n =~ name then
    prefix, localpart = $`, $'
    if localpart.include? ?: then
      ns_parse_error "localpart `#{localpart}' includes `:'"
    end
    # Test the bound value rather than the mere presence of the key:
    # when a declaring element ends, the previous binding is restored,
    # and for a prefix that was unbound before that leaves
    # `prefix => nil' in @namespace.  Such a prefix is out of scope and
    # has to be reported as undeclared.
    unless @namespace[prefix] then
      if uri = PredefinedNamespace[prefix] then
        @namespace[prefix] = uri
      else
        @ns_undeclared[prefix] = true
      end
    end
    if prefix == 'xmlns' then
      @visitor = @xmlns
      @xmlns.on_xmlns_start localpart
    else
      if prev = @prev_prefix[localpart] then
        @dont_same.push [ prev, prefix, localpart ]
      end
      @prev_prefix[localpart] = prefix
      @visitor.on_attribute_ns name, prefix, localpart
    end
  elsif name == 'xmlns' then
    @visitor = @xmlns
    @xmlns.on_xmlns_start ''
  else
    @visitor.on_attribute_ns name, nil, name
  end
end
|
169
|
+
|
170
|
+
|
171
|
+
# Temporary visitor that collects the value of one xmlns attribute and
# hands the finished declaration back to its parent decoration.
class NamespaceDeclaration

  include XMLScan::Visitor

  # parent:: object that receives on_xmlns_end and ns_wellformed_error.
  def initialize(parent)
    @parent = parent
  end

  # Begins collecting the namespace name declared for `prefix'.
  def on_xmlns_start(prefix)
    @prefix, @nsdecl = prefix, ''
  end

  def on_attr_value(str)
    @nsdecl << str
  end

  def on_attr_entityref(ref)
    @parent.ns_wellformed_error("xmlns includes undeclared entity reference")
  end

  # Character references are appended as UTF-8 encoded characters.
  def on_attr_charref(code)
    @nsdecl << [code].pack('U')
  end

  def on_attr_charref_hex(code)
    @nsdecl << [code].pack('U')
  end

  # End of the xmlns attribute: report the collected declaration.
  def on_attribute_end(name)
    @parent.on_xmlns_end(@prefix, @nsdecl)
  end

end
|
206
|
+
|
207
|
+
|
208
|
+
# Called when an xmlns attribute is complete.  Restores the regular
# visitor, checks the declaration against the reserved prefixes and
# namespace names, saves the previous binding in the current element's
# history slot and installs the new binding.
#
# prefix:: declared prefix ('' for the default namespace).
# uri::    declared namespace name.
def on_xmlns_end(prefix, uri)
  @visitor = @orig_visitor

  if PredefinedNamespace.key?(prefix)
    required = PredefinedNamespace[prefix]
    if prefix == 'xmlns'
      ns_wellformed_error \
        "prefix `xmlns' can't be bound to any namespace explicitly"
    elsif required != uri
      ns_wellformed_error \
        "prefix `#{prefix}' can't be bound to any namespace except `#{required}'"
    end
  end

  if uri.empty?
    if prefix.empty?
      uri = nil                 # xmlns='' : the binding becomes nil
    else
      ns_parse_error "`#{prefix}' is bound to empty namespace name"
    end
  elsif ReservedNamespace.key?(uri)
    owner = ReservedNamespace[uri]
    unless owner == prefix
      ns_wellformed_error \
        "namespace `#{uri}' is reserved for prefix `#{owner}'"
    end
  end

  # The history slot of the current element is created on first use.
  saved = @ns_hist.last || (@ns_hist[-1] = {})
  saved[prefix] = @namespace[prefix]
  @namespace[prefix] = uri
  @ns_undeclared.delete prefix
end
|
235
|
+
|
236
|
+
|
237
|
+
# Runs the checks that can only be made once all attributes of a start
# tag have been seen:
#
# * every prefix still recorded in @ns_undeclared is reported as not
#   declared;
# * attribute pairs recorded in @dont_same are reported when both
#   prefixes are bound to the same namespace.
#
# All bookkeeping for the current tag is cleared afterwards.  Both kinds
# of error go through ns_wellformed_error, so they reach the same
# receiver regardless of what @visitor currently points at.
def fix_namespace
  unless @ns_undeclared.empty? then
    @ns_undeclared.each_key { |i|
      ns_wellformed_error "prefix `#{i}' is not declared"
    }
    @ns_undeclared.clear
  end
  unless @dont_same.empty? then
    @dont_same.each { |n1,n2,l|
      if @namespace[n1] == @namespace[n2] then
        ns_wellformed_error \
          "doubled localpart `#{l}' in the same namespace"
      end
    }
    @dont_same.clear
  end
  @prev_prefix.clear
end
|
255
|
+
|
256
|
+
|
257
|
+
# End of a start tag: run the deferred namespace checks, then forward
# the event together with the namespace bindings currently in effect.
def on_stag_end(name)
  fix_namespace()
  @visitor.on_stag_end_ns(name, @namespace)
end
|
261
|
+
|
262
|
+
|
263
|
+
# End tag: undo the namespace declarations made on the matching start
# tag (if any were recorded), then forward the event.
def on_etag(name)
  saved = @ns_hist.pop
  @namespace.update(saved) if saved
  @visitor.on_etag(name)
end
|
267
|
+
|
268
|
+
|
269
|
+
# End of an empty-element tag: run the deferred namespace checks,
# forward the event with the bindings in effect, then undo the
# declarations made on this tag right away.
def on_stag_end_empty(name)
  fix_namespace()
  @visitor.on_stag_end_empty_ns(name, @namespace)
  saved = @ns_hist.pop
  @namespace.update(saved) if saved
end
|
274
|
+
|
275
|
+
|
276
|
+
# Doctype event: a root name containing more than one `:' is reported
# through ns_parse_error; the event is forwarded in any case.
def on_doctype(root, pubid, sysid)
  ns_parse_error "qualified name `#{root}' includes `:'" if root.count(':') > 1
  @visitor.on_doctype(root, pubid, sysid)
end
|
282
|
+
|
283
|
+
|
284
|
+
# Processing instruction: a target containing `:' is reported through
# ns_parse_error; the event is forwarded in any case.
def on_pi(target, pi)
  ns_parse_error "PI target `#{target}' includes `:'" if target.include?(?:)
  @visitor.on_pi(target, pi)
end
|
290
|
+
|
291
|
+
|
292
|
+
# Entity reference in content: a name containing `:' is reported
# through ns_parse_error; the event is forwarded in any case.
def on_entityref(ref)
  ns_parse_error "entity reference `#{ref}' includes `:'" if ref.include?(?:)
  @visitor.on_entityref(ref)
end
|
298
|
+
|
299
|
+
|
300
|
+
# Entity reference inside an attribute value: a name containing `:' is
# reported through ns_parse_error; the event is forwarded in any case.
def on_attr_entityref(ref)
  ns_parse_error "entity reference `#{ref}' includes `:'" if ref.include?(?:)
  @visitor.on_attr_entityref(ref)
end
|
306
|
+
|
307
|
+
end
|
308
|
+
|
309
|
+
|
310
|
+
|
311
|
+
# XMLParser whose visitor is wrapped in an XMLNamespaceDecoration, so
# that events pass through the namespace decoration first.
class XMLParserNS < XMLParser

  def initialize(*)
    super
    @decoration = XMLNamespaceDecoration.new(@visitor)
    @visitor = @decoration
  end

end
|
319
|
+
|
320
|
+
end
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
|
325
|
+
|
326
|
+
# Self-test / benchmark: when this file is run directly, parse the input
# given on the command line (ARGF) with namespace processing and print
# the user CPU time consumed.  Diagnostics are printed only in verbose
# mode (ruby -v / -w).
if $0 == __FILE__ then
  class TestVisitor
    include XMLScan::NSVisitor
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def warning(msg)
      STDERR.printf("%s:%d: warning: %s\n", $s.path,$s.lineno, msg) if $VERBOSE
    end
    def ns_parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def ns_wellformed_error(msg)
      STDERR.printf("%s:%d: NSC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  $s = scan = XMLScan::XMLParserNS.new(TestVisitor.new)
  src = ARGF
  def src.path; filename; end
  # Process.times replaces Time.times, which was removed in Ruby 1.9.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|