xmlscan 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +1276 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +31 -0
- data/README.rdoc +365 -0
- data/Rakefile +65 -0
- data/THANKS +11 -0
- data/VERSION +1 -0
- data/install.rb +41 -0
- data/lib/xmlscan/htmlscan.rb +290 -0
- data/lib/xmlscan/namespace.rb +353 -0
- data/lib/xmlscan/parser.rb +300 -0
- data/lib/xmlscan/scanner.rb +1123 -0
- data/lib/xmlscan/version.rb +23 -0
- data/lib/xmlscan/visitor.rb +162 -0
- data/lib/xmlscan/xmlchar.rb +248 -0
- data/test.rb +7 -0
- metadata +113 -0
data/THANKS
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
$Id: THANKS,v 1.4.2.1 2003/02/15 13:22:01 katsu Exp $
|
2
|
+
|
3
|
+
Thanks to all of the following for their valuable hints, fixes,
|
4
|
+
discussions, and contributions:
|
5
|
+
|
6
|
+
Yoshida Masato <yoshidam@yoshidam.net>
|
7
|
+
TAKAHASHI Masayoshi <maki@inac.co.jp>
|
8
|
+
NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>
|
9
|
+
James Britt <james@jamesbritt.com>
|
10
|
+
Takaaki Tateishi <ttate@kt.jaist.ac.jp>
|
11
|
+
Tanaka Akira <akr@m17n.org>
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.3
|
data/install.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# install.rb
|
4
|
+
#
|
5
|
+
# $Id: install.rb,v 1.2 2002/12/26 21:09:38 katsu Exp $
|
6
|
+
|
7
|
+
require 'rbconfig'
require 'fileutils'
require 'find'
require 'getoptlong'

# Default installation root: the interpreter's site library directory.
# RbConfig is used because the top-level Config constant has been removed
# from current Ruby versions.
DEFAULT_DESTDIR = RbConfig::CONFIG['sitelibdir'] || RbConfig::CONFIG['sitedir']

# Directory that contains this script; sources are looked up relative to it.
SRCDIR = File.dirname(__FILE__)


# Copies every `*.rb' file found below SRCDIR/+from+ into the directory +to+,
# keeping the relative directory layout.  Target directories are created on
# demand and files are installed with mode 0644.  FileUtils replaces the
# `ftools' library (File.makedirs / File.install), which is no longer part
# of the standard library.
def install_rb(from, to)
  from = SRCDIR + '/' + from
  Find.find(from) { |src|
    next unless File.file? src
    next unless /\.rb\z/ =~ src
    dst = src.sub(/\A#{Regexp.escape(from)}/, to)
    FileUtils.mkdir_p File.dirname(dst), :verbose => true
    FileUtils.install src, dst, :mode => 0644, :verbose => true
  }
end


# Command line: -d DIR / --destdir DIR overrides the installation root.
destdir = DEFAULT_DESTDIR
begin
  GetoptLong.new([ "-d", "--destdir", GetoptLong::REQUIRED_ARGUMENT ]
                 ).each_option { |opt, arg|
    case opt
    when '-d' then
      destdir = arg
    end
  }
rescue
  # Option parsing failed; exit with a usage-error status.
  exit 2
end

install_rb "lib", destdir
|
@@ -0,0 +1,290 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# xmlscan/htmlscan.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
6
|
+
#
|
7
|
+
# $Id: htmlscan.rb,v 1.16.2.2 2003/05/01 15:43:23 katsu Exp $
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'xmlscan/scanner'
|
11
|
+
|
12
|
+
|
13
|
+
module XMLScan
|
14
|
+
|
15
|
+
class HTMLScanner < XMLScanner
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def wellformed_error(msg)
|
20
|
+
# All wellformed errors raised by XMLScanner are ignored.
|
21
|
+
# XMLScanner only raises wellformed errors in scan_stag, which is a
|
22
|
+
# method completely overridden by HTMLScanner, so this method is
|
23
|
+
# never called in fact.
|
24
|
+
end
|
25
|
+
|
26
|
+
# The handlers below must never run in this scanner.  Each of them fails
# loudly with a RuntimeError so that an unexpected call is noticed at once.

def on_xmldecl
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_version(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_encoding(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_standalone(str)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_other(name, value)
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_xmldecl_end
  raise RuntimeError, "[BUG] this method must be never called"
end

def on_stag_end_empty(name)
  raise RuntimeError, "[BUG] this method must be never called"
end
|
53
|
+
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
# Scans a comment declaration.  +s+ is the fragment beginning with `<!--';
# further text is pulled from @src with get_plain as needed.  The collected
# comment text is reported through on_comment; irregular input is reported
# through parse_error and then recovered from.
def scan_comment(s)
  s[0, 4] = ''                  # remove `<!--'
  text = ''
  # Collect everything up to the first `--'.
  until /--/n =~ s
    text << s
    s = @src.get_plain
    unless s
      parse_error "unterminated comment meets EOF"
      return on_comment(text)
    end
  end
  text << $`
  s = $'
  # The declaration ends when only whitespace remains and the tag closes:
  # `-->' or `-- >'.
  until (s.empty? || s.strip.empty?) && @src.close_tag
    text << '--'
    if /\A\s*--/n =~ s          # <!--hoge-- --
      text << $&
      s = $'
      if s.empty? && @src.close_tag     # <!--hoge-- -->
        parse_error "`-->' is found but comment must not end here"
        2.times { text.chop! }          # drop the `--' appended just above
        break
      end
    else                        # <!--hoge-- fuga
      parse_error "only whitespace can appear between two comments"
    end
    if (/\A-\s*\z/n =~ s) && @src.close_tag    # <!--hoge--->
      parse_error "`-->' is found but comment must not end here"
      text.chop!
      break
    end
    # Same search as at the top, repeated inline (the original notes this
    # duplication is deliberate, for performance).
    until /--/n =~ s
      text << s
      s = @src.get_plain
      unless s
        parse_error "unterminated comment meets EOF"
        return on_comment(text)
      end
    end
    text << $`
    s = $'
  end
  on_comment text
end
|
101
|
+
|
102
|
+
|
103
|
+
alias scan_xml_pi scan_pi # PIO "<?" PIC "?>" -- <? PI ?> --
|
104
|
+
|
105
|
+
|
106
|
+
# Scans an SGML-style processing instruction (`<?PI >').  +s+ is the
# fragment beginning with `<?'.  Text is accumulated until @src reports a
# closing tag, then handed to on_pi with an empty target.
def scan_pi(s)       # <?PI > this is default in SGML.
  s[0, 2] = ''       # remove `<?'
  content = s
  loop do
    break if @src.close_tag
    chunk = @src.get_plain
    unless chunk
      parse_error "unterminated PI meets EOF"
      break
    end
    content << chunk
  end
  on_pi '', content
end
|
119
|
+
|
120
|
+
|
121
|
+
# Scans a start tag leniently.  +s+ is the fragment beginning with `<'.
# Reports the tag through on_stag / on_stag_end, and each attribute through
# on_attribute, scan_attvalue or on_attr_value, and on_attribute_end.
# Attributes may be written as key="value", key=value, or as a bare value
# (reported with a nil key).  Malformed input is reported via parse_error.
def scan_stag(s)
  unless /(?=[\/\s='"])/n =~ s
    # No attribute part at all: the fragment is just `<name'.
    name = s
    name[0, 1] = ''             # remove `<'
    if name.empty?              # <> or <<
      return found_empty_stag if @src.close_tag
      parse_error "parse error at `<'"
      return on_chardata('<')
    end
    on_stag name
    found_unclosed_stag name unless @src.close_tag
    return on_stag_end(name)
  end

  name = $`
  s = $'
  name[0, 1] = ''               # remove `<'
  if name.empty?                # `< tag' or `<=`
    parse_error "parse error at `<'"
    s << '>' if @src.close_tag
    return on_chardata('<' << s)
  end
  on_stag name
  begin
    # `continue' receives the remaining text when a quoted value spilled
    # over into later fragments; that remainder is then scanned again.
    continue = false
    s.scan(
      /([^\s=\/'"]+)(?:\s*=\s*(?:('[^']*'?|"[^"]*"?)|([^\s='"]+)))?|(\S)/n
    ) { |key, quoted, bare, stray|
      if key
        if quoted               # key="value"
          on_attribute key
          qmark = quoted.slice!(0, 1)
          if quoted[-1] == qmark[0]
            quoted.chop!
            scan_attvalue quoted unless quoted.empty?
          else
            # The closing quote is missing from this fragment: keep reading
            # fragments until one contains the quote mark.
            scan_attvalue quoted unless quoted.empty?
            begin
              s = @src.get
              unless s
                parse_error "unterminated attribute `#{key}' meets EOF"
                break
              end
              c = s[0]
              quoted, s = s.split(qmark, 2)
              scan_attvalue '>' unless c == ?< || c == ?>
              scan_attvalue quoted if c
            end until s
            continue = s
          end
          on_attribute_end key
        elsif bare              # key=value
          on_attribute key
          on_attr_value bare
          on_attribute_end key
        else                    # value
          on_attribute nil
          on_attr_value key
          on_attribute_end nil
        end
      else
        parse_error "parse error at `#{stray}'"
      end
    }
  end while continue
  found_unclosed_stag name unless @src.close_tag
  on_stag_end name
end
|
194
|
+
|
195
|
+
|
196
|
+
# This method should be called only from on_stag_end.
|
197
|
+
# Returns the raw text that precedes the next fragment starting with `</',
# consuming it from @src.  Returns an empty string when the source is
# exhausted or when the very next fragment already starts with `</'.
def get_cdata_content
  upcoming = @src.test
  return '' if !upcoming || (upcoming[0] == ?< && upcoming[1] == ?/)

  content = @src.get
  loop do
    upcoming = @src.test
    break if !upcoming || (upcoming[0] == ?< && upcoming[1] == ?/)
    content << @src.get_plain
  end
  content
end
public :get_cdata_content
|
209
|
+
|
210
|
+
|
211
|
+
# Handles a `<!' fragment that is neither a comment nor a doctype.
# `<!>' is reported as an empty comment; anything else is a parse error
# and is skipped up to the closing tag (or the end of input).
def scan_bang_tag(s)
  return on_comment('') if s == '<!' && @src.close_tag   # <!>

  parse_error "parse error at `<!'"
  s = @src.get_plain while s && !@src.close_tag           # skip entire
  nil
end
|
221
|
+
|
222
|
+
|
223
|
+
# An internal DTD subset is not allowed here: report it as a parse error,
# then skip over it with skip_internal_dtd.
def scan_internal_dtd(s)
  parse_error("DTD subset is found but it is not permitted in HTML")
  skip_internal_dtd(s)
end
|
227
|
+
|
228
|
+
|
229
|
+
# Accepts PUBLIC / SYSTEM written in any letter case and returns the
# upper-cased keyword.  Any other value is passed on to the inherited
# implementation.
def found_invalid_pubsys(pubsys)
  keyword = pubsys.upcase
  case keyword
  when 'PUBLIC', 'SYSTEM' then keyword
  else super
  end
end
|
234
|
+
|
235
|
+
|
236
|
+
# Scans the document prolog, starting with fragment +s+ and pulling further
# fragments from @src.  Comments, doctype declarations, processing
# instructions and whitespace are consumed here; the first fragment that is
# none of these (or the next fragment from @src once the loop runs out) is
# handed to scan_content.
def scan_prolog(s)
  doctype_count = 0
  while s
    if s.start_with?('<!--')
      scan_comment s
    elsif s.start_with?('<!')
      break unless /\A<!doctype(?=\s)/in =~ s
      rest = $'
      doctype_count += 1
      parse_error "another document type declaration is found" if doctype_count > 1
      scan_doctype rest
    elsif s.start_with?('<?')
      scan_pi s
    elsif s[0] != ?< && s.strip.empty?
      on_prolog_space s
    else
      break
    end
    s = @src.get
  end
  scan_content(s || @src.get)
end
|
266
|
+
|
267
|
+
end
|
268
|
+
|
269
|
+
end
|
270
|
+
|
271
|
+
|
272
|
+
|
273
|
+
|
274
|
+
|
275
|
+
# Self-test driver: when this file is run directly, scan the files given on
# the command line (or standard input) and print the user CPU time spent.
if $0 == __FILE__ then
  class TestVisitor
    include XMLScan::Visitor
    # Parse errors are only printed in verbose mode (ruby -v / -w).
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  $s = scan = XMLScan::HTMLScanner.new(TestVisitor.new)
  src = ARGF
  # Give ARGF a `path' method returning the current file name.
  def src.path; filename; end
  # Process.times replaces Time.times, which no longer exists in Ruby.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|
@@ -0,0 +1,353 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# xmlscan/namespace.rb
|
4
|
+
#
|
5
|
+
# Copyright (C) Ueno Katsuhiro 2002
|
6
|
+
#
|
7
|
+
# $Id: namespace.rb,v 1.13 2003/01/22 13:06:18 katsu Exp $
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'xmlscan/parser'
|
11
|
+
|
12
|
+
|
13
|
+
module XMLScan
|
14
|
+
|
15
|
+
# Raised by NSVisitor#ns_parse_error.
class NSParseError < ParseError
end

# Raised by NSVisitor#ns_wellformed_error.
class NSNotWellFormedError < NotWellFormedError
end

# Raised by NSVisitor#ns_valid_error.
class NSNotValidError < NotValidError
end
|
18
|
+
|
19
|
+
|
20
|
+
# Visitor interface extended with namespace-aware callbacks.  The error
# handlers raise by default; the on_*_ns callbacks do nothing by default.
module NSVisitor

  include Visitor

  # Raises NSParseError with +msg+.
  def ns_parse_error(msg)
    raise NSParseError, msg
  end

  # Raises NSNotWellFormedError with +msg+.
  def ns_wellformed_error(msg)
    raise NSNotWellFormedError, msg
  end

  # Raises NSNotValidError with +msg+.
  def ns_valid_error(msg)
    raise NSNotValidError, msg
  end

  #
  #  <foo:bar hoge:fuga=''  hoge=''  >
  #  <foo     hoge:fuga=''  hoge=''  >
  #  ^        ^           ^ ^      ^ ^
  #  1        2           3 4      5 6
  #
  #  The following method will be called with the following arguments
  #  when the parser reaches the above point;
  #
  #    1: on_stag_ns           ('foo:bar', 'foo', 'bar')
  #         or
  #       on_stag_ns           ('foo', '', 'foo')
  #    2: on_attribute_ns      ('hoge:fuga', 'hoge', 'fuga')
  #    3: on_attribute_end     ('hoge:fuga')
  #    4: on_attribute_ns      ('hoge', nil, 'hoge')
  #    5: on_attribute_end     ('hoge')
  #    6: on_stag_end_ns       ('foo:bar', { 'foo' => '', ... })
  #         or
  #       on_stag_end_empty_ns ('foo:bar', { 'foo' => '', ... })
  #

  # Called for a start tag name, split into prefix and local part.
  def on_stag_ns(qname, prefix, localpart)
  end

  # Called for an attribute name, split into prefix and local part.
  def on_attribute_ns(qname, prefix, localpart)
  end

  # Called at the end of a start tag, with the namespace bindings in effect.
  def on_stag_end_ns(qname, namespaces)
  end

  # Called at the end of an empty-element tag, with the namespace bindings
  # in effect.
  def on_stag_end_empty_ns(qname, namespaces)
  end

end
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
class XMLNamespaceDecoration < Decoration
|
75
|
+
|
76
|
+
# Sanity check run at load time: assigning nil to an existing hash key must
# keep the key in the hash.
lambda {
  probe = { 'foo' => true }
  probe['foo'] = nil
  raise "requires Ruby-1.6 or above" unless probe.key?('foo')
}.call

# Prefixes with a fixed namespace name.
PredefinedNamespace = {
  'xml'   => 'http://www.w3.org/XML/1998/namespace',
  'xmlns' => 'http://www.w3.org/2000/xmlns/',
}

# Reverse lookup: namespace name => the only prefix allowed for it.
ReservedNamespace = PredefinedNamespace.invert
|
87
|
+
|
88
|
+
|
89
|
+
# Namespace error reporting.  All three handlers forward to the visitor
# saved in @orig_visitor.

def ns_parse_error(msg)
  @orig_visitor.ns_parse_error(msg)
end

def ns_wellformed_error(msg)
  @orig_visitor.ns_wellformed_error(msg)
end

def ns_valid_error(msg)
  @orig_visitor.ns_valid_error(msg)
end
|
100
|
+
|
101
|
+
|
102
|
+
# Resets all namespace bookkeeping for a new document, then forwards the
# event to the wrapped visitor.
def on_start_document
  @orig_visitor  = @visitor
  @xmlns         = NamespaceDeclaration.new(self)
  @namespace     = {}    #PredefinedNamespace.dup
  @ns_hist       = []
  @ns_undeclared = {}    # for checking undeclared namespace prefixes.
  @prev_prefix   = {}    # for checking doubled attributes.
  @dont_same     = []    # ditto.
  @visitor.on_start_document
end
|
112
|
+
|
113
|
+
|
114
|
+
# Handles a start tag name.  Pushes an (initially empty) history slot for
# the element, splits a qualified name at its first `:' and forwards the
# result to on_stag_ns.  A prefix with no binding yet is either bound to
# its predefined namespace or remembered as undeclared.
def on_stag(name)
  @ns_hist.push(nil)
  return @visitor.on_stag_ns(name, '', name) unless /:/n =~ name

  colon = Regexp.last_match
  prefix = colon.pre_match
  localpart = colon.post_match
  ns_parse_error "localpart `#{localpart}' includes `:'" if localpart.include?(?:)
  if prefix == 'xmlns'
    ns_wellformed_error "prefix `xmlns' is not used for namespace prefix declaration"
  end
  unless @namespace.key?(prefix)
    predefined = PredefinedNamespace[prefix]
    if predefined
      @namespace[prefix] = predefined
    else
      @ns_undeclared[prefix] = true
    end
  end
  @visitor.on_stag_ns(name, prefix, localpart)
end
|
137
|
+
|
138
|
+
|
139
|
+
# Handles an attribute name.  `xmlns' and `xmlns:*' attributes switch the
# active visitor to the namespace-declaration collector (@xmlns); all other
# attributes are forwarded to on_attribute_ns.  For prefixed attributes the
# prefix is recorded per local part so that fix_namespace can later detect
# a doubled local part within one namespace.
def on_attribute(name)
  unless /:/n =~ name
    return @visitor.on_attribute_ns(name, nil, name) unless name == 'xmlns'
    @visitor = @xmlns
    return @xmlns.on_xmlns_start('')
  end

  colon = Regexp.last_match
  prefix = colon.pre_match
  localpart = colon.post_match
  ns_parse_error "localpart `#{localpart}' includes `:'" if localpart.include?(?:)
  unless @namespace.key?(prefix)
    predefined = PredefinedNamespace[prefix]
    if predefined
      @namespace[prefix] = predefined
    else
      @ns_undeclared[prefix] = true
    end
  end
  if prefix == 'xmlns'
    @visitor = @xmlns
    @xmlns.on_xmlns_start(localpart)
  else
    previous = @prev_prefix[localpart]
    @dont_same.push([ previous, prefix, localpart ]) if previous
    @prev_prefix[localpart] = prefix
    @visitor.on_attribute_ns(name, prefix, localpart)
  end
end
|
169
|
+
|
170
|
+
|
171
|
+
# Temporary visitor that collects the value of one xmlns attribute.  The
# parent decoration installs it as the active visitor while the attribute
# value is scanned; when the attribute ends, the collected namespace name
# is handed back through parent.on_xmlns_end.
class NamespaceDeclaration

  include XMLScan::Visitor

  def initialize(parent)
    @parent = parent
  end

  # Starts collecting the namespace name declared for +prefix+.
  def on_xmlns_start(prefix)
    @prefix = prefix
    @nsdecl = ''
  end

  def on_attr_value(str)
    @nsdecl << str
  end

  # Entity references are rejected inside a namespace declaration.
  def on_attr_entityref(ref)
    @parent.ns_wellformed_error("xmlns includes undeclared entity reference")
  end

  # Character references are appended as the UTF-8 encoding of +code+.
  def on_attr_charref(code)
    @nsdecl << [code].pack('U')
  end

  # Hexadecimal character references are handled exactly like decimal ones.
  alias on_attr_charref_hex on_attr_charref

  def on_attribute_end(name)
    @parent.on_xmlns_end(@prefix, @nsdecl)
  end

end
|
206
|
+
|
207
|
+
|
208
|
+
# Completes a namespace declaration: binds +prefix+ to +uri+ for the
# current element.  Restores the regular visitor, checks the declaration
# against the predefined and reserved namespaces, saves the previous
# binding in the element's history slot, and clears the prefix from the
# undeclared set.  An empty +uri+ for the default namespace (empty prefix)
# is stored as nil.
def on_xmlns_end(prefix, uri)
  @visitor = @orig_visitor

  if PredefinedNamespace.key?(prefix)
    fixed_uri = PredefinedNamespace[prefix]
    if prefix == 'xmlns'
      ns_wellformed_error "prefix `xmlns' can't be bound to any namespace explicitly"
    elsif fixed_uri != uri
      ns_wellformed_error "prefix `#{prefix}' can't be bound to any namespace except `#{fixed_uri}'"
    end
  end

  if uri.empty?
    if prefix.empty?
      uri = nil
    else
      ns_parse_error "`#{prefix}' is bound to empty namespace name"
    end
  elsif ReservedNamespace.key?(uri)
    owner = ReservedNamespace[uri]
    unless owner == prefix
      ns_wellformed_error "namespace `#{uri}' is reserved for prefix `#{owner}'"
    end
  end

  # The history slot of the current element is created lazily.
  saved = @ns_hist.last
  saved = @ns_hist[-1] = {} unless saved
  saved[prefix] = @namespace[prefix]
  @namespace[prefix] = uri
  @ns_undeclared.delete(prefix)
end
|
235
|
+
|
236
|
+
|
237
|
+
# Runs the checks that are only possible once all attributes of a start tag
# have been seen, then resets the per-tag bookkeeping:
#
# * every prefix still listed in @ns_undeclared is reported as undeclared;
# * every pair of attributes with the same local part whose prefixes map to
#   the same namespace is reported as doubled.
#
# All errors go through ns_wellformed_error, the same reporting path used
# by the other checks in this class, instead of calling the currently
# active visitor directly.
def fix_namespace
  unless @ns_undeclared.empty?
    @ns_undeclared.each_key { |prefix|
      ns_wellformed_error "prefix `#{prefix}' is not declared"
    }
    @ns_undeclared.clear
  end
  unless @dont_same.empty?
    @dont_same.each { |n1, n2, l|
      if @namespace[n1] == @namespace[n2]
        ns_wellformed_error "doubled localpart `#{l}' in the same namespace"
      end
    }
    @dont_same.clear
  end
  @prev_prefix.clear
end
|
255
|
+
|
256
|
+
|
257
|
+
# End of a start tag: run the deferred namespace checks, then report the
# tag together with the current namespace bindings.
def on_stag_end(name)
  fix_namespace
  @visitor.on_stag_end_ns(name, @namespace)
end
|
261
|
+
|
262
|
+
|
263
|
+
# End tag: leave the element's namespace scope, then forward the event.
def on_etag(name)
  restore_namespace
  @visitor.on_etag name
end


# Empty-element tag: run the deferred namespace checks, report the tag with
# the bindings in effect, then leave the element's namespace scope.
def on_stag_end_empty(name)
  fix_namespace
  @visitor.on_stag_end_empty_ns name, @namespace
  restore_namespace
end


# Pops the history slot of the element being closed and restores the
# bindings it shadowed.  Returns @namespace, or nil when the element
# declared no namespaces.
#
# A saved value of nil for a non-empty prefix means the prefix had no
# binding before this element.  The key is removed in that case rather than
# stored as nil: a plain Hash#update would leave `prefix => nil' behind, so
# `@namespace.key? prefix' would stay true and later uses of the
# out-of-scope prefix would never be reported as undeclared.  The default
# namespace (empty prefix) keeps its nil entry as before.
def restore_namespace
  saved = @ns_hist.pop
  return nil unless saved
  saved.each { |prefix, uri|
    if uri.nil? && !prefix.empty?
      @namespace.delete prefix
    else
      @namespace[prefix] = uri
    end
  }
  @namespace
end
private :restore_namespace
|
274
|
+
|
275
|
+
|
276
|
+
# Reports a parse error when the root name of the doctype contains more
# than one `:', then forwards the declaration unchanged.
def on_doctype(root, pubid, sysid)
  colons = root.count(':')
  ns_parse_error "qualified name `#{root}' includes `:'" if colons > 1
  @visitor.on_doctype(root, pubid, sysid)
end
|
282
|
+
|
283
|
+
|
284
|
+
# Reports a parse error when the PI target contains `:', then forwards
# the processing instruction unchanged.
def on_pi(target, pi)
  ns_parse_error "PI target `#{target}' includes `:'" if target.include?(?:)
  @visitor.on_pi(target, pi)
end
|
290
|
+
|
291
|
+
|
292
|
+
# Reports a parse error when the entity name contains `:', then forwards
# the reference unchanged.
def on_entityref(ref)
  ns_parse_error "entity reference `#{ref}' includes `:'" if ref.include?(?:)
  @visitor.on_entityref(ref)
end
|
298
|
+
|
299
|
+
|
300
|
+
# Same check as for entity references in content: a name containing `:'
# is reported, then the reference is forwarded unchanged.
def on_attr_entityref(ref)
  ns_parse_error "entity reference `#{ref}' includes `:'" if ref.include?(?:)
  @visitor.on_attr_entityref(ref)
end
|
306
|
+
|
307
|
+
end
|
308
|
+
|
309
|
+
|
310
|
+
|
311
|
+
# XMLParser whose visitor is wrapped in an XMLNamespaceDecoration.
class XMLParserNS < XMLParser

  # Accepts the same arguments as XMLParser#initialize, then replaces the
  # visitor set up by the superclass with a namespace decoration around it.
  def initialize(*)
    super
    @decoration = XMLNamespaceDecoration.new(@visitor)
    @visitor = @decoration
  end

end
|
319
|
+
|
320
|
+
end
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
|
325
|
+
|
326
|
+
# Self-test driver: when this file is run directly, parse the files given
# on the command line (or standard input) with namespace processing and
# print the user CPU time spent.
if $0 == __FILE__ then
  # Visitor that prints every reported problem in verbose mode
  # (ruby -v / -w) instead of raising.
  class TestVisitor
    include XMLScan::NSVisitor
    def parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def wellformed_error(msg)
      STDERR.printf("%s:%d: WFC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def warning(msg)
      STDERR.printf("%s:%d: warning: %s\n", $s.path,$s.lineno, msg) if $VERBOSE
    end
    def ns_parse_error(msg)
      STDERR.printf("%s:%d: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
    def ns_wellformed_error(msg)
      STDERR.printf("%s:%d: NSC: %s\n", $s.path, $s.lineno, msg) if $VERBOSE
    end
  end

  $s = scan = XMLScan::XMLParserNS.new(TestVisitor.new)
  src = ARGF
  # Give ARGF a `path' method returning the current file name.
  def src.path; filename; end
  # Process.times replaces Time.times, which no longer exists in Ruby.
  t1 = Process.times.utime
  scan.parse src
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
|