xmlscan 0.2.3 → 0.3.0prec
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +7 -4
- data/VERSION +1 -1
- data/lib/xmlscan/htmlscan.rb +7 -7
- data/lib/xmlscan/namespace.rb +33 -33
- data/lib/xmlscan/parser.rb +17 -13
- data/lib/xmlscan/processor.rb +97 -0
- data/lib/xmlscan/scanner.rb +118 -89
- data/lib/xmlscan/version.rb +4 -10
- data/lib/xmlscan/visitor.rb +31 -29
- data/lib/xmlscan/xmlchar.rb +18 -18
- metadata +15 -16
- data/install.rb +0 -41
- data/test.rb +0 -7
data/Rakefile
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
|
4
4
|
require 'rubygems'
|
5
5
|
require 'bundler'
|
6
|
+
require 'xmlscan/version'
|
7
|
+
|
8
|
+
VERSION = XMLScan::VERSION # File.exist?('VERSION') ? File.read('VERSION') : ""
|
6
9
|
|
7
10
|
begin
|
8
11
|
Bundler.setup(:default, :development)
|
@@ -15,10 +18,11 @@ end
|
|
15
18
|
require 'rake'
|
16
19
|
|
17
20
|
begin
|
21
|
+
include XMLScan
|
18
22
|
require 'jeweler'
|
19
23
|
Jeweler::Tasks.new do |gem|
|
20
|
-
gem.name =
|
21
|
-
gem.version =
|
24
|
+
gem.name = 'xmlscan'
|
25
|
+
gem.version = XMLScan::VERSION
|
22
26
|
gem.license = "MIT"
|
23
27
|
gem.summary = "The fastest XML parser written in 100% pure Ruby."
|
24
28
|
gem.email = "gerryg@inbox.com"
|
@@ -56,10 +60,9 @@ task :default => :spec
|
|
56
60
|
|
57
61
|
require 'rdoc/task'
|
58
62
|
Rake::RDocTask.new do |rdoc|
|
59
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
60
63
|
|
61
64
|
rdoc.rdoc_dir = 'rdoc'
|
62
|
-
rdoc.title = "xmlscan #{version}"
|
65
|
+
rdoc.title = "xmlscan #{VERSION}"
|
63
66
|
rdoc.rdoc_files.include('README*')
|
64
67
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
65
68
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.3
|
1
|
+
0.3.0prec
|
data/lib/xmlscan/htmlscan.rb
CHANGED
@@ -47,7 +47,7 @@ module XMLScan
|
|
47
47
|
raise "[BUG] this method must be never called"
|
48
48
|
end
|
49
49
|
|
50
|
-
def on_stag_end_empty(name)
|
50
|
+
def on_stag_end_empty(name, *a)
|
51
51
|
raise "[BUG] this method must be never called"
|
52
52
|
end
|
53
53
|
|
@@ -127,7 +127,7 @@ module XMLScan
|
|
127
127
|
return found_empty_stag
|
128
128
|
else
|
129
129
|
parse_error "parse error at `<'"
|
130
|
-
return on_chardata
|
130
|
+
return on_chardata '<'
|
131
131
|
end
|
132
132
|
end
|
133
133
|
on_stag name
|
@@ -142,7 +142,7 @@ module XMLScan
|
|
142
142
|
if @src.close_tag then
|
143
143
|
s << '>'
|
144
144
|
end
|
145
|
-
return on_chardata
|
145
|
+
return on_chardata '<'+s
|
146
146
|
end
|
147
147
|
on_stag name
|
148
148
|
begin
|
@@ -156,9 +156,9 @@ module XMLScan
|
|
156
156
|
qmark = val.slice!(0,1)
|
157
157
|
if val[-1] == qmark[0] then
|
158
158
|
val.chop!
|
159
|
-
|
159
|
+
scan_attr_value val unless val.empty?
|
160
160
|
else
|
161
|
-
|
161
|
+
scan_attr_value val unless val.empty?
|
162
162
|
begin
|
163
163
|
s = @src.get
|
164
164
|
unless s then
|
@@ -167,8 +167,8 @@ module XMLScan
|
|
167
167
|
end
|
168
168
|
c = s[0]
|
169
169
|
val, s = s.split(qmark, 2)
|
170
|
-
|
171
|
-
|
170
|
+
scan_attr_value '>' unless c == ?< or c == ?>
|
171
|
+
scan_attr_value val if c
|
172
172
|
end until s
|
173
173
|
continue = s
|
174
174
|
end
|
data/lib/xmlscan/namespace.rb
CHANGED
@@ -54,16 +54,16 @@ module XMLScan
|
|
54
54
|
# on_stag_end_empty_ns ('foo:bar', { 'foo' => '', ... })
|
55
55
|
#
|
56
56
|
|
57
|
-
def on_stag_ns(qname, prefix, localpart)
|
57
|
+
def on_stag_ns(qname, prefix, localpart, *a)
|
58
58
|
end
|
59
59
|
|
60
|
-
def on_attribute_ns(qname, prefix, localpart)
|
60
|
+
def on_attribute_ns(qname, prefix, localpart, *a)
|
61
61
|
end
|
62
62
|
|
63
|
-
def on_stag_end_ns(qname, namespaces)
|
63
|
+
def on_stag_end_ns(qname, namespaces, *a)
|
64
64
|
end
|
65
65
|
|
66
|
-
def on_stag_end_empty_ns(qname, namespaces)
|
66
|
+
def on_stag_end_empty_ns(qname, namespaces, *a)
|
67
67
|
end
|
68
68
|
|
69
69
|
end
|
@@ -99,7 +99,7 @@ module XMLScan
|
|
99
99
|
end
|
100
100
|
|
101
101
|
|
102
|
-
def on_start_document
|
102
|
+
def on_start_document(*a)
|
103
103
|
@namespace = {} #PredefinedNamespace.dup
|
104
104
|
@ns_hist = []
|
105
105
|
@ns_undeclared = {} # for checking undeclared namespace prefixes.
|
@@ -107,14 +107,14 @@ module XMLScan
|
|
107
107
|
@dont_same = [] # ditto.
|
108
108
|
@xmlns = NamespaceDeclaration.new(self)
|
109
109
|
@orig_visitor = @visitor
|
110
|
-
@visitor.on_start_document
|
110
|
+
@visitor.on_start_document *a
|
111
111
|
end
|
112
112
|
|
113
113
|
|
114
|
-
def on_stag(name)
|
114
|
+
def on_stag(name, *a)
|
115
115
|
@ns_hist.push nil
|
116
116
|
unless /:/n =~ name then
|
117
|
-
@visitor.on_stag_ns name, '', name
|
117
|
+
@visitor.on_stag_ns name, '', name, *a
|
118
118
|
else
|
119
119
|
prefix, localpart = $`, $'
|
120
120
|
if localpart.include? ?: then
|
@@ -131,12 +131,12 @@ module XMLScan
|
|
131
131
|
@ns_undeclared[prefix] = true
|
132
132
|
end
|
133
133
|
end
|
134
|
-
@visitor.on_stag_ns name, prefix, localpart
|
134
|
+
@visitor.on_stag_ns name, prefix, localpart, *a
|
135
135
|
end
|
136
136
|
end
|
137
137
|
|
138
138
|
|
139
|
-
def on_attribute(name)
|
139
|
+
def on_attribute(name, *a)
|
140
140
|
if /:/n =~ name then
|
141
141
|
prefix, localpart = $`, $'
|
142
142
|
if localpart.include? ?: then
|
@@ -157,13 +157,13 @@ module XMLScan
|
|
157
157
|
@dont_same.push [ prev, prefix, localpart ]
|
158
158
|
end
|
159
159
|
@prev_prefix[localpart] = prefix
|
160
|
-
@visitor.on_attribute_ns name, prefix, localpart
|
160
|
+
@visitor.on_attribute_ns name, prefix, localpart, *a
|
161
161
|
end
|
162
162
|
elsif name == 'xmlns' then
|
163
163
|
@visitor = @xmlns
|
164
164
|
@xmlns.on_xmlns_start ''
|
165
165
|
else
|
166
|
-
@visitor.on_attribute_ns name, nil, name
|
166
|
+
@visitor.on_attribute_ns name, nil, name, *a
|
167
167
|
end
|
168
168
|
end
|
169
169
|
|
@@ -176,36 +176,36 @@ module XMLScan
|
|
176
176
|
@parent = parent
|
177
177
|
end
|
178
178
|
|
179
|
-
def on_xmlns_start(prefix)
|
179
|
+
def on_xmlns_start(prefix, *a)
|
180
180
|
@prefix = prefix
|
181
181
|
@nsdecl = ''
|
182
182
|
end
|
183
183
|
|
184
|
-
def on_attr_value(str)
|
184
|
+
def on_attr_value(str, *a)
|
185
185
|
@nsdecl << str
|
186
186
|
end
|
187
187
|
|
188
|
-
def on_attr_entityref(ref)
|
188
|
+
def on_attr_entityref(ref, *a)
|
189
189
|
@parent.ns_wellformed_error \
|
190
190
|
"xmlns includes undeclared entity reference"
|
191
191
|
end
|
192
192
|
|
193
|
-
def on_attr_charref(code)
|
193
|
+
def on_attr_charref(code, *a)
|
194
194
|
@nsdecl << [code].pack('U')
|
195
195
|
end
|
196
196
|
|
197
|
-
def on_attr_charref_hex(code)
|
197
|
+
def on_attr_charref_hex(code, *a)
|
198
198
|
@nsdecl << [code].pack('U')
|
199
199
|
end
|
200
200
|
|
201
|
-
def on_attribute_end(name)
|
201
|
+
def on_attribute_end(name, *a)
|
202
202
|
@parent.on_xmlns_end @prefix, @nsdecl
|
203
203
|
end
|
204
204
|
|
205
205
|
end
|
206
206
|
|
207
207
|
|
208
|
-
def on_xmlns_end(prefix, uri)
|
208
|
+
def on_xmlns_end(prefix, uri, *a)
|
209
209
|
@visitor = @orig_visitor
|
210
210
|
if PredefinedNamespace.key? prefix then
|
211
211
|
if prefix == 'xmlns' then
|
@@ -254,54 +254,54 @@ module XMLScan
|
|
254
254
|
end
|
255
255
|
|
256
256
|
|
257
|
-
def on_stag_end(name)
|
257
|
+
def on_stag_end(name, *a)
|
258
258
|
fix_namespace
|
259
|
-
@visitor.on_stag_end_ns name, @namespace
|
259
|
+
@visitor.on_stag_end_ns name, @namespace, *a
|
260
260
|
end
|
261
261
|
|
262
262
|
|
263
|
-
def on_etag(name)
|
263
|
+
def on_etag(name, *a)
|
264
264
|
h = @ns_hist.pop and @namespace.update h
|
265
|
-
@visitor.on_etag name
|
265
|
+
@visitor.on_etag name, *a
|
266
266
|
end
|
267
267
|
|
268
268
|
|
269
|
-
def on_stag_end_empty(name)
|
269
|
+
def on_stag_end_empty(name, *a)
|
270
270
|
fix_namespace
|
271
|
-
@visitor.on_stag_end_empty_ns name, @namespace
|
271
|
+
@visitor.on_stag_end_empty_ns name, @namespace, *a
|
272
272
|
h = @ns_hist.pop and @namespace.update h
|
273
273
|
end
|
274
274
|
|
275
275
|
|
276
|
-
def on_doctype(root, pubid, sysid)
|
276
|
+
def on_doctype(root, pubid, sysid, *a)
|
277
277
|
if root.count(':') > 1 then
|
278
278
|
ns_parse_error "qualified name `#{root}' includes `:'"
|
279
279
|
end
|
280
|
-
@visitor.on_doctype root, pubid, sysid
|
280
|
+
@visitor.on_doctype root, pubid, sysid, *a
|
281
281
|
end
|
282
282
|
|
283
283
|
|
284
|
-
def on_pi(target, pi)
|
284
|
+
def on_pi(target, pi, *a)
|
285
285
|
if target.include? ?: then
|
286
286
|
ns_parse_error "PI target `#{target}' includes `:'"
|
287
287
|
end
|
288
|
-
@visitor.on_pi target, pi
|
288
|
+
@visitor.on_pi target, pi, *a
|
289
289
|
end
|
290
290
|
|
291
291
|
|
292
|
-
def on_entityref(ref)
|
292
|
+
def on_entityref(ref, *a)
|
293
293
|
if ref.include? ?: then
|
294
294
|
ns_parse_error "entity reference `#{ref}' includes `:'"
|
295
295
|
end
|
296
|
-
@visitor.on_entityref ref
|
296
|
+
@visitor.on_entityref ref, *a
|
297
297
|
end
|
298
298
|
|
299
299
|
|
300
|
-
def on_attr_entityref(ref)
|
300
|
+
def on_attr_entityref(ref, *a)
|
301
301
|
if ref.include? ?: then
|
302
302
|
ns_parse_error "entity reference `#{ref}' includes `:'"
|
303
303
|
end
|
304
|
-
@visitor.on_attr_entityref ref
|
304
|
+
@visitor.on_attr_entityref ref, *a
|
305
305
|
end
|
306
306
|
|
307
307
|
end
|
data/lib/xmlscan/parser.rb
CHANGED
@@ -43,7 +43,7 @@ module XMLScan
|
|
43
43
|
|
44
44
|
private
|
45
45
|
|
46
|
-
def on_xmldecl_version(str)
|
46
|
+
def on_xmldecl_version(str, *a)
|
47
47
|
unless str == '1.0' then
|
48
48
|
warning "unsupported XML version `#{str}'"
|
49
49
|
end
|
@@ -51,7 +51,7 @@ module XMLScan
|
|
51
51
|
end
|
52
52
|
|
53
53
|
|
54
|
-
def on_xmldecl_standalone(str)
|
54
|
+
def on_xmldecl_standalone(str, *a)
|
55
55
|
if str == 'yes' then
|
56
56
|
@standalone = true
|
57
57
|
elsif str == 'no' then
|
@@ -63,7 +63,7 @@ module XMLScan
|
|
63
63
|
end
|
64
64
|
|
65
65
|
|
66
|
-
def on_doctype(name, pubid, sysid)
|
66
|
+
def on_doctype(name, pubid, sysid, *a)
|
67
67
|
if pubid and not sysid then
|
68
68
|
parse_error "public external ID must have both public ID and system ID"
|
69
69
|
end
|
@@ -71,12 +71,12 @@ module XMLScan
|
|
71
71
|
end
|
72
72
|
|
73
73
|
|
74
|
-
def on_prolog_space(s)
|
74
|
+
def on_prolog_space(s, *a)
|
75
75
|
# just ignore it.
|
76
76
|
end
|
77
77
|
|
78
78
|
|
79
|
-
def on_pi(target, pi)
|
79
|
+
def on_pi(target, pi, *a)
|
80
80
|
if target.downcase == 'xml' then
|
81
81
|
parse_error "reserved PI target `#{target}'"
|
82
82
|
end
|
@@ -114,39 +114,43 @@ module XMLScan
|
|
114
114
|
#end
|
115
115
|
|
116
116
|
|
117
|
-
def on_stag(name)
|
117
|
+
def on_stag(name, *a)
|
118
118
|
@elem.push name
|
119
119
|
@visitor.on_stag name
|
120
120
|
@attr.clear
|
121
121
|
end
|
122
122
|
|
123
|
-
def on_attribute(name)
|
123
|
+
def on_attribute(name, *a)
|
124
124
|
unless @attr.check_unique name then
|
125
125
|
wellformed_error "doubled attribute `#{name}'"
|
126
126
|
end
|
127
127
|
@visitor.on_attribute name
|
128
128
|
end
|
129
129
|
|
130
|
-
def on_attr_value(str)
|
130
|
+
def on_attr_value(str, *a)
|
131
131
|
str.tr! "\t\r\n", ' ' # normalize
|
132
132
|
@visitor.on_attr_value str
|
133
133
|
end
|
134
134
|
|
135
|
-
def on_stag_end_empty(name)
|
135
|
+
def on_stag_end(name, *a)
|
136
|
+
@visitor.on_stag_end name, *a
|
137
|
+
end
|
138
|
+
|
139
|
+
def on_stag_end_empty(name, *a)
|
136
140
|
# @visitor.on_stag_end name
|
137
141
|
# @elem.pop
|
138
142
|
# @visitor.on_etag name
|
139
|
-
@visitor.on_stag_end_empty name
|
143
|
+
@visitor.on_stag_end_empty name, *a
|
140
144
|
@elem.pop
|
141
145
|
end
|
142
146
|
|
143
|
-
def on_etag(name)
|
147
|
+
def on_etag(name, *a)
|
144
148
|
last = @elem.pop
|
145
149
|
if last == name then
|
146
|
-
@visitor.on_etag name
|
150
|
+
@visitor.on_etag name, *a
|
147
151
|
elsif last then
|
148
152
|
wellformed_error "element type `#{name}' is not matched"
|
149
|
-
@visitor.on_etag last
|
153
|
+
@visitor.on_etag last, *a
|
150
154
|
else
|
151
155
|
parse_error "end tag `#{name}' appears alone"
|
152
156
|
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'xmlscan/parser'
|
3
|
+
require 'xmlscan/visitor'
|
4
|
+
require 'stringio'
|
5
|
+
|
6
|
+
module XMLScan
|
7
|
+
module ElementProcessor
|
8
|
+
include XMLScan::Visitor
|
9
|
+
|
10
|
+
SKIP = [:on_chardata, :on_stag, :on_etag, :on_attribute, :on_attr_entityref,
|
11
|
+
:on_attr_value, :on_start_document, :on_end_document, :on_attribute_end,
|
12
|
+
:on_stag_end, :on_stag_end_empty, :on_attr_charref, :on_attr_charref_hex]
|
13
|
+
|
14
|
+
MY_METHODS = XMLScan::Visitor.instance_methods.to_a - SKIP
|
15
|
+
|
16
|
+
def initialize(opts={}, mod=nil)
|
17
|
+
raise "No module" unless mod
|
18
|
+
(MY_METHODS - mod.instance_methods).each do |i|
|
19
|
+
self.class.class_eval %{def #{i}(d, *a) d&&(self << d) end}, __FILE__, __LINE__
|
20
|
+
end
|
21
|
+
self.class.send :include, mod
|
22
|
+
|
23
|
+
@element = opts[:element] || raise("need an element")
|
24
|
+
@key = opts[:key] || raise("need a key")
|
25
|
+
@extras = (ex = opts[:extras]) ? ex.map(&:to_sym) : []
|
26
|
+
@tmpl = opts[:substitute] || "{{:key}}"
|
27
|
+
|
28
|
+
@pairs = {} # output name=> [content, context, extra_values] * 1 or more
|
29
|
+
@context = '' # current key(name) of the element (card)
|
30
|
+
@stack = [] # stack of containing context cards
|
31
|
+
@out = [] # current output for name(card)
|
32
|
+
@parser = XMLScan::XMLParser.new(self)
|
33
|
+
self
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
class XMLProcessor
|
39
|
+
include ElementProcessor
|
40
|
+
|
41
|
+
def self.process(io, opts={}, mod=nil)
|
42
|
+
mod ||= ElementProcessing
|
43
|
+
STDERR << "process #{io.inspect}, #{opts.inspect}\n"
|
44
|
+
io = case io
|
45
|
+
when IO, StringIO; io
|
46
|
+
when String; open(io)
|
47
|
+
else raise "bad type file input #{io.inspect}"
|
48
|
+
end
|
49
|
+
|
50
|
+
visitor = new(opts, mod)
|
51
|
+
visitor.parser.parse(io)
|
52
|
+
visitor.pairs
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
module ElementProcessing
|
58
|
+
def <<(s) @out << s end
|
59
|
+
def on_chardata(s) self << s end
|
60
|
+
def on_stag_end(name, s, h, *a)
|
61
|
+
if name.to_sym == @element
|
62
|
+
# starting a new context, first output our substitute string
|
63
|
+
key= h&&h[@key.to_s]||'*no-name*'
|
64
|
+
self << @tmpl.split('|').find {
|
65
|
+
|x| !(/:\w[\w\d]*/ =~ x) || h[$&[1..-1].to_s] }.gsub(/:\w[\w\d]*/) {
|
66
|
+
|m| h[m[1..-1]]
|
67
|
+
}
|
68
|
+
# then push the current context and initialize this one
|
69
|
+
@stack.push([@context, @out, *@ex])
|
70
|
+
@pairs[key] = nil # insert it when first seen
|
71
|
+
@context = key; @out = []; @ex = @extras.map {|e| h[e.to_s]}
|
72
|
+
else self << s end # pass through tags we aren't processing
|
73
|
+
end
|
74
|
+
|
75
|
+
def on_etag(name, s=nil)
|
76
|
+
if name.to_sym == @element
|
77
|
+
# output a card (name, content, type)
|
78
|
+
@pairs[@context] = [@out, @stack[-1][0], *@ex]
|
79
|
+
# restore previous context from stack
|
80
|
+
last = @stack.pop
|
81
|
+
@context, @out, @ex = last.shift, last.shift, *last
|
82
|
+
else self << s end
|
83
|
+
end
|
84
|
+
|
85
|
+
def on_stag_empty_end(name, s=nil, h={}, *a)
|
86
|
+
if name.to_sym == @element
|
87
|
+
|
88
|
+
key= h&&h[@key.to_s]||'*no-name*'
|
89
|
+
ex = @extras.map {|e| h[e]}
|
90
|
+
@pairs[key] = [[], @context, *ex]
|
91
|
+
else self << s end
|
92
|
+
end
|
93
|
+
|
94
|
+
attr_reader :pairs, :parser
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
data/lib/xmlscan/scanner.rb
CHANGED
@@ -122,20 +122,29 @@ module XMLScan
|
|
122
122
|
self
|
123
123
|
end
|
124
124
|
|
125
|
-
|
125
|
+
=begin
|
126
|
+
Managing source in a private array.
|
127
|
+
* tag oriented (?< and ?> are the key tokens
|
128
|
+
* ?> that aren't followed by another ?< or ?> are stripped in splitting
|
129
|
+
=end
|
126
130
|
def get
|
127
131
|
pop or
|
128
132
|
unless @eof then
|
129
133
|
last = @last
|
130
134
|
begin
|
131
|
-
|
132
|
-
unless src then
|
135
|
+
unless chunk = @src.gets then
|
133
136
|
@eof = true
|
134
|
-
|
135
|
-
last
|
136
|
-
|
137
|
+
@last = nil
|
138
|
+
return last
|
139
|
+
#unshift last # to be popped after reverse!
|
140
|
+
#last = nil
|
141
|
+
#break
|
137
142
|
end
|
138
|
-
|
143
|
+
# zero-width lookahead: < or >< or >>
|
144
|
+
# so don't consume those (but split leaving them always at the
|
145
|
+
# start of chunks)
|
146
|
+
# consume (>) and split on >
|
147
|
+
a = chunk.split(/(?=<|>[<>])|>/, -1)
|
139
148
|
if last then
|
140
149
|
unless /\A[<>]/ =~ a.first then
|
141
150
|
a[0] = last << (a.first || '')
|
@@ -143,6 +152,7 @@ module XMLScan
|
|
143
152
|
push last
|
144
153
|
end
|
145
154
|
end
|
155
|
+
raise "size #{size}" if size > 1
|
146
156
|
concat a
|
147
157
|
last = pop
|
148
158
|
end while empty?
|
@@ -223,7 +233,7 @@ module XMLScan
|
|
223
233
|
last.push @last.inspect
|
224
234
|
end
|
225
235
|
a.push '#eof' if @eof
|
226
|
-
"((#{a
|
236
|
+
"((#{a*' '}) l(#{last*' '}) . #{source.inspect})"
|
227
237
|
end
|
228
238
|
|
229
239
|
def each
|
@@ -354,72 +364,72 @@ module XMLScan
|
|
354
364
|
end
|
355
365
|
end
|
356
366
|
|
357
|
-
def on_xmldecl_version(str)
|
358
|
-
@visitor.on_xmldecl_version str
|
367
|
+
def on_xmldecl_version(str, *a)
|
368
|
+
@visitor.on_xmldecl_version str, *a
|
359
369
|
end
|
360
370
|
|
361
|
-
def on_xmldecl_encoding(str)
|
362
|
-
@visitor.on_xmldecl_encoding str
|
371
|
+
def on_xmldecl_encoding(str, *a)
|
372
|
+
@visitor.on_xmldecl_encoding str, *a
|
363
373
|
end
|
364
374
|
|
365
|
-
def on_xmldecl_standalone(str)
|
366
|
-
@visitor.on_xmldecl_standalone str
|
375
|
+
def on_xmldecl_standalone(str, *a)
|
376
|
+
@visitor.on_xmldecl_standalone str, *a
|
367
377
|
end
|
368
378
|
|
369
|
-
def on_xmldecl_other(name, value)
|
370
|
-
@visitor.on_xmldecl_other name, value
|
379
|
+
def on_xmldecl_other(name, value, *a)
|
380
|
+
@visitor.on_xmldecl_other name, value, *a
|
371
381
|
end
|
372
382
|
|
373
|
-
def on_xmldecl_end
|
374
|
-
@visitor.on_xmldecl_end
|
383
|
+
def on_xmldecl_end(*a)
|
384
|
+
@visitor.on_xmldecl_end *a
|
375
385
|
end
|
376
386
|
|
377
|
-
def on_doctype(root, pubid, sysid)
|
378
|
-
@visitor.on_doctype root, pubid, sysid
|
387
|
+
def on_doctype(root, pubid, sysid, *a)
|
388
|
+
@visitor.on_doctype root, pubid, sysid, *a
|
379
389
|
end
|
380
390
|
|
381
|
-
def on_prolog_space(str)
|
382
|
-
@visitor.on_prolog_space str
|
391
|
+
def on_prolog_space(str, *a)
|
392
|
+
@visitor.on_prolog_space str, *a
|
383
393
|
end
|
384
394
|
|
385
|
-
def on_comment(str)
|
386
|
-
@visitor.on_comment str
|
395
|
+
def on_comment(str, *a)
|
396
|
+
@visitor.on_comment str, *a
|
387
397
|
end
|
388
398
|
|
389
|
-
def on_pi(target, pi)
|
390
|
-
@visitor.on_pi target, pi
|
399
|
+
def on_pi(target, pi, *a)
|
400
|
+
@visitor.on_pi target, pi, *a
|
391
401
|
end
|
392
402
|
|
393
|
-
def on_chardata(str)
|
394
|
-
@visitor.on_chardata str
|
403
|
+
def on_chardata(str, *a)
|
404
|
+
@visitor.on_chardata str, *a
|
395
405
|
end
|
396
406
|
|
397
|
-
def on_cdata(str)
|
398
|
-
@visitor.on_cdata str
|
407
|
+
def on_cdata(str, *a)
|
408
|
+
@visitor.on_cdata str, *a
|
399
409
|
end
|
400
410
|
|
401
|
-
def on_etag(name)
|
402
|
-
@visitor.on_etag name
|
411
|
+
def on_etag(name, *a)
|
412
|
+
@visitor.on_etag name, *a
|
403
413
|
end
|
404
414
|
|
405
|
-
def on_entityref(ref)
|
406
|
-
@visitor.on_entityref ref
|
415
|
+
def on_entityref(ref, *a)
|
416
|
+
@visitor.on_entityref ref, *a
|
407
417
|
end
|
408
418
|
|
409
|
-
def on_charref(code)
|
410
|
-
@visitor.on_charref code
|
419
|
+
def on_charref(code, *a)
|
420
|
+
@visitor.on_charref code, *a
|
411
421
|
end
|
412
422
|
|
413
|
-
def on_charref_hex(code)
|
414
|
-
@visitor.on_charref_hex code
|
423
|
+
def on_charref_hex(code, *a)
|
424
|
+
@visitor.on_charref_hex code, *a
|
415
425
|
end
|
416
426
|
|
417
|
-
def on_start_document
|
418
|
-
@visitor.on_start_document
|
427
|
+
def on_start_document(*a)
|
428
|
+
@visitor.on_start_document *a
|
419
429
|
end
|
420
430
|
|
421
|
-
def on_end_document
|
422
|
-
@visitor.on_end_document
|
431
|
+
def on_end_document(*a)
|
432
|
+
@visitor.on_end_document *a
|
423
433
|
end
|
424
434
|
|
425
435
|
|
@@ -444,50 +454,51 @@ module XMLScan
|
|
444
454
|
#
|
445
455
|
# A: on_chardata ('HOGE')
|
446
456
|
|
447
|
-
def on_stag(name)
|
448
|
-
@visitor.on_stag name
|
457
|
+
def on_stag(name, *a)
|
458
|
+
@visitor.on_stag name, *a
|
449
459
|
end
|
450
460
|
|
451
|
-
def on_attribute(name)
|
452
|
-
@visitor.on_attribute name
|
461
|
+
def on_attribute(name, *a)
|
462
|
+
@visitor.on_attribute name, *a
|
453
463
|
end
|
454
464
|
|
455
|
-
def on_attr_value(str)
|
456
|
-
@visitor.on_attr_value str
|
465
|
+
def on_attr_value(str, *a)
|
466
|
+
@visitor.on_attr_value str, *a
|
457
467
|
end
|
458
468
|
|
459
|
-
def on_attr_entityref(ref)
|
460
|
-
@visitor.on_attr_entityref ref
|
469
|
+
def on_attr_entityref(ref, *a)
|
470
|
+
@visitor.on_attr_entityref ref, *a
|
461
471
|
end
|
462
472
|
|
463
|
-
def on_attr_charref(code)
|
464
|
-
@visitor.on_attr_charref code
|
473
|
+
def on_attr_charref(code, *a)
|
474
|
+
@visitor.on_attr_charref code, *a
|
465
475
|
end
|
466
476
|
|
467
|
-
def on_attr_charref_hex(code)
|
468
|
-
@visitor.on_attr_charref_hex code
|
477
|
+
def on_attr_charref_hex(code, *a)
|
478
|
+
@visitor.on_attr_charref_hex code, *a
|
469
479
|
end
|
470
480
|
|
471
|
-
def on_attribute_end(name)
|
472
|
-
@visitor.on_attribute_end name
|
481
|
+
def on_attribute_end(name, *a)
|
482
|
+
@visitor.on_attribute_end name, *a, *a
|
473
483
|
end
|
474
484
|
|
475
|
-
def on_stag_end_empty(name)
|
476
|
-
@visitor.on_stag_end_empty name
|
485
|
+
def on_stag_end_empty(name, *a)
|
486
|
+
@visitor.on_stag_end_empty name, *a
|
477
487
|
end
|
478
488
|
|
479
|
-
def on_stag_end(name)
|
480
|
-
|
489
|
+
def on_stag_end(name, *a)
|
490
|
+
#STDERR << "ose #{name}, #{a.inspect}\n"
|
491
|
+
@visitor.on_stag_end name, *a
|
481
492
|
end
|
482
493
|
|
483
494
|
|
495
|
+
S_OPT_EXAMPLE = "".encode(::Encoding::WINDOWS_31J)
|
496
|
+
E_OPT_EXAMPLE = "".encode(::Encoding::EUCJP)
|
484
497
|
|
485
498
|
private
|
486
499
|
|
487
500
|
module OptRegexp
|
488
501
|
UTFSTR = "é"
|
489
|
-
S_OPT_EXAMPLE = "".encode Encoding.find('Windows-31J')
|
490
|
-
E_OPT_EXAMPLE = "".encode Encoding.find('EUC-JP')
|
491
502
|
|
492
503
|
RE_ENCODINGS = {
|
493
504
|
:n=>/e/n.encoding,
|
@@ -525,6 +536,7 @@ module XMLScan
|
|
525
536
|
else
|
526
537
|
s = $`
|
527
538
|
on_chardata s unless s.empty?
|
539
|
+
#orig = $'.sub(/(?=;).*$/,'')
|
528
540
|
ref = nil
|
529
541
|
$'.split('&', -1).each { |s|
|
530
542
|
unless /(?!\A);|(?=[ \t\r\n])/ =~ s and not $&.empty? then
|
@@ -533,18 +545,18 @@ module XMLScan
|
|
533
545
|
parse_error "reference to `#{ref}' doesn't end with `;'"
|
534
546
|
else
|
535
547
|
parse_error "`&' is not used for entity/character references"
|
536
|
-
on_chardata
|
548
|
+
on_chardata '&'+s
|
537
549
|
next
|
538
550
|
end
|
539
551
|
end
|
540
|
-
ref = $`
|
552
|
+
orig = ?& + (ref = $`) + ?;
|
541
553
|
s = $'
|
542
554
|
if /\A[^#]/ =~ ref then
|
543
|
-
on_entityref ref
|
555
|
+
on_entityref ref, orig
|
544
556
|
elsif /\A#(\d+)\z/ =~ ref then
|
545
|
-
on_charref $1.to_i
|
557
|
+
on_charref $1.to_i, orig
|
546
558
|
elsif /\A#x([\dA-Fa-f]+)\z/ =~ ref then
|
547
|
-
on_charref_hex $1.hex
|
559
|
+
on_charref_hex $1.hex, orig
|
548
560
|
else
|
549
561
|
parse_error "invalid character reference `#{ref}'"
|
550
562
|
end
|
@@ -558,8 +570,9 @@ module XMLScan
|
|
558
570
|
end
|
559
571
|
|
560
572
|
|
561
|
-
def
|
573
|
+
def scan_attr_value(s) # mostly copied and pasted from scan_chardata
|
562
574
|
unless /&/ =~ s then
|
575
|
+
#STDERR << "no& attr_val #{s.inspect}, #{caller*"\n"}\n" if s == ?>
|
563
576
|
on_attr_value s
|
564
577
|
else
|
565
578
|
s = $`
|
@@ -576,14 +589,14 @@ module XMLScan
|
|
576
589
|
next
|
577
590
|
end
|
578
591
|
end
|
579
|
-
ref = $`
|
592
|
+
orig = ?& + (ref = $`) + ?;
|
580
593
|
s = $'
|
581
594
|
if /\A[^#]/ =~ ref then
|
582
|
-
on_attr_entityref ref
|
595
|
+
on_attr_entityref ref, orig
|
583
596
|
elsif /\A#(\d+)\z/ =~ ref then
|
584
|
-
on_attr_charref $1.to_i
|
597
|
+
on_attr_charref $1.to_i, orig
|
585
598
|
elsif /\A#x([\dA-Fa-f]+)\z/ =~ ref then
|
586
|
-
on_attr_charref_hex $1.hex
|
599
|
+
on_attr_charref_hex $1.hex, orig
|
587
600
|
else
|
588
601
|
parse_error "invalid character reference `#{ref}'"
|
589
602
|
end
|
@@ -682,6 +695,7 @@ module XMLScan
|
|
682
695
|
|
683
696
|
|
684
697
|
def scan_etag(s)
|
698
|
+
orig="#{s}>"
|
685
699
|
s[0,2] = '' # remove '</'
|
686
700
|
if s.empty? then
|
687
701
|
if @src.close_tag then # </>
|
@@ -689,14 +703,14 @@ module XMLScan
|
|
689
703
|
else # </< or </[EOF]
|
690
704
|
parse_error "parse error at `</'"
|
691
705
|
s << '>' if @src.close_tag
|
692
|
-
return on_chardata
|
706
|
+
return on_chardata '</' << s
|
693
707
|
end
|
694
708
|
elsif /[ \t\n\r]+/ =~ s then
|
695
709
|
s1, s2 = $`, $'
|
696
710
|
if s1.empty? then # </ tag
|
697
711
|
parse_error "parse error at `</'"
|
698
712
|
s << '>' if @src.close_tag
|
699
|
-
return on_chardata
|
713
|
+
return on_chardata '</' + s
|
700
714
|
elsif not s2.empty? then # </ta g
|
701
715
|
parse_error "illegal whitespace is found within end tag `#{s1}'"
|
702
716
|
while @src.get_tag
|
@@ -705,7 +719,7 @@ module XMLScan
|
|
705
719
|
s = s1
|
706
720
|
end
|
707
721
|
found_unclosed_etag s unless @src.close_tag # </tag< or </tag[EOF]
|
708
|
-
on_etag s
|
722
|
+
on_etag s, orig
|
709
723
|
end
|
710
724
|
|
711
725
|
|
@@ -745,6 +759,8 @@ module XMLScan
|
|
745
759
|
|
746
760
|
|
747
761
|
def scan_stag(s)
|
762
|
+
hash = {}
|
763
|
+
orig = [s.dup]
|
748
764
|
unless /(?=[\/ \t\n\r='"])/ =~ s then
|
749
765
|
name = s
|
750
766
|
name[0,1] = '' # remove `<'
|
@@ -753,54 +769,65 @@ module XMLScan
|
|
753
769
|
return found_empty_stag
|
754
770
|
else # << or <[EOF]
|
755
771
|
parse_error "parse error at `<'"
|
756
|
-
return on_chardata
|
772
|
+
return on_chardata '<'
|
757
773
|
end
|
758
774
|
end
|
759
775
|
on_stag name
|
760
776
|
found_unclosed_stag name unless @src.close_tag
|
761
|
-
on_stag_end name
|
777
|
+
on_stag_end name, orig*''+?>, {}
|
762
778
|
else
|
779
|
+
k = nil
|
763
780
|
name = $`
|
764
781
|
s = $'
|
765
782
|
name[0,1] = '' # remove `<'
|
766
783
|
if name.empty? then # `< tag' or `<=`
|
767
784
|
parse_error "parse error at `<'"
|
768
785
|
s << '>' if @src.close_tag
|
769
|
-
return on_chardata
|
786
|
+
return on_chardata '<' << s
|
770
787
|
end
|
771
788
|
on_stag name
|
772
789
|
emptyelem = false
|
773
|
-
key,val,error,qmark,c = nil
|
774
790
|
begin
|
775
791
|
continue = false
|
776
792
|
s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/
|
777
793
|
) { |key,val,error|
|
778
|
-
|
794
|
+
orig_val = []
|
795
|
+
if key then
|
779
796
|
on_attribute key
|
797
|
+
k=key
|
798
|
+
orig_val << val
|
780
799
|
qmark = val.slice!(0,1)
|
781
800
|
if val[-1] == qmark[0] then
|
782
801
|
val.chop!
|
783
|
-
|
802
|
+
scan_attr_value val unless val.empty?
|
784
803
|
else
|
785
|
-
|
804
|
+
scan_attr_value val unless val.empty?
|
786
805
|
begin
|
787
806
|
s = @src.get
|
807
|
+
#STDERR << "get some more? #{s.inspect}, #{orig.inspect}\n"
|
788
808
|
unless s then
|
789
809
|
parse_error "unterminated attribute `#{key}' meets EOF"
|
790
810
|
break
|
791
811
|
end
|
812
|
+
orig << s.dup
|
792
813
|
c = s[0]
|
793
814
|
val, s = s.split(qmark, 2)
|
815
|
+
orig_val << val
|
794
816
|
if c == ?< then
|
795
817
|
wellformed_error "`<' is found in attribute `#{key}'"
|
796
818
|
elsif c != ?> then
|
797
|
-
|
819
|
+
#STDERR << "close in quote? #{c.inspect}, #{@src.tag_start?}, #{@src.tag_end?}, #{s.inspect}, #{val.inspect}, #{orig.inspect}, #{orig_val.inspect}\n"
|
820
|
+
orig_val[-1,0] = orig[-1,0] = ?> # if @src.tag_start?
|
821
|
+
scan_attr_value ?>
|
798
822
|
end
|
799
|
-
|
823
|
+
scan_attr_value val if c
|
800
824
|
end until s
|
801
825
|
continue = s # if eof then continue is false, else true.
|
802
826
|
end
|
803
|
-
|
827
|
+
#STDERR << "attr:#{k}, #{orig_val}\n"
|
828
|
+
hash[k] = orig_val*''
|
829
|
+
#STDERR << "attr end #{hash.inspect}, #{k}, #{orig_val}\n"
|
830
|
+
on_attribute_end key #, orig_val*''
|
804
831
|
elsif error then
|
805
832
|
continue = s = found_stag_error(error)
|
806
833
|
else
|
@@ -816,9 +843,11 @@ module XMLScan
|
|
816
843
|
end
|
817
844
|
end
|
818
845
|
if emptyelem then
|
819
|
-
on_stag_end_empty name
|
846
|
+
on_stag_end_empty name, orig*''+?>, hash
|
820
847
|
else
|
821
|
-
|
848
|
+
#STDERR << "on stag end #{ name}, \"<#{name}#{s}>\", #{hash.inspect}\n"
|
849
|
+
on_stag_end name, orig*''+?>, hash
|
850
|
+
#on_stag_end name, "<#{name}#{s}>", hash
|
822
851
|
end
|
823
852
|
end
|
824
853
|
end
|
@@ -1067,10 +1096,10 @@ module XMLScan
|
|
1067
1096
|
|
1068
1097
|
|
1069
1098
|
def scan_document
|
1070
|
-
on_start_document
|
1099
|
+
on_start_document ''
|
1071
1100
|
@src.prepare
|
1072
1101
|
scan_prolog @src.get
|
1073
|
-
on_end_document
|
1102
|
+
on_end_document ''
|
1074
1103
|
end
|
1075
1104
|
|
1076
1105
|
|
data/lib/xmlscan/version.rb
CHANGED
@@ -9,15 +9,9 @@
|
|
9
9
|
|
10
10
|
module XMLScan
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
#
|
17
|
-
# TENNY which is larger than 1 (e.g. 'X.X.1' or 'X.X.2') means this
|
18
|
-
# release is a stable release.
|
19
|
-
|
20
|
-
VERSION = '0.2.3'
|
21
|
-
RELEASE_DATE = '2003-05-02'
|
12
|
+
GEMNAME = 'xmlscan'
|
13
|
+
VERSION_FILE = File.expand_path('../../VERSION', File.dirname(__FILE__))
|
14
|
+
VERSION = open(VERSION_FILE).to_a*''.chop
|
15
|
+
RELEASE_DATE = open(VERSION_FILE).mtime.strftime('%Y-%m-%d')
|
22
16
|
|
23
17
|
end
|
data/lib/xmlscan/visitor.rb
CHANGED
@@ -54,88 +54,88 @@ module XMLScan
|
|
54
54
|
def warning(msg)
|
55
55
|
end
|
56
56
|
|
57
|
-
def on_xmldecl
|
57
|
+
def on_xmldecl(*a)
|
58
58
|
end
|
59
59
|
|
60
|
-
def on_xmldecl_key(key, str)
|
60
|
+
def on_xmldecl_key(key, str, *a)
|
61
61
|
end
|
62
62
|
|
63
|
-
def on_xmldecl_version(str)
|
63
|
+
def on_xmldecl_version(str, *a)
|
64
64
|
end
|
65
65
|
|
66
|
-
def on_xmldecl_encoding(str)
|
66
|
+
def on_xmldecl_encoding(str, *a)
|
67
67
|
end
|
68
68
|
|
69
|
-
def on_xmldecl_standalone(str)
|
69
|
+
def on_xmldecl_standalone(str, *a)
|
70
70
|
end
|
71
71
|
|
72
|
-
def on_xmldecl_other(name, value)
|
72
|
+
def on_xmldecl_other(name, value, *a)
|
73
73
|
end
|
74
74
|
|
75
|
-
def on_xmldecl_end
|
75
|
+
def on_xmldecl_end(*a)
|
76
76
|
end
|
77
77
|
|
78
|
-
def on_doctype(root, pubid, sysid)
|
78
|
+
def on_doctype(root, pubid, sysid, *a)
|
79
79
|
end
|
80
80
|
|
81
|
-
def on_prolog_space(str)
|
81
|
+
def on_prolog_space(str, *a)
|
82
82
|
end
|
83
83
|
|
84
|
-
def on_comment(str)
|
84
|
+
def on_comment(str, *a)
|
85
85
|
end
|
86
86
|
|
87
|
-
def on_pi(target, pi)
|
87
|
+
def on_pi(target, pi, *a)
|
88
88
|
end
|
89
89
|
|
90
|
-
def on_chardata(str)
|
90
|
+
def on_chardata(str, *a)
|
91
91
|
end
|
92
92
|
|
93
|
-
def on_cdata(str)
|
93
|
+
def on_cdata(str, *a)
|
94
94
|
end
|
95
95
|
|
96
|
-
def on_etag(name)
|
96
|
+
def on_etag(name, *a)
|
97
97
|
end
|
98
98
|
|
99
|
-
def on_entityref(ref)
|
99
|
+
def on_entityref(ref, *a)
|
100
100
|
end
|
101
101
|
|
102
|
-
def on_charref(code)
|
102
|
+
def on_charref(code, *a)
|
103
103
|
end
|
104
104
|
|
105
|
-
def on_charref_hex(code)
|
105
|
+
def on_charref_hex(code, *a)
|
106
106
|
end
|
107
107
|
|
108
|
-
def on_start_document
|
108
|
+
def on_start_document(*a)
|
109
109
|
end
|
110
110
|
|
111
|
-
def on_end_document
|
111
|
+
def on_end_document(*a)
|
112
112
|
end
|
113
113
|
|
114
|
-
def on_stag(name)
|
114
|
+
def on_stag(name, *a)
|
115
115
|
end
|
116
116
|
|
117
|
-
def on_attribute(name)
|
117
|
+
def on_attribute(name, *a)
|
118
118
|
end
|
119
119
|
|
120
|
-
def on_attr_value(str)
|
120
|
+
def on_attr_value(str, *a)
|
121
121
|
end
|
122
122
|
|
123
|
-
def on_attr_entityref(ref)
|
123
|
+
def on_attr_entityref(ref, *a)
|
124
124
|
end
|
125
125
|
|
126
|
-
def on_attr_charref(code)
|
126
|
+
def on_attr_charref(code, *a)
|
127
127
|
end
|
128
128
|
|
129
|
-
def on_attr_charref_hex(code)
|
129
|
+
def on_attr_charref_hex(code, *a)
|
130
130
|
end
|
131
131
|
|
132
|
-
def on_attribute_end(name)
|
132
|
+
def on_attribute_end(name, *a)
|
133
133
|
end
|
134
134
|
|
135
|
-
def on_stag_end_empty(name)
|
135
|
+
def on_stag_end_empty(name, *a)
|
136
136
|
end
|
137
137
|
|
138
|
-
def on_stag_end(name)
|
138
|
+
def on_stag_end(name, *a)
|
139
139
|
end
|
140
140
|
|
141
141
|
end
|
@@ -146,13 +146,15 @@ module XMLScan
|
|
146
146
|
include Visitor
|
147
147
|
|
148
148
|
def initialize(visitor)
|
149
|
+
#STDERR << "new Decoration #{visitor}\n"
|
149
150
|
@visitor = visitor
|
150
151
|
end
|
151
152
|
|
152
153
|
Visitor.instance_methods.each { |i|
|
154
|
+
#STDERR << "#{i} \#{args.inspect}\\n"
|
153
155
|
module_eval <<-END, __FILE__, __LINE__ + 1
|
154
156
|
def #{i}(*args)
|
155
|
-
@visitor.#{i}(*args)
|
157
|
+
@visitor&&@visitor.#{i}(*args)
|
156
158
|
end
|
157
159
|
END
|
158
160
|
}
|
data/lib/xmlscan/xmlchar.rb
CHANGED
@@ -115,95 +115,95 @@ module XMLScan
|
|
115
115
|
end
|
116
116
|
|
117
117
|
|
118
|
-
def on_xmldecl_version(str)
|
118
|
+
def on_xmldecl_version(str, *a)
|
119
119
|
check_valid_version str
|
120
120
|
super
|
121
121
|
end
|
122
122
|
|
123
|
-
def on_xmldecl_encoding(str)
|
123
|
+
def on_xmldecl_encoding(str, *a)
|
124
124
|
check_valid_encoding str
|
125
125
|
super
|
126
126
|
end
|
127
127
|
|
128
|
-
def on_xmldecl_standalone(str)
|
128
|
+
def on_xmldecl_standalone(str, *a)
|
129
129
|
check_valid_chardata str
|
130
130
|
super
|
131
131
|
end
|
132
132
|
|
133
|
-
def on_doctype(root, pubid, sysid)
|
133
|
+
def on_doctype(root, pubid, sysid, *a)
|
134
134
|
check_valid_name root
|
135
135
|
check_valid_pubid pubid if pubid
|
136
136
|
check_valid_chardata sysid if sysid
|
137
137
|
super
|
138
138
|
end
|
139
139
|
|
140
|
-
def on_comment(str)
|
140
|
+
def on_comment(str, *a)
|
141
141
|
check_valid_chardata str
|
142
142
|
super
|
143
143
|
end
|
144
144
|
|
145
|
-
def on_pi(target, pi)
|
145
|
+
def on_pi(target, pi, *a)
|
146
146
|
check_valid_name target
|
147
147
|
check_valid_chardata pi
|
148
148
|
super
|
149
149
|
end
|
150
150
|
|
151
|
-
def on_chardata(str)
|
151
|
+
def on_chardata(str, *a)
|
152
152
|
check_valid_chardata str
|
153
153
|
super
|
154
154
|
end
|
155
155
|
|
156
|
-
def on_cdata(str)
|
156
|
+
def on_cdata(str, *a)
|
157
157
|
check_valid_chardata str
|
158
158
|
super
|
159
159
|
end
|
160
160
|
|
161
|
-
def on_etag(name)
|
161
|
+
def on_etag(name, *a)
|
162
162
|
check_valid_name name
|
163
163
|
super
|
164
164
|
end
|
165
165
|
|
166
|
-
def on_entityref(ref)
|
166
|
+
def on_entityref(ref, *a)
|
167
167
|
check_valid_name ref
|
168
168
|
super
|
169
169
|
end
|
170
170
|
|
171
|
-
def on_charref(code)
|
171
|
+
def on_charref(code, *a)
|
172
172
|
check_valid_char code
|
173
173
|
super
|
174
174
|
end
|
175
175
|
|
176
|
-
def on_charref_hex(code)
|
176
|
+
def on_charref_hex(code, *a)
|
177
177
|
check_valid_char code
|
178
178
|
super
|
179
179
|
end
|
180
180
|
|
181
|
-
def on_stag(name)
|
181
|
+
def on_stag(name, *a)
|
182
182
|
check_valid_name name
|
183
183
|
super
|
184
184
|
end
|
185
185
|
|
186
|
-
def on_attribute(name)
|
186
|
+
def on_attribute(name, *a)
|
187
187
|
check_valid_name name
|
188
188
|
super
|
189
189
|
end
|
190
190
|
|
191
|
-
def on_attr_value(str)
|
191
|
+
def on_attr_value(str, *a)
|
192
192
|
check_valid_chardata str
|
193
193
|
super
|
194
194
|
end
|
195
195
|
|
196
|
-
def on_attr_entityref(ref)
|
196
|
+
def on_attr_entityref(ref, *a)
|
197
197
|
check_valid_name ref
|
198
198
|
super
|
199
199
|
end
|
200
200
|
|
201
|
-
def on_attr_charref(code)
|
201
|
+
def on_attr_charref(code, *a)
|
202
202
|
check_valid_char code
|
203
203
|
super
|
204
204
|
end
|
205
205
|
|
206
|
-
def on_attr_charref_hex(code)
|
206
|
+
def on_attr_charref_hex(code, *a)
|
207
207
|
check_valid_char code
|
208
208
|
super
|
209
209
|
end
|
metadata
CHANGED
@@ -1,19 +1,19 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xmlscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.0prec
|
5
|
+
prerelease: 5
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- UENO Katsuhiro <katsu@blue.sky.or.jp>
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &9706320 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.8.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *9706320
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &9705800 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.12'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *9705800
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bundler
|
38
|
-
requirement: &
|
38
|
+
requirement: &9705220 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.0.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *9705220
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: jeweler
|
49
|
-
requirement: &
|
49
|
+
requirement: &9704580 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: 1.8.3
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *9704580
|
58
58
|
description: The fastest XML parser written in 100% pure Ruby.
|
59
59
|
email: gerryg@inbox.com
|
60
60
|
executables: []
|
@@ -69,15 +69,14 @@ files:
|
|
69
69
|
- Rakefile
|
70
70
|
- THANKS
|
71
71
|
- VERSION
|
72
|
-
- install.rb
|
73
72
|
- lib/xmlscan/htmlscan.rb
|
74
73
|
- lib/xmlscan/namespace.rb
|
75
74
|
- lib/xmlscan/parser.rb
|
75
|
+
- lib/xmlscan/processor.rb
|
76
76
|
- lib/xmlscan/scanner.rb
|
77
77
|
- lib/xmlscan/version.rb
|
78
78
|
- lib/xmlscan/visitor.rb
|
79
79
|
- lib/xmlscan/xmlchar.rb
|
80
|
-
- test.rb
|
81
80
|
homepage: http://github.com/GerryG/xmlformat/
|
82
81
|
licenses:
|
83
82
|
- MIT
|
@@ -97,13 +96,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
97
96
|
version: '0'
|
98
97
|
segments:
|
99
98
|
- 0
|
100
|
-
hash:
|
99
|
+
hash: 4206592949743860129
|
101
100
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
101
|
none: false
|
103
102
|
requirements:
|
104
|
-
- - ! '
|
103
|
+
- - ! '>'
|
105
104
|
- !ruby/object:Gem::Version
|
106
|
-
version:
|
105
|
+
version: 1.3.1
|
107
106
|
requirements: []
|
108
107
|
rubyforge_project:
|
109
108
|
rubygems_version: 1.8.15
|
data/install.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
#
|
3
|
-
# install.rb
|
4
|
-
#
|
5
|
-
# $Id: install.rb,v 1.2 2002/12/26 21:09:38 katsu Exp $
|
6
|
-
|
7
|
-
require 'rbconfig'
|
8
|
-
require 'ftools'
|
9
|
-
require 'find'
|
10
|
-
require 'getoptlong'
|
11
|
-
|
12
|
-
DEFAULT_DESTDIR = Config::CONFIG['sitelibdir'] || Config::CONFIG['sitedir']
|
13
|
-
SRCDIR = File.dirname(__FILE__)
|
14
|
-
|
15
|
-
|
16
|
-
def install_rb(from, to)
|
17
|
-
from = SRCDIR + '/' + from
|
18
|
-
Find.find(from) { |src|
|
19
|
-
next unless File.file? src
|
20
|
-
next unless /\.rb\z/ =~ src
|
21
|
-
dst = src.sub(/\A#{Regexp.escape(from)}/, to)
|
22
|
-
File.makedirs File.dirname(dst), true
|
23
|
-
File.install src, dst, 0644, true
|
24
|
-
}
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
destdir = DEFAULT_DESTDIR
|
29
|
-
begin
|
30
|
-
GetoptLong.new([ "-d", "--destdir", GetoptLong::REQUIRED_ARGUMENT ]
|
31
|
-
).each_option { |opt, arg|
|
32
|
-
case opt
|
33
|
-
when '-d' then
|
34
|
-
destdir = arg
|
35
|
-
end
|
36
|
-
}
|
37
|
-
rescue
|
38
|
-
exit 2
|
39
|
-
end
|
40
|
-
|
41
|
-
install_rb "lib", destdir
|