xmlscan 0.2.3 → 0.3.0prea
Sign up to get free protection for your applications and to get access to all the features.
- data/README.processor +33 -0
- data/Rakefile +7 -4
- data/VERSION +1 -1
- data/lib/xmlscan/htmlscan.rb +7 -7
- data/lib/xmlscan/namespace.rb +33 -33
- data/lib/xmlscan/parser.rb +17 -13
- data/lib/xmlscan/processor.rb +47 -0
- data/lib/xmlscan/scanner.rb +118 -89
- data/lib/xmlscan/version.rb +4 -10
- data/lib/xmlscan/visitor.rb +31 -29
- data/lib/xmlscan/xmlchar.rb +18 -18
- data/xmlcard.rb +48 -0
- metadata +17 -14
data/README.processor
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
|
2
|
+
processor = XMLScan::Processor.hash(:element=>:card, :key=>:name) do |k,h,s,|
|
3
|
+
h[:transclude] || "{{#{h[:name]}}}"
|
4
|
+
end
|
5
|
+
|
6
|
+
test_cases = [
|
7
|
+
[ '<card name="foo" transclude="{{foo|titled}}">Some
|
8
|
+
<card name="name">Name data</card> and < >
|
9
|
+
<p>para data<b>bold</b>
|
10
|
+
</p><br/>
|
11
|
+
more<card
|
12
|
+
name="+hello" attr="&quote;foo&quote;"> and <card name="+nested">nested twice data</card>
|
13
|
+
</card>
|
14
|
+
</card>
|
15
|
+
', {
|
16
|
+
'foo' => 'Some
|
17
|
+
{{name}} and < >
|
18
|
+
<p>para data<b>bold</b>
|
19
|
+
</p><br/>
|
20
|
+
more{{+hello}}
|
21
|
+
',
|
22
|
+
'name' => 'Name data',
|
23
|
+
'foo+hello' => ' and {{+nested}}
|
24
|
+
',
|
25
|
+
'foo+hello+nested' => 'nested twice data' } ],
|
26
|
+
]
|
27
|
+
|
28
|
+
test_cases.each { |p|
|
29
|
+
assert processor.call(p[0]) == p[1]
|
30
|
+
}
|
31
|
+
|
32
|
+
|
33
|
+
|
data/Rakefile
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
|
4
4
|
require 'rubygems'
|
5
5
|
require 'bundler'
|
6
|
+
require 'xmlscan/version'
|
7
|
+
|
8
|
+
VERSION = XMLScan::VERSION # File.exist?('VERSION') ? File.read('VERSION') : ""
|
6
9
|
|
7
10
|
begin
|
8
11
|
Bundler.setup(:default, :development)
|
@@ -15,10 +18,11 @@ end
|
|
15
18
|
require 'rake'
|
16
19
|
|
17
20
|
begin
|
21
|
+
include XMLScan
|
18
22
|
require 'jeweler'
|
19
23
|
Jeweler::Tasks.new do |gem|
|
20
|
-
gem.name =
|
21
|
-
gem.version =
|
24
|
+
gem.name = 'xmlscan'
|
25
|
+
gem.version = XMLScan::VERSION
|
22
26
|
gem.license = "MIT"
|
23
27
|
gem.summary = "The fastest XML parser written in 100% pure Ruby."
|
24
28
|
gem.email = "gerryg@inbox.com"
|
@@ -56,10 +60,9 @@ task :default => :spec
|
|
56
60
|
|
57
61
|
require 'rdoc/task'
|
58
62
|
Rake::RDocTask.new do |rdoc|
|
59
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
60
63
|
|
61
64
|
rdoc.rdoc_dir = 'rdoc'
|
62
|
-
rdoc.title = "xmlscan #{
|
65
|
+
rdoc.title = "xmlscan #{VERSION}"
|
63
66
|
rdoc.rdoc_files.include('README*')
|
64
67
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
65
68
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0prea
|
data/lib/xmlscan/htmlscan.rb
CHANGED
@@ -47,7 +47,7 @@ module XMLScan
|
|
47
47
|
raise "[BUG] this method must be never called"
|
48
48
|
end
|
49
49
|
|
50
|
-
def on_stag_end_empty(name)
|
50
|
+
def on_stag_end_empty(name, *a)
|
51
51
|
raise "[BUG] this method must be never called"
|
52
52
|
end
|
53
53
|
|
@@ -127,7 +127,7 @@ module XMLScan
|
|
127
127
|
return found_empty_stag
|
128
128
|
else
|
129
129
|
parse_error "parse error at `<'"
|
130
|
-
return on_chardata
|
130
|
+
return on_chardata '<'
|
131
131
|
end
|
132
132
|
end
|
133
133
|
on_stag name
|
@@ -142,7 +142,7 @@ module XMLScan
|
|
142
142
|
if @src.close_tag then
|
143
143
|
s << '>'
|
144
144
|
end
|
145
|
-
return on_chardata
|
145
|
+
return on_chardata '<'+s
|
146
146
|
end
|
147
147
|
on_stag name
|
148
148
|
begin
|
@@ -156,9 +156,9 @@ module XMLScan
|
|
156
156
|
qmark = val.slice!(0,1)
|
157
157
|
if val[-1] == qmark[0] then
|
158
158
|
val.chop!
|
159
|
-
|
159
|
+
scan_attr_value val unless val.empty?
|
160
160
|
else
|
161
|
-
|
161
|
+
scan_attr_value val unless val.empty?
|
162
162
|
begin
|
163
163
|
s = @src.get
|
164
164
|
unless s then
|
@@ -167,8 +167,8 @@ module XMLScan
|
|
167
167
|
end
|
168
168
|
c = s[0]
|
169
169
|
val, s = s.split(qmark, 2)
|
170
|
-
|
171
|
-
|
170
|
+
scan_attr_value '>' unless c == ?< or c == ?>
|
171
|
+
scan_attr_value val if c
|
172
172
|
end until s
|
173
173
|
continue = s
|
174
174
|
end
|
data/lib/xmlscan/namespace.rb
CHANGED
@@ -54,16 +54,16 @@ module XMLScan
|
|
54
54
|
# on_stag_end_empty_ns ('foo:bar', { 'foo' => '', ... })
|
55
55
|
#
|
56
56
|
|
57
|
-
def on_stag_ns(qname, prefix, localpart)
|
57
|
+
def on_stag_ns(qname, prefix, localpart, *a)
|
58
58
|
end
|
59
59
|
|
60
|
-
def on_attribute_ns(qname, prefix, localpart)
|
60
|
+
def on_attribute_ns(qname, prefix, localpart, *a)
|
61
61
|
end
|
62
62
|
|
63
|
-
def on_stag_end_ns(qname, namespaces)
|
63
|
+
def on_stag_end_ns(qname, namespaces, *a)
|
64
64
|
end
|
65
65
|
|
66
|
-
def on_stag_end_empty_ns(qname, namespaces)
|
66
|
+
def on_stag_end_empty_ns(qname, namespaces, *a)
|
67
67
|
end
|
68
68
|
|
69
69
|
end
|
@@ -99,7 +99,7 @@ module XMLScan
|
|
99
99
|
end
|
100
100
|
|
101
101
|
|
102
|
-
def on_start_document
|
102
|
+
def on_start_document(*a)
|
103
103
|
@namespace = {} #PredefinedNamespace.dup
|
104
104
|
@ns_hist = []
|
105
105
|
@ns_undeclared = {} # for checking undeclared namespace prefixes.
|
@@ -107,14 +107,14 @@ module XMLScan
|
|
107
107
|
@dont_same = [] # ditto.
|
108
108
|
@xmlns = NamespaceDeclaration.new(self)
|
109
109
|
@orig_visitor = @visitor
|
110
|
-
@visitor.on_start_document
|
110
|
+
@visitor.on_start_document *a
|
111
111
|
end
|
112
112
|
|
113
113
|
|
114
|
-
def on_stag(name)
|
114
|
+
def on_stag(name, *a)
|
115
115
|
@ns_hist.push nil
|
116
116
|
unless /:/n =~ name then
|
117
|
-
@visitor.on_stag_ns name, '', name
|
117
|
+
@visitor.on_stag_ns name, '', name, *a
|
118
118
|
else
|
119
119
|
prefix, localpart = $`, $'
|
120
120
|
if localpart.include? ?: then
|
@@ -131,12 +131,12 @@ module XMLScan
|
|
131
131
|
@ns_undeclared[prefix] = true
|
132
132
|
end
|
133
133
|
end
|
134
|
-
@visitor.on_stag_ns name, prefix, localpart
|
134
|
+
@visitor.on_stag_ns name, prefix, localpart, *a
|
135
135
|
end
|
136
136
|
end
|
137
137
|
|
138
138
|
|
139
|
-
def on_attribute(name)
|
139
|
+
def on_attribute(name, *a)
|
140
140
|
if /:/n =~ name then
|
141
141
|
prefix, localpart = $`, $'
|
142
142
|
if localpart.include? ?: then
|
@@ -157,13 +157,13 @@ module XMLScan
|
|
157
157
|
@dont_same.push [ prev, prefix, localpart ]
|
158
158
|
end
|
159
159
|
@prev_prefix[localpart] = prefix
|
160
|
-
@visitor.on_attribute_ns name, prefix, localpart
|
160
|
+
@visitor.on_attribute_ns name, prefix, localpart, *a
|
161
161
|
end
|
162
162
|
elsif name == 'xmlns' then
|
163
163
|
@visitor = @xmlns
|
164
164
|
@xmlns.on_xmlns_start ''
|
165
165
|
else
|
166
|
-
@visitor.on_attribute_ns name, nil, name
|
166
|
+
@visitor.on_attribute_ns name, nil, name, *a
|
167
167
|
end
|
168
168
|
end
|
169
169
|
|
@@ -176,36 +176,36 @@ module XMLScan
|
|
176
176
|
@parent = parent
|
177
177
|
end
|
178
178
|
|
179
|
-
def on_xmlns_start(prefix)
|
179
|
+
def on_xmlns_start(prefix, *a)
|
180
180
|
@prefix = prefix
|
181
181
|
@nsdecl = ''
|
182
182
|
end
|
183
183
|
|
184
|
-
def on_attr_value(str)
|
184
|
+
def on_attr_value(str, *a)
|
185
185
|
@nsdecl << str
|
186
186
|
end
|
187
187
|
|
188
|
-
def on_attr_entityref(ref)
|
188
|
+
def on_attr_entityref(ref, *a)
|
189
189
|
@parent.ns_wellformed_error \
|
190
190
|
"xmlns includes undeclared entity reference"
|
191
191
|
end
|
192
192
|
|
193
|
-
def on_attr_charref(code)
|
193
|
+
def on_attr_charref(code, *a)
|
194
194
|
@nsdecl << [code].pack('U')
|
195
195
|
end
|
196
196
|
|
197
|
-
def on_attr_charref_hex(code)
|
197
|
+
def on_attr_charref_hex(code, *a)
|
198
198
|
@nsdecl << [code].pack('U')
|
199
199
|
end
|
200
200
|
|
201
|
-
def on_attribute_end(name)
|
201
|
+
def on_attribute_end(name, *a)
|
202
202
|
@parent.on_xmlns_end @prefix, @nsdecl
|
203
203
|
end
|
204
204
|
|
205
205
|
end
|
206
206
|
|
207
207
|
|
208
|
-
def on_xmlns_end(prefix, uri)
|
208
|
+
def on_xmlns_end(prefix, uri, *a)
|
209
209
|
@visitor = @orig_visitor
|
210
210
|
if PredefinedNamespace.key? prefix then
|
211
211
|
if prefix == 'xmlns' then
|
@@ -254,54 +254,54 @@ module XMLScan
|
|
254
254
|
end
|
255
255
|
|
256
256
|
|
257
|
-
def on_stag_end(name)
|
257
|
+
def on_stag_end(name, *a)
|
258
258
|
fix_namespace
|
259
|
-
@visitor.on_stag_end_ns name, @namespace
|
259
|
+
@visitor.on_stag_end_ns name, @namespace, *a
|
260
260
|
end
|
261
261
|
|
262
262
|
|
263
|
-
def on_etag(name)
|
263
|
+
def on_etag(name, *a)
|
264
264
|
h = @ns_hist.pop and @namespace.update h
|
265
|
-
@visitor.on_etag name
|
265
|
+
@visitor.on_etag name, *a
|
266
266
|
end
|
267
267
|
|
268
268
|
|
269
|
-
def on_stag_end_empty(name)
|
269
|
+
def on_stag_end_empty(name, *a)
|
270
270
|
fix_namespace
|
271
|
-
@visitor.on_stag_end_empty_ns name, @namespace
|
271
|
+
@visitor.on_stag_end_empty_ns name, @namespace, *a
|
272
272
|
h = @ns_hist.pop and @namespace.update h
|
273
273
|
end
|
274
274
|
|
275
275
|
|
276
|
-
def on_doctype(root, pubid, sysid)
|
276
|
+
def on_doctype(root, pubid, sysid, *a)
|
277
277
|
if root.count(':') > 1 then
|
278
278
|
ns_parse_error "qualified name `#{root}' includes `:'"
|
279
279
|
end
|
280
|
-
@visitor.on_doctype root, pubid, sysid
|
280
|
+
@visitor.on_doctype root, pubid, sysid, *a
|
281
281
|
end
|
282
282
|
|
283
283
|
|
284
|
-
def on_pi(target, pi)
|
284
|
+
def on_pi(target, pi, *a)
|
285
285
|
if target.include? ?: then
|
286
286
|
ns_parse_error "PI target `#{target}' includes `:'"
|
287
287
|
end
|
288
|
-
@visitor.on_pi target, pi
|
288
|
+
@visitor.on_pi target, pi, *a
|
289
289
|
end
|
290
290
|
|
291
291
|
|
292
|
-
def on_entityref(ref)
|
292
|
+
def on_entityref(ref, *a)
|
293
293
|
if ref.include? ?: then
|
294
294
|
ns_parse_error "entity reference `#{ref}' includes `:'"
|
295
295
|
end
|
296
|
-
@visitor.on_entityref ref
|
296
|
+
@visitor.on_entityref ref, *a
|
297
297
|
end
|
298
298
|
|
299
299
|
|
300
|
-
def on_attr_entityref(ref)
|
300
|
+
def on_attr_entityref(ref, *a)
|
301
301
|
if ref.include? ?: then
|
302
302
|
ns_parse_error "entity reference `#{ref}' includes `:'"
|
303
303
|
end
|
304
|
-
@visitor.on_attr_entityref ref
|
304
|
+
@visitor.on_attr_entityref ref, *a
|
305
305
|
end
|
306
306
|
|
307
307
|
end
|
data/lib/xmlscan/parser.rb
CHANGED
@@ -43,7 +43,7 @@ module XMLScan
|
|
43
43
|
|
44
44
|
private
|
45
45
|
|
46
|
-
def on_xmldecl_version(str)
|
46
|
+
def on_xmldecl_version(str, *a)
|
47
47
|
unless str == '1.0' then
|
48
48
|
warning "unsupported XML version `#{str}'"
|
49
49
|
end
|
@@ -51,7 +51,7 @@ module XMLScan
|
|
51
51
|
end
|
52
52
|
|
53
53
|
|
54
|
-
def on_xmldecl_standalone(str)
|
54
|
+
def on_xmldecl_standalone(str, *a)
|
55
55
|
if str == 'yes' then
|
56
56
|
@standalone = true
|
57
57
|
elsif str == 'no' then
|
@@ -63,7 +63,7 @@ module XMLScan
|
|
63
63
|
end
|
64
64
|
|
65
65
|
|
66
|
-
def on_doctype(name, pubid, sysid)
|
66
|
+
def on_doctype(name, pubid, sysid, *a)
|
67
67
|
if pubid and not sysid then
|
68
68
|
parse_error "public external ID must have both public ID and system ID"
|
69
69
|
end
|
@@ -71,12 +71,12 @@ module XMLScan
|
|
71
71
|
end
|
72
72
|
|
73
73
|
|
74
|
-
def on_prolog_space(s)
|
74
|
+
def on_prolog_space(s, *a)
|
75
75
|
# just ignore it.
|
76
76
|
end
|
77
77
|
|
78
78
|
|
79
|
-
def on_pi(target, pi)
|
79
|
+
def on_pi(target, pi, *a)
|
80
80
|
if target.downcase == 'xml' then
|
81
81
|
parse_error "reserved PI target `#{target}'"
|
82
82
|
end
|
@@ -114,39 +114,43 @@ module XMLScan
|
|
114
114
|
#end
|
115
115
|
|
116
116
|
|
117
|
-
def on_stag(name)
|
117
|
+
def on_stag(name, *a)
|
118
118
|
@elem.push name
|
119
119
|
@visitor.on_stag name
|
120
120
|
@attr.clear
|
121
121
|
end
|
122
122
|
|
123
|
-
def on_attribute(name)
|
123
|
+
def on_attribute(name, *a)
|
124
124
|
unless @attr.check_unique name then
|
125
125
|
wellformed_error "doubled attribute `#{name}'"
|
126
126
|
end
|
127
127
|
@visitor.on_attribute name
|
128
128
|
end
|
129
129
|
|
130
|
-
def on_attr_value(str)
|
130
|
+
def on_attr_value(str, *a)
|
131
131
|
str.tr! "\t\r\n", ' ' # normalize
|
132
132
|
@visitor.on_attr_value str
|
133
133
|
end
|
134
134
|
|
135
|
-
def
|
135
|
+
def on_stag_end(name, *a)
|
136
|
+
@visitor.on_stag_end name, *a
|
137
|
+
end
|
138
|
+
|
139
|
+
def on_stag_end_empty(name, *a)
|
136
140
|
# @visitor.on_stag_end name
|
137
141
|
# @elem.pop
|
138
142
|
# @visitor.on_etag name
|
139
|
-
@visitor.on_stag_end_empty name
|
143
|
+
@visitor.on_stag_end_empty name, *a
|
140
144
|
@elem.pop
|
141
145
|
end
|
142
146
|
|
143
|
-
def on_etag(name)
|
147
|
+
def on_etag(name, *a)
|
144
148
|
last = @elem.pop
|
145
149
|
if last == name then
|
146
|
-
@visitor.on_etag name
|
150
|
+
@visitor.on_etag name, *a
|
147
151
|
elsif last then
|
148
152
|
wellformed_error "element type `#{name}' is not matched"
|
149
|
-
@visitor.on_etag last
|
153
|
+
@visitor.on_etag last, *a
|
150
154
|
else
|
151
155
|
parse_error "end tag `#{name}' appears alone"
|
152
156
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'xmlscan/parser'
|
3
|
+
require 'xmlscan/visitor'
|
4
|
+
|
5
|
+
module XMLScan
|
6
|
+
module ElementProcessor
|
7
|
+
include XMLScan::Visitor
|
8
|
+
|
9
|
+
SKIP = [:on_chardata, :on_stag, :on_etag, :on_attribute, :on_attr_entityref,
|
10
|
+
:on_attr_value, :on_start_document, :on_end_document, :on_attribute_end,
|
11
|
+
:on_stag_end, :on_stag_end_empty, :on_attr_charref, :on_attr_charref_hex]
|
12
|
+
|
13
|
+
MY_METHODS = XMLScan::Visitor.instance_methods.to_a - SKIP
|
14
|
+
|
15
|
+
def initialize(opts={}, mod=nil)
|
16
|
+
(mod ? MY_METHODS - mod.instance_methods : MY_METHODS).each do |i|
|
17
|
+
self.class.class_eval %{def #{i}(d, *a) d&&(@out << d) end}, __FILE__, __LINE__
|
18
|
+
end
|
19
|
+
self.class.send :include, mod
|
20
|
+
|
21
|
+
@element = opts[:element] || raise("need an element")
|
22
|
+
@key = opts[:key] || raise("need a key")
|
23
|
+
@extras = (ex = opts[:extras]) ? ex.map(&:to_sym) : []
|
24
|
+
|
25
|
+
@pairs = [] # output [name, content, value] * 1 or more
|
26
|
+
@context = '' # current key(name) of the element (card)
|
27
|
+
@stack = [] # stack of containing context cards
|
28
|
+
@out = [] # current output for name(card)
|
29
|
+
@parser = XMLScan::XMLParser.new(self)
|
30
|
+
self
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
class XMLProcessor
|
36
|
+
include ElementProcessor
|
37
|
+
|
38
|
+
def self.process(file, opts={}, mod=nil)
|
39
|
+
raise "Not readable #{file.inspect}" unless IO===( io =
|
40
|
+
IO===file ? file : open(file) )
|
41
|
+
visitor = new(opts, mod)
|
42
|
+
visitor.parser.parse(io)
|
43
|
+
visitor.pairs
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
data/lib/xmlscan/scanner.rb
CHANGED
@@ -122,20 +122,29 @@ module XMLScan
|
|
122
122
|
self
|
123
123
|
end
|
124
124
|
|
125
|
-
|
125
|
+
=begin
|
126
|
+
Managing source in a private array.
|
127
|
+
* tag oriented (?< and ?> are the key tokens)
|
128
|
+
* ?> that aren't followed by another ?< or ?> are stripped in splitting
|
129
|
+
=end
|
126
130
|
def get
|
127
131
|
pop or
|
128
132
|
unless @eof then
|
129
133
|
last = @last
|
130
134
|
begin
|
131
|
-
|
132
|
-
unless src then
|
135
|
+
unless chunk = @src.gets then
|
133
136
|
@eof = true
|
134
|
-
|
135
|
-
last
|
136
|
-
|
137
|
+
@last = nil
|
138
|
+
return last
|
139
|
+
#unshift last # to be popped after reverse!
|
140
|
+
#last = nil
|
141
|
+
#break
|
137
142
|
end
|
138
|
-
|
143
|
+
# positive (zero-width) lookahead: < or >< or >>
|
144
|
+
# so don't consume those (but split leaving them always at the
|
145
|
+
# end of chunks)
|
146
|
+
# consume (>) and split on >
|
147
|
+
a = chunk.split(/(?=<|>[<>])|>/, -1)
|
139
148
|
if last then
|
140
149
|
unless /\A[<>]/ =~ a.first then
|
141
150
|
a[0] = last << (a.first || '')
|
@@ -143,6 +152,7 @@ module XMLScan
|
|
143
152
|
push last
|
144
153
|
end
|
145
154
|
end
|
155
|
+
raise "size #{size}" if size > 1
|
146
156
|
concat a
|
147
157
|
last = pop
|
148
158
|
end while empty?
|
@@ -223,7 +233,7 @@ module XMLScan
|
|
223
233
|
last.push @last.inspect
|
224
234
|
end
|
225
235
|
a.push '#eof' if @eof
|
226
|
-
"((#{a
|
236
|
+
"((#{a*' '}) l(#{last*' '}) . #{source.inspect})"
|
227
237
|
end
|
228
238
|
|
229
239
|
def each
|
@@ -354,72 +364,72 @@ module XMLScan
|
|
354
364
|
end
|
355
365
|
end
|
356
366
|
|
357
|
-
def on_xmldecl_version(str)
|
358
|
-
@visitor.on_xmldecl_version str
|
367
|
+
def on_xmldecl_version(str, *a)
|
368
|
+
@visitor.on_xmldecl_version str, *a
|
359
369
|
end
|
360
370
|
|
361
|
-
def on_xmldecl_encoding(str)
|
362
|
-
@visitor.on_xmldecl_encoding str
|
371
|
+
def on_xmldecl_encoding(str, *a)
|
372
|
+
@visitor.on_xmldecl_encoding str, *a
|
363
373
|
end
|
364
374
|
|
365
|
-
def on_xmldecl_standalone(str)
|
366
|
-
@visitor.on_xmldecl_standalone str
|
375
|
+
def on_xmldecl_standalone(str, *a)
|
376
|
+
@visitor.on_xmldecl_standalone str, *a
|
367
377
|
end
|
368
378
|
|
369
|
-
def on_xmldecl_other(name, value)
|
370
|
-
@visitor.on_xmldecl_other name, value
|
379
|
+
def on_xmldecl_other(name, value, *a)
|
380
|
+
@visitor.on_xmldecl_other name, value, *a
|
371
381
|
end
|
372
382
|
|
373
|
-
def on_xmldecl_end
|
374
|
-
@visitor.on_xmldecl_end
|
383
|
+
def on_xmldecl_end(*a)
|
384
|
+
@visitor.on_xmldecl_end *a
|
375
385
|
end
|
376
386
|
|
377
|
-
def on_doctype(root, pubid, sysid)
|
378
|
-
@visitor.on_doctype root, pubid, sysid
|
387
|
+
def on_doctype(root, pubid, sysid, *a)
|
388
|
+
@visitor.on_doctype root, pubid, sysid, *a
|
379
389
|
end
|
380
390
|
|
381
|
-
def on_prolog_space(str)
|
382
|
-
@visitor.on_prolog_space str
|
391
|
+
def on_prolog_space(str, *a)
|
392
|
+
@visitor.on_prolog_space str, *a
|
383
393
|
end
|
384
394
|
|
385
|
-
def on_comment(str)
|
386
|
-
@visitor.on_comment str
|
395
|
+
def on_comment(str, *a)
|
396
|
+
@visitor.on_comment str, *a
|
387
397
|
end
|
388
398
|
|
389
|
-
def on_pi(target, pi)
|
390
|
-
@visitor.on_pi target, pi
|
399
|
+
def on_pi(target, pi, *a)
|
400
|
+
@visitor.on_pi target, pi, *a
|
391
401
|
end
|
392
402
|
|
393
|
-
def on_chardata(str)
|
394
|
-
@visitor.on_chardata str
|
403
|
+
def on_chardata(str, *a)
|
404
|
+
@visitor.on_chardata str, *a
|
395
405
|
end
|
396
406
|
|
397
|
-
def on_cdata(str)
|
398
|
-
@visitor.on_cdata str
|
407
|
+
def on_cdata(str, *a)
|
408
|
+
@visitor.on_cdata str, *a
|
399
409
|
end
|
400
410
|
|
401
|
-
def on_etag(name)
|
402
|
-
@visitor.on_etag name
|
411
|
+
def on_etag(name, *a)
|
412
|
+
@visitor.on_etag name, *a
|
403
413
|
end
|
404
414
|
|
405
|
-
def on_entityref(ref)
|
406
|
-
@visitor.on_entityref ref
|
415
|
+
def on_entityref(ref, *a)
|
416
|
+
@visitor.on_entityref ref, *a
|
407
417
|
end
|
408
418
|
|
409
|
-
def on_charref(code)
|
410
|
-
@visitor.on_charref code
|
419
|
+
def on_charref(code, *a)
|
420
|
+
@visitor.on_charref code, *a
|
411
421
|
end
|
412
422
|
|
413
|
-
def on_charref_hex(code)
|
414
|
-
@visitor.on_charref_hex code
|
423
|
+
def on_charref_hex(code, *a)
|
424
|
+
@visitor.on_charref_hex code, *a
|
415
425
|
end
|
416
426
|
|
417
|
-
def on_start_document
|
418
|
-
@visitor.on_start_document
|
427
|
+
def on_start_document(*a)
|
428
|
+
@visitor.on_start_document *a
|
419
429
|
end
|
420
430
|
|
421
|
-
def on_end_document
|
422
|
-
@visitor.on_end_document
|
431
|
+
def on_end_document(*a)
|
432
|
+
@visitor.on_end_document *a
|
423
433
|
end
|
424
434
|
|
425
435
|
|
@@ -444,50 +454,51 @@ module XMLScan
|
|
444
454
|
#
|
445
455
|
# A: on_chardata ('HOGE')
|
446
456
|
|
447
|
-
def on_stag(name)
|
448
|
-
@visitor.on_stag name
|
457
|
+
def on_stag(name, *a)
|
458
|
+
@visitor.on_stag name, *a
|
449
459
|
end
|
450
460
|
|
451
|
-
def on_attribute(name)
|
452
|
-
@visitor.on_attribute name
|
461
|
+
def on_attribute(name, *a)
|
462
|
+
@visitor.on_attribute name, *a
|
453
463
|
end
|
454
464
|
|
455
|
-
def on_attr_value(str)
|
456
|
-
@visitor.on_attr_value str
|
465
|
+
def on_attr_value(str, *a)
|
466
|
+
@visitor.on_attr_value str, *a
|
457
467
|
end
|
458
468
|
|
459
|
-
def on_attr_entityref(ref)
|
460
|
-
@visitor.on_attr_entityref ref
|
469
|
+
def on_attr_entityref(ref, *a)
|
470
|
+
@visitor.on_attr_entityref ref, *a
|
461
471
|
end
|
462
472
|
|
463
|
-
def on_attr_charref(code)
|
464
|
-
@visitor.on_attr_charref code
|
473
|
+
def on_attr_charref(code, *a)
|
474
|
+
@visitor.on_attr_charref code, *a
|
465
475
|
end
|
466
476
|
|
467
|
-
def on_attr_charref_hex(code)
|
468
|
-
@visitor.on_attr_charref_hex code
|
477
|
+
def on_attr_charref_hex(code, *a)
|
478
|
+
@visitor.on_attr_charref_hex code, *a
|
469
479
|
end
|
470
480
|
|
471
|
-
def on_attribute_end(name)
|
472
|
-
@visitor.on_attribute_end name
|
481
|
+
def on_attribute_end(name, *a)
|
482
|
+
@visitor.on_attribute_end name, *a, *a
|
473
483
|
end
|
474
484
|
|
475
|
-
def on_stag_end_empty(name)
|
476
|
-
@visitor.on_stag_end_empty name
|
485
|
+
def on_stag_end_empty(name, *a)
|
486
|
+
@visitor.on_stag_end_empty name, *a
|
477
487
|
end
|
478
488
|
|
479
|
-
def on_stag_end(name)
|
480
|
-
|
489
|
+
def on_stag_end(name, *a)
|
490
|
+
#STDERR << "ose #{name}, #{a.inspect}\n"
|
491
|
+
@visitor.on_stag_end name, *a
|
481
492
|
end
|
482
493
|
|
483
494
|
|
495
|
+
S_OPT_EXAMPLE = "".encode(::Encoding::WINDOWS_31J)
|
496
|
+
E_OPT_EXAMPLE = "".encode(::Encoding::EUCJP)
|
484
497
|
|
485
498
|
private
|
486
499
|
|
487
500
|
module OptRegexp
|
488
501
|
UTFSTR = "é"
|
489
|
-
S_OPT_EXAMPLE = "".encode Encoding.find('Windows-31J')
|
490
|
-
E_OPT_EXAMPLE = "".encode Encoding.find('EUC-JP')
|
491
502
|
|
492
503
|
RE_ENCODINGS = {
|
493
504
|
:n=>/e/n.encoding,
|
@@ -525,6 +536,7 @@ module XMLScan
|
|
525
536
|
else
|
526
537
|
s = $`
|
527
538
|
on_chardata s unless s.empty?
|
539
|
+
#orig = $'.sub(/(?=;).*$/,'')
|
528
540
|
ref = nil
|
529
541
|
$'.split('&', -1).each { |s|
|
530
542
|
unless /(?!\A);|(?=[ \t\r\n])/ =~ s and not $&.empty? then
|
@@ -533,18 +545,18 @@ module XMLScan
|
|
533
545
|
parse_error "reference to `#{ref}' doesn't end with `;'"
|
534
546
|
else
|
535
547
|
parse_error "`&' is not used for entity/character references"
|
536
|
-
on_chardata
|
548
|
+
on_chardata '&'+s
|
537
549
|
next
|
538
550
|
end
|
539
551
|
end
|
540
|
-
ref = $`
|
552
|
+
orig = ?& + (ref = $`) + ?;
|
541
553
|
s = $'
|
542
554
|
if /\A[^#]/ =~ ref then
|
543
|
-
on_entityref ref
|
555
|
+
on_entityref ref, orig
|
544
556
|
elsif /\A#(\d+)\z/ =~ ref then
|
545
|
-
on_charref $1.to_i
|
557
|
+
on_charref $1.to_i, orig
|
546
558
|
elsif /\A#x([\dA-Fa-f]+)\z/ =~ ref then
|
547
|
-
on_charref_hex $1.hex
|
559
|
+
on_charref_hex $1.hex, orig
|
548
560
|
else
|
549
561
|
parse_error "invalid character reference `#{ref}'"
|
550
562
|
end
|
@@ -558,8 +570,9 @@ module XMLScan
|
|
558
570
|
end
|
559
571
|
|
560
572
|
|
561
|
-
def
|
573
|
+
def scan_attr_value(s) # mostly copied and pasted from scan_chardata
|
562
574
|
unless /&/ =~ s then
|
575
|
+
#STDERR << "no& attr_val #{s.inspect}, #{caller*"\n"}\n" if s == ?>
|
563
576
|
on_attr_value s
|
564
577
|
else
|
565
578
|
s = $`
|
@@ -576,14 +589,14 @@ module XMLScan
|
|
576
589
|
next
|
577
590
|
end
|
578
591
|
end
|
579
|
-
ref = $`
|
592
|
+
orig = ?& + (ref = $`) + ?;
|
580
593
|
s = $'
|
581
594
|
if /\A[^#]/ =~ ref then
|
582
|
-
on_attr_entityref ref
|
595
|
+
on_attr_entityref ref, orig
|
583
596
|
elsif /\A#(\d+)\z/ =~ ref then
|
584
|
-
on_attr_charref $1.to_i
|
597
|
+
on_attr_charref $1.to_i, orig
|
585
598
|
elsif /\A#x([\dA-Fa-f]+)\z/ =~ ref then
|
586
|
-
on_attr_charref_hex $1.hex
|
599
|
+
on_attr_charref_hex $1.hex, orig
|
587
600
|
else
|
588
601
|
parse_error "invalid character reference `#{ref}'"
|
589
602
|
end
|
@@ -682,6 +695,7 @@ module XMLScan
|
|
682
695
|
|
683
696
|
|
684
697
|
def scan_etag(s)
|
698
|
+
orig="#{s}>"
|
685
699
|
s[0,2] = '' # remove '</'
|
686
700
|
if s.empty? then
|
687
701
|
if @src.close_tag then # </>
|
@@ -689,14 +703,14 @@ module XMLScan
|
|
689
703
|
else # </< or </[EOF]
|
690
704
|
parse_error "parse error at `</'"
|
691
705
|
s << '>' if @src.close_tag
|
692
|
-
return on_chardata
|
706
|
+
return on_chardata '</' << s
|
693
707
|
end
|
694
708
|
elsif /[ \t\n\r]+/ =~ s then
|
695
709
|
s1, s2 = $`, $'
|
696
710
|
if s1.empty? then # </ tag
|
697
711
|
parse_error "parse error at `</'"
|
698
712
|
s << '>' if @src.close_tag
|
699
|
-
return on_chardata
|
713
|
+
return on_chardata '</' + s
|
700
714
|
elsif not s2.empty? then # </ta g
|
701
715
|
parse_error "illegal whitespace is found within end tag `#{s1}'"
|
702
716
|
while @src.get_tag
|
@@ -705,7 +719,7 @@ module XMLScan
|
|
705
719
|
s = s1
|
706
720
|
end
|
707
721
|
found_unclosed_etag s unless @src.close_tag # </tag< or </tag[EOF]
|
708
|
-
on_etag s
|
722
|
+
on_etag s, orig
|
709
723
|
end
|
710
724
|
|
711
725
|
|
@@ -745,6 +759,8 @@ module XMLScan
|
|
745
759
|
|
746
760
|
|
747
761
|
def scan_stag(s)
|
762
|
+
hash = {}
|
763
|
+
orig = [s.dup]
|
748
764
|
unless /(?=[\/ \t\n\r='"])/ =~ s then
|
749
765
|
name = s
|
750
766
|
name[0,1] = '' # remove `<'
|
@@ -753,54 +769,65 @@ module XMLScan
|
|
753
769
|
return found_empty_stag
|
754
770
|
else # << or <[EOF]
|
755
771
|
parse_error "parse error at `<'"
|
756
|
-
return on_chardata
|
772
|
+
return on_chardata '<'
|
757
773
|
end
|
758
774
|
end
|
759
775
|
on_stag name
|
760
776
|
found_unclosed_stag name unless @src.close_tag
|
761
|
-
on_stag_end name
|
777
|
+
on_stag_end name, orig*''+?>, {}
|
762
778
|
else
|
779
|
+
k = nil
|
763
780
|
name = $`
|
764
781
|
s = $'
|
765
782
|
name[0,1] = '' # remove `<'
|
766
783
|
if name.empty? then # `< tag' or `<=`
|
767
784
|
parse_error "parse error at `<'"
|
768
785
|
s << '>' if @src.close_tag
|
769
|
-
return on_chardata
|
786
|
+
return on_chardata '<' << s
|
770
787
|
end
|
771
788
|
on_stag name
|
772
789
|
emptyelem = false
|
773
|
-
key,val,error,qmark,c = nil
|
774
790
|
begin
|
775
791
|
continue = false
|
776
792
|
s.scan(/[ \t\n\r]([^= \t\n\r\/'"]+)[ \t\n\r]*=[ \t\n\r]*('[^']*'?|"[^"]*"?)|\/\z|([^ \t\n\r][\S\s]*)/
|
777
793
|
) { |key,val,error|
|
778
|
-
|
794
|
+
orig_val = []
|
795
|
+
if key then
|
779
796
|
on_attribute key
|
797
|
+
k=key
|
798
|
+
orig_val << val
|
780
799
|
qmark = val.slice!(0,1)
|
781
800
|
if val[-1] == qmark[0] then
|
782
801
|
val.chop!
|
783
|
-
|
802
|
+
scan_attr_value val unless val.empty?
|
784
803
|
else
|
785
|
-
|
804
|
+
scan_attr_value val unless val.empty?
|
786
805
|
begin
|
787
806
|
s = @src.get
|
807
|
+
#STDERR << "get some more? #{s.inspect}, #{orig.inspect}\n"
|
788
808
|
unless s then
|
789
809
|
parse_error "unterminated attribute `#{key}' meets EOF"
|
790
810
|
break
|
791
811
|
end
|
812
|
+
orig << s.dup
|
792
813
|
c = s[0]
|
793
814
|
val, s = s.split(qmark, 2)
|
815
|
+
orig_val << val
|
794
816
|
if c == ?< then
|
795
817
|
wellformed_error "`<' is found in attribute `#{key}'"
|
796
818
|
elsif c != ?> then
|
797
|
-
|
819
|
+
#STDERR << "close in quote? #{c.inspect}, #{@src.tag_start?}, #{@src.tag_end?}, #{s.inspect}, #{val.inspect}, #{orig.inspect}, #{orig_val.inspect}\n"
|
820
|
+
orig_val[-1,0] = orig[-1,0] = ?> # if @src.tag_start?
|
821
|
+
scan_attr_value ?>
|
798
822
|
end
|
799
|
-
|
823
|
+
scan_attr_value val if c
|
800
824
|
end until s
|
801
825
|
continue = s # if eof then continue is false, else true.
|
802
826
|
end
|
803
|
-
|
827
|
+
#STDERR << "attr:#{k}, #{orig_val}\n"
|
828
|
+
hash[k] = orig_val*''
|
829
|
+
#STDERR << "attr end #{hash.inspect}, #{k}, #{orig_val}\n"
|
830
|
+
on_attribute_end key #, orig_val*''
|
804
831
|
elsif error then
|
805
832
|
continue = s = found_stag_error(error)
|
806
833
|
else
|
@@ -816,9 +843,11 @@ module XMLScan
|
|
816
843
|
end
|
817
844
|
end
|
818
845
|
if emptyelem then
|
819
|
-
on_stag_end_empty name
|
846
|
+
on_stag_end_empty name, orig*''+?>, hash
|
820
847
|
else
|
821
|
-
|
848
|
+
#STDERR << "on stag end #{ name}, \"<#{name}#{s}>\", #{hash.inspect}\n"
|
849
|
+
on_stag_end name, orig*''+?>, hash
|
850
|
+
#on_stag_end name, "<#{name}#{s}>", hash
|
822
851
|
end
|
823
852
|
end
|
824
853
|
end
|
@@ -1067,10 +1096,10 @@ module XMLScan
|
|
1067
1096
|
|
1068
1097
|
|
1069
1098
|
def scan_document
|
1070
|
-
on_start_document
|
1099
|
+
on_start_document ''
|
1071
1100
|
@src.prepare
|
1072
1101
|
scan_prolog @src.get
|
1073
|
-
on_end_document
|
1102
|
+
on_end_document ''
|
1074
1103
|
end
|
1075
1104
|
|
1076
1105
|
|
data/lib/xmlscan/version.rb
CHANGED
@@ -9,15 +9,9 @@
|
|
9
9
|
|
10
10
|
module XMLScan
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
#
|
17
|
-
# TENNY which is larger than 1 (e.g. 'X.X.1' or 'X.X.2') means this
|
18
|
-
# release is a stable release.
|
19
|
-
|
20
|
-
VERSION = '0.2.3'
|
21
|
-
RELEASE_DATE = '2003-05-02'
|
12
|
+
GEMNAME = 'xmlscan'
|
13
|
+
VERSION_FILE = File.expand_path('../../VERSION', File.dirname(__FILE__))
|
14
|
+
VERSION = open(VERSION_FILE).to_a*''.chop
|
15
|
+
RELEASE_DATE = open(VERSION_FILE).mtime.strftime('%Y-%m-%d')
|
22
16
|
|
23
17
|
end
|
data/lib/xmlscan/visitor.rb
CHANGED
@@ -54,88 +54,88 @@ module XMLScan
|
|
54
54
|
def warning(msg)
|
55
55
|
end
|
56
56
|
|
57
|
-
def on_xmldecl
|
57
|
+
def on_xmldecl(*a)
|
58
58
|
end
|
59
59
|
|
60
|
-
def on_xmldecl_key(key, str)
|
60
|
+
def on_xmldecl_key(key, str, *a)
|
61
61
|
end
|
62
62
|
|
63
|
-
def on_xmldecl_version(str)
|
63
|
+
def on_xmldecl_version(str, *a)
|
64
64
|
end
|
65
65
|
|
66
|
-
def on_xmldecl_encoding(str)
|
66
|
+
def on_xmldecl_encoding(str, *a)
|
67
67
|
end
|
68
68
|
|
69
|
-
def on_xmldecl_standalone(str)
|
69
|
+
def on_xmldecl_standalone(str, *a)
|
70
70
|
end
|
71
71
|
|
72
|
-
def on_xmldecl_other(name, value)
|
72
|
+
def on_xmldecl_other(name, value, *a)
|
73
73
|
end
|
74
74
|
|
75
|
-
def on_xmldecl_end
|
75
|
+
def on_xmldecl_end(*a)
|
76
76
|
end
|
77
77
|
|
78
|
-
def on_doctype(root, pubid, sysid)
|
78
|
+
def on_doctype(root, pubid, sysid, *a)
|
79
79
|
end
|
80
80
|
|
81
|
-
def on_prolog_space(str)
|
81
|
+
def on_prolog_space(str, *a)
|
82
82
|
end
|
83
83
|
|
84
|
-
def on_comment(str)
|
84
|
+
def on_comment(str, *a)
|
85
85
|
end
|
86
86
|
|
87
|
-
def on_pi(target, pi)
|
87
|
+
def on_pi(target, pi, *a)
|
88
88
|
end
|
89
89
|
|
90
|
-
def on_chardata(str)
|
90
|
+
def on_chardata(str, *a)
|
91
91
|
end
|
92
92
|
|
93
|
-
def on_cdata(str)
|
93
|
+
def on_cdata(str, *a)
|
94
94
|
end
|
95
95
|
|
96
|
-
def on_etag(name)
|
96
|
+
def on_etag(name, *a)
|
97
97
|
end
|
98
98
|
|
99
|
-
def on_entityref(ref)
|
99
|
+
def on_entityref(ref, *a)
|
100
100
|
end
|
101
101
|
|
102
|
-
def on_charref(code)
|
102
|
+
def on_charref(code, *a)
|
103
103
|
end
|
104
104
|
|
105
|
-
def on_charref_hex(code)
|
105
|
+
def on_charref_hex(code, *a)
|
106
106
|
end
|
107
107
|
|
108
|
-
def on_start_document
|
108
|
+
def on_start_document(*a)
|
109
109
|
end
|
110
110
|
|
111
|
-
def on_end_document
|
111
|
+
def on_end_document(*a)
|
112
112
|
end
|
113
113
|
|
114
|
-
def on_stag(name)
|
114
|
+
def on_stag(name, *a)
|
115
115
|
end
|
116
116
|
|
117
|
-
def on_attribute(name)
|
117
|
+
def on_attribute(name, *a)
|
118
118
|
end
|
119
119
|
|
120
|
-
def on_attr_value(str)
|
120
|
+
def on_attr_value(str, *a)
|
121
121
|
end
|
122
122
|
|
123
|
-
def on_attr_entityref(ref)
|
123
|
+
def on_attr_entityref(ref, *a)
|
124
124
|
end
|
125
125
|
|
126
|
-
def on_attr_charref(code)
|
126
|
+
def on_attr_charref(code, *a)
|
127
127
|
end
|
128
128
|
|
129
|
-
def on_attr_charref_hex(code)
|
129
|
+
def on_attr_charref_hex(code, *a)
|
130
130
|
end
|
131
131
|
|
132
|
-
def on_attribute_end(name)
|
132
|
+
def on_attribute_end(name, *a)
|
133
133
|
end
|
134
134
|
|
135
|
-
def on_stag_end_empty(name)
|
135
|
+
def on_stag_end_empty(name, *a)
|
136
136
|
end
|
137
137
|
|
138
|
-
def on_stag_end(name)
|
138
|
+
def on_stag_end(name, *a)
|
139
139
|
end
|
140
140
|
|
141
141
|
end
|
@@ -146,13 +146,15 @@ module XMLScan
|
|
146
146
|
include Visitor
|
147
147
|
|
148
148
|
def initialize(visitor)
|
149
|
+
#STDERR << "new Decoration #{visitor}\n"
|
149
150
|
@visitor = visitor
|
150
151
|
end
|
151
152
|
|
152
153
|
Visitor.instance_methods.each { |i|
|
154
|
+
#STDERR << "#{i} \#{args.inspect}\\n"
|
153
155
|
module_eval <<-END, __FILE__, __LINE__ + 1
|
154
156
|
def #{i}(*args)
|
155
|
-
@visitor.#{i}(*args)
|
157
|
+
@visitor&&@visitor.#{i}(*args)
|
156
158
|
end
|
157
159
|
END
|
158
160
|
}
|
data/lib/xmlscan/xmlchar.rb
CHANGED
@@ -115,95 +115,95 @@ module XMLScan
|
|
115
115
|
end
|
116
116
|
|
117
117
|
|
118
|
-
def on_xmldecl_version(str)
|
118
|
+
def on_xmldecl_version(str, *a)
|
119
119
|
check_valid_version str
|
120
120
|
super
|
121
121
|
end
|
122
122
|
|
123
|
-
def on_xmldecl_encoding(str)
|
123
|
+
def on_xmldecl_encoding(str, *a)
|
124
124
|
check_valid_encoding str
|
125
125
|
super
|
126
126
|
end
|
127
127
|
|
128
|
-
def on_xmldecl_standalone(str)
|
128
|
+
def on_xmldecl_standalone(str, *a)
|
129
129
|
check_valid_chardata str
|
130
130
|
super
|
131
131
|
end
|
132
132
|
|
133
|
-
def on_doctype(root, pubid, sysid)
|
133
|
+
def on_doctype(root, pubid, sysid, *a)
|
134
134
|
check_valid_name root
|
135
135
|
check_valid_pubid pubid if pubid
|
136
136
|
check_valid_chardata sysid if sysid
|
137
137
|
super
|
138
138
|
end
|
139
139
|
|
140
|
-
def on_comment(str)
|
140
|
+
def on_comment(str, *a)
|
141
141
|
check_valid_chardata str
|
142
142
|
super
|
143
143
|
end
|
144
144
|
|
145
|
-
def on_pi(target, pi)
|
145
|
+
def on_pi(target, pi, *a)
|
146
146
|
check_valid_name target
|
147
147
|
check_valid_chardata pi
|
148
148
|
super
|
149
149
|
end
|
150
150
|
|
151
|
-
def on_chardata(str)
|
151
|
+
def on_chardata(str, *a)
|
152
152
|
check_valid_chardata str
|
153
153
|
super
|
154
154
|
end
|
155
155
|
|
156
|
-
def on_cdata(str)
|
156
|
+
def on_cdata(str, *a)
|
157
157
|
check_valid_chardata str
|
158
158
|
super
|
159
159
|
end
|
160
160
|
|
161
|
-
def on_etag(name)
|
161
|
+
def on_etag(name, *a)
|
162
162
|
check_valid_name name
|
163
163
|
super
|
164
164
|
end
|
165
165
|
|
166
|
-
def on_entityref(ref)
|
166
|
+
def on_entityref(ref, *a)
|
167
167
|
check_valid_name ref
|
168
168
|
super
|
169
169
|
end
|
170
170
|
|
171
|
-
def on_charref(code)
|
171
|
+
def on_charref(code, *a)
|
172
172
|
check_valid_char code
|
173
173
|
super
|
174
174
|
end
|
175
175
|
|
176
|
-
def on_charref_hex(code)
|
176
|
+
def on_charref_hex(code, *a)
|
177
177
|
check_valid_char code
|
178
178
|
super
|
179
179
|
end
|
180
180
|
|
181
|
-
def on_stag(name)
|
181
|
+
def on_stag(name, *a)
|
182
182
|
check_valid_name name
|
183
183
|
super
|
184
184
|
end
|
185
185
|
|
186
|
-
def on_attribute(name)
|
186
|
+
def on_attribute(name, *a)
|
187
187
|
check_valid_name name
|
188
188
|
super
|
189
189
|
end
|
190
190
|
|
191
|
-
def on_attr_value(str)
|
191
|
+
def on_attr_value(str, *a)
|
192
192
|
check_valid_chardata str
|
193
193
|
super
|
194
194
|
end
|
195
195
|
|
196
|
-
def on_attr_entityref(ref)
|
196
|
+
def on_attr_entityref(ref, *a)
|
197
197
|
check_valid_name ref
|
198
198
|
super
|
199
199
|
end
|
200
200
|
|
201
|
-
def on_attr_charref(code)
|
201
|
+
def on_attr_charref(code, *a)
|
202
202
|
check_valid_char code
|
203
203
|
super
|
204
204
|
end
|
205
205
|
|
206
|
-
def on_attr_charref_hex(code)
|
206
|
+
def on_attr_charref_hex(code, *a)
|
207
207
|
check_valid_char code
|
208
208
|
super
|
209
209
|
end
|
data/xmlcard.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'xmlscan/processor'
|
4
|
+
|
5
|
+
# need to make these into supplied blocks somehow
|
6
|
+
module CustomProcessing
|
7
|
+
def on_chardata(s) @out << s end
|
8
|
+
def on_stag_end(name, s, h, *a)
|
9
|
+
if name.to_sym == @element
|
10
|
+
# starting a new context, first output our substitute string
|
11
|
+
key= h&&h[@key.to_s]||'*no-name*'
|
12
|
+
sub = h['transclude'] || "{{#{key}}}"
|
13
|
+
@out << sub
|
14
|
+
# then push the current context and initialize this one
|
15
|
+
@stack.push([@context, @out, *@ex])
|
16
|
+
@context = key; @out = []
|
17
|
+
@ex = @extras.map {|e| h[e]}
|
18
|
+
else @out << s end # pass through tags we aren't processing
|
19
|
+
end
|
20
|
+
|
21
|
+
def on_etag(name, s=nil)
|
22
|
+
if name.to_sym == @element
|
23
|
+
# output a card (name, content, type)
|
24
|
+
@pairs << [@context, @out, @stack[-1][0], *@ex]
|
25
|
+
# restore previous context from stack
|
26
|
+
last = @stack.pop
|
27
|
+
@context, @out, @ex = last.shift, last.shift, *last
|
28
|
+
else @out << s end
|
29
|
+
end
|
30
|
+
|
31
|
+
def on_stag_empty_end(name, s=nil, h={}, *a)
|
32
|
+
if name.to_sym == @element
|
33
|
+
# I don't think we have this case, but it is simple to add later
|
34
|
+
STDERR << "empty card ???: #{name}, #{s}, #{h.inspect}\n"
|
35
|
+
else @out << s end
|
36
|
+
end
|
37
|
+
|
38
|
+
attr_reader :pairs, :parser
|
39
|
+
end
|
40
|
+
|
41
|
+
ARGV.each do |a|
|
42
|
+
pairs = XMLScan::XMLProcessor.process(a, {:key=>:name, :element=>:card, :extras=>[:type]}, CustomProcessing)
|
43
|
+
STDOUT << "Result\n"
|
44
|
+
STDOUT << pairs.map do |p| n,o,c,t = p
|
45
|
+
"#{c&&c.size>0&&"#{c}::"||''}#{n}#{t&&"[#{t}]"}=>#{o*''}"
|
46
|
+
end * "\n"
|
47
|
+
STDOUT << "\nDone\n"
|
48
|
+
end
|
metadata
CHANGED
@@ -1,19 +1,19 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xmlscan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.0prea
|
5
|
+
prerelease: 5
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- UENO Katsuhiro <katsu@blue.sky.or.jp>
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &9220620 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.8.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *9220620
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &9220120 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.12'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *9220120
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: bundler
|
38
|
-
requirement: &
|
38
|
+
requirement: &9219620 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 1.0.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *9219620
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: jeweler
|
49
|
-
requirement: &
|
49
|
+
requirement: &9219060 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: 1.8.3
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *9219060
|
58
58
|
description: The fastest XML parser written in 100% pure Ruby.
|
59
59
|
email: gerryg@inbox.com
|
60
60
|
executables: []
|
@@ -65,6 +65,7 @@ files:
|
|
65
65
|
- ChangeLog
|
66
66
|
- Gemfile
|
67
67
|
- Gemfile.lock
|
68
|
+
- README.processor
|
68
69
|
- README.rdoc
|
69
70
|
- Rakefile
|
70
71
|
- THANKS
|
@@ -73,11 +74,13 @@ files:
|
|
73
74
|
- lib/xmlscan/htmlscan.rb
|
74
75
|
- lib/xmlscan/namespace.rb
|
75
76
|
- lib/xmlscan/parser.rb
|
77
|
+
- lib/xmlscan/processor.rb
|
76
78
|
- lib/xmlscan/scanner.rb
|
77
79
|
- lib/xmlscan/version.rb
|
78
80
|
- lib/xmlscan/visitor.rb
|
79
81
|
- lib/xmlscan/xmlchar.rb
|
80
82
|
- test.rb
|
83
|
+
- xmlcard.rb
|
81
84
|
homepage: http://github.com/GerryG/xmlformat/
|
82
85
|
licenses:
|
83
86
|
- MIT
|
@@ -97,13 +100,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
97
100
|
version: '0'
|
98
101
|
segments:
|
99
102
|
- 0
|
100
|
-
hash:
|
103
|
+
hash: -1617079850723236327
|
101
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
105
|
none: false
|
103
106
|
requirements:
|
104
|
-
- - ! '
|
107
|
+
- - ! '>'
|
105
108
|
- !ruby/object:Gem::Version
|
106
|
-
version:
|
109
|
+
version: 1.3.1
|
107
110
|
requirements: []
|
108
111
|
rubyforge_project:
|
109
112
|
rubygems_version: 1.8.15
|