xmlscan 0.2.3 → 0.3.0preb

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -3,6 +3,9 @@
3
3
 
4
4
  require 'rubygems'
5
5
  require 'bundler'
6
+ require 'xmlscan/version'
7
+
8
+ VERSION = XMLScan::VERSION # File.exist?('VERSION') ? File.read('VERSION') : ""
6
9
 
7
10
  begin
8
11
  Bundler.setup(:default, :development)
@@ -15,10 +18,11 @@ end
15
18
  require 'rake'
16
19
 
17
20
  begin
21
+ include XMLScan
18
22
  require 'jeweler'
19
23
  Jeweler::Tasks.new do |gem|
20
- gem.name = "xmlscan"
21
- gem.version = '0.2.3'
24
+ gem.name = 'xmlscan'
25
+ gem.version = XMLScan::VERSION
22
26
  gem.license = "MIT"
23
27
  gem.summary = "The fastest XML parser written in 100% pure Ruby."
24
28
  gem.email = "gerryg@inbox.com"
@@ -56,10 +60,9 @@ task :default => :spec
56
60
 
57
61
  require 'rdoc/task'
58
62
  Rake::RDocTask.new do |rdoc|
59
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
60
63
 
61
64
  rdoc.rdoc_dir = 'rdoc'
62
- rdoc.title = "xmlscan #{version}"
65
+ rdoc.title = "xmlscan #{VERSION}"
63
66
  rdoc.rdoc_files.include('README*')
64
67
  rdoc.rdoc_files.include('lib/**/*.rb')
65
68
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.3
1
+ 0.3.0preb
@@ -47,7 +47,7 @@ module XMLScan
47
47
  raise "[BUG] this method must be never called"
48
48
  end
49
49
 
50
- def on_stag_end_empty(name)
50
+ def on_stag_end_empty(name, *a)
51
51
  raise "[BUG] this method must be never called"
52
52
  end
53
53
 
@@ -127,7 +127,7 @@ module XMLScan
127
127
  return found_empty_stag
128
128
  else
129
129
  parse_error "parse error at `<'"
130
- return on_chardata('<')
130
+ return on_chardata '<'
131
131
  end
132
132
  end
133
133
  on_stag name
@@ -142,7 +142,7 @@ module XMLScan
142
142
  if @src.close_tag then
143
143
  s << '>'
144
144
  end
145
- return on_chardata('<' << s)
145
+ return on_chardata '<'+s
146
146
  end
147
147
  on_stag name
148
148
  begin
@@ -156,9 +156,9 @@ module XMLScan
156
156
  qmark = val.slice!(0,1)
157
157
  if val[-1] == qmark[0] then
158
158
  val.chop!
159
- scan_attvalue val unless val.empty?
159
+ scan_attr_value val unless val.empty?
160
160
  else
161
- scan_attvalue val unless val.empty?
161
+ scan_attr_value val unless val.empty?
162
162
  begin
163
163
  s = @src.get
164
164
  unless s then
@@ -167,8 +167,8 @@ module XMLScan
167
167
  end
168
168
  c = s[0]
169
169
  val, s = s.split(qmark, 2)
170
- scan_attvalue '>' unless c == ?< or c == ?>
171
- scan_attvalue val if c
170
+ scan_attr_value '>' unless c == ?< or c == ?>
171
+ scan_attr_value val if c
172
172
  end until s
173
173
  continue = s
174
174
  end
@@ -54,16 +54,16 @@ module XMLScan
54
54
  # on_stag_end_empty_ns ('foo:bar', { 'foo' => '', ... })
55
55
  #
56
56
 
57
- def on_stag_ns(qname, prefix, localpart)
57
+ def on_stag_ns(qname, prefix, localpart, *a)
58
58
  end
59
59
 
60
- def on_attribute_ns(qname, prefix, localpart)
60
+ def on_attribute_ns(qname, prefix, localpart, *a)
61
61
  end
62
62
 
63
- def on_stag_end_ns(qname, namespaces)
63
+ def on_stag_end_ns(qname, namespaces, *a)
64
64
  end
65
65
 
66
- def on_stag_end_empty_ns(qname, namespaces)
66
+ def on_stag_end_empty_ns(qname, namespaces, *a)
67
67
  end
68
68
 
69
69
  end
@@ -99,7 +99,7 @@ module XMLScan
99
99
  end
100
100
 
101
101
 
102
- def on_start_document
102
+ def on_start_document(*a)
103
103
  @namespace = {} #PredefinedNamespace.dup
104
104
  @ns_hist = []
105
105
  @ns_undeclared = {} # for checking undeclared namespace prefixes.
@@ -107,14 +107,14 @@ module XMLScan
107
107
  @dont_same = [] # ditto.
108
108
  @xmlns = NamespaceDeclaration.new(self)
109
109
  @orig_visitor = @visitor
110
- @visitor.on_start_document
110
+ @visitor.on_start_document *a
111
111
  end
112
112
 
113
113
 
114
- def on_stag(name)
114
+ def on_stag(name, *a)
115
115
  @ns_hist.push nil
116
116
  unless /:/n =~ name then
117
- @visitor.on_stag_ns name, '', name
117
+ @visitor.on_stag_ns name, '', name, *a
118
118
  else
119
119
  prefix, localpart = $`, $'
120
120
  if localpart.include? ?: then
@@ -131,12 +131,12 @@ module XMLScan
131
131
  @ns_undeclared[prefix] = true
132
132
  end
133
133
  end
134
- @visitor.on_stag_ns name, prefix, localpart
134
+ @visitor.on_stag_ns name, prefix, localpart, *a
135
135
  end
136
136
  end
137
137
 
138
138
 
139
- def on_attribute(name)
139
+ def on_attribute(name, *a)
140
140
  if /:/n =~ name then
141
141
  prefix, localpart = $`, $'
142
142
  if localpart.include? ?: then
@@ -157,13 +157,13 @@ module XMLScan
157
157
  @dont_same.push [ prev, prefix, localpart ]
158
158
  end
159
159
  @prev_prefix[localpart] = prefix
160
- @visitor.on_attribute_ns name, prefix, localpart
160
+ @visitor.on_attribute_ns name, prefix, localpart, *a
161
161
  end
162
162
  elsif name == 'xmlns' then
163
163
  @visitor = @xmlns
164
164
  @xmlns.on_xmlns_start ''
165
165
  else
166
- @visitor.on_attribute_ns name, nil, name
166
+ @visitor.on_attribute_ns name, nil, name, *a
167
167
  end
168
168
  end
169
169
 
@@ -176,36 +176,36 @@ module XMLScan
176
176
  @parent = parent
177
177
  end
178
178
 
179
- def on_xmlns_start(prefix)
179
+ def on_xmlns_start(prefix, *a)
180
180
  @prefix = prefix
181
181
  @nsdecl = ''
182
182
  end
183
183
 
184
- def on_attr_value(str)
184
+ def on_attr_value(str, *a)
185
185
  @nsdecl << str
186
186
  end
187
187
 
188
- def on_attr_entityref(ref)
188
+ def on_attr_entityref(ref, *a)
189
189
  @parent.ns_wellformed_error \
190
190
  "xmlns includes undeclared entity reference"
191
191
  end
192
192
 
193
- def on_attr_charref(code)
193
+ def on_attr_charref(code, *a)
194
194
  @nsdecl << [code].pack('U')
195
195
  end
196
196
 
197
- def on_attr_charref_hex(code)
197
+ def on_attr_charref_hex(code, *a)
198
198
  @nsdecl << [code].pack('U')
199
199
  end
200
200
 
201
- def on_attribute_end(name)
201
+ def on_attribute_end(name, *a)
202
202
  @parent.on_xmlns_end @prefix, @nsdecl
203
203
  end
204
204
 
205
205
  end
206
206
 
207
207
 
208
- def on_xmlns_end(prefix, uri)
208
+ def on_xmlns_end(prefix, uri, *a)
209
209
  @visitor = @orig_visitor
210
210
  if PredefinedNamespace.key? prefix then
211
211
  if prefix == 'xmlns' then
@@ -254,54 +254,54 @@ module XMLScan
254
254
  end
255
255
 
256
256
 
257
- def on_stag_end(name)
257
+ def on_stag_end(name, *a)
258
258
  fix_namespace
259
- @visitor.on_stag_end_ns name, @namespace
259
+ @visitor.on_stag_end_ns name, @namespace, *a
260
260
  end
261
261
 
262
262
 
263
- def on_etag(name)
263
+ def on_etag(name, *a)
264
264
  h = @ns_hist.pop and @namespace.update h
265
- @visitor.on_etag name
265
+ @visitor.on_etag name, *a
266
266
  end
267
267
 
268
268
 
269
- def on_stag_end_empty(name)
269
+ def on_stag_end_empty(name, *a)
270
270
  fix_namespace
271
- @visitor.on_stag_end_empty_ns name, @namespace
271
+ @visitor.on_stag_end_empty_ns name, @namespace, *a
272
272
  h = @ns_hist.pop and @namespace.update h
273
273
  end
274
274
 
275
275
 
276
- def on_doctype(root, pubid, sysid)
276
+ def on_doctype(root, pubid, sysid, *a)
277
277
  if root.count(':') > 1 then
278
278
  ns_parse_error "qualified name `#{root}' includes `:'"
279
279
  end
280
- @visitor.on_doctype root, pubid, sysid
280
+ @visitor.on_doctype root, pubid, sysid, *a
281
281
  end
282
282
 
283
283
 
284
- def on_pi(target, pi)
284
+ def on_pi(target, pi, *a)
285
285
  if target.include? ?: then
286
286
  ns_parse_error "PI target `#{target}' includes `:'"
287
287
  end
288
- @visitor.on_pi target, pi
288
+ @visitor.on_pi target, pi, *a
289
289
  end
290
290
 
291
291
 
292
- def on_entityref(ref)
292
+ def on_entityref(ref, *a)
293
293
  if ref.include? ?: then
294
294
  ns_parse_error "entity reference `#{ref}' includes `:'"
295
295
  end
296
- @visitor.on_entityref ref
296
+ @visitor.on_entityref ref, *a
297
297
  end
298
298
 
299
299
 
300
- def on_attr_entityref(ref)
300
+ def on_attr_entityref(ref, *a)
301
301
  if ref.include? ?: then
302
302
  ns_parse_error "entity reference `#{ref}' includes `:'"
303
303
  end
304
- @visitor.on_attr_entityref ref
304
+ @visitor.on_attr_entityref ref, *a
305
305
  end
306
306
 
307
307
  end
@@ -43,7 +43,7 @@ module XMLScan
43
43
 
44
44
  private
45
45
 
46
- def on_xmldecl_version(str)
46
+ def on_xmldecl_version(str, *a)
47
47
  unless str == '1.0' then
48
48
  warning "unsupported XML version `#{str}'"
49
49
  end
@@ -51,7 +51,7 @@ module XMLScan
51
51
  end
52
52
 
53
53
 
54
- def on_xmldecl_standalone(str)
54
+ def on_xmldecl_standalone(str, *a)
55
55
  if str == 'yes' then
56
56
  @standalone = true
57
57
  elsif str == 'no' then
@@ -63,7 +63,7 @@ module XMLScan
63
63
  end
64
64
 
65
65
 
66
- def on_doctype(name, pubid, sysid)
66
+ def on_doctype(name, pubid, sysid, *a)
67
67
  if pubid and not sysid then
68
68
  parse_error "public external ID must have both public ID and system ID"
69
69
  end
@@ -71,12 +71,12 @@ module XMLScan
71
71
  end
72
72
 
73
73
 
74
- def on_prolog_space(s)
74
+ def on_prolog_space(s, *a)
75
75
  # just ignore it.
76
76
  end
77
77
 
78
78
 
79
- def on_pi(target, pi)
79
+ def on_pi(target, pi, *a)
80
80
  if target.downcase == 'xml' then
81
81
  parse_error "reserved PI target `#{target}'"
82
82
  end
@@ -114,39 +114,43 @@ module XMLScan
114
114
  #end
115
115
 
116
116
 
117
- def on_stag(name)
117
+ def on_stag(name, *a)
118
118
  @elem.push name
119
119
  @visitor.on_stag name
120
120
  @attr.clear
121
121
  end
122
122
 
123
- def on_attribute(name)
123
+ def on_attribute(name, *a)
124
124
  unless @attr.check_unique name then
125
125
  wellformed_error "doubled attribute `#{name}'"
126
126
  end
127
127
  @visitor.on_attribute name
128
128
  end
129
129
 
130
- def on_attr_value(str)
130
+ def on_attr_value(str, *a)
131
131
  str.tr! "\t\r\n", ' ' # normalize
132
132
  @visitor.on_attr_value str
133
133
  end
134
134
 
135
- def on_stag_end_empty(name)
135
+ def on_stag_end(name, *a)
136
+ @visitor.on_stag_end name, *a
137
+ end
138
+
139
+ def on_stag_end_empty(name, *a)
136
140
  # @visitor.on_stag_end name
137
141
  # @elem.pop
138
142
  # @visitor.on_etag name
139
- @visitor.on_stag_end_empty name
143
+ @visitor.on_stag_end_empty name, *a
140
144
  @elem.pop
141
145
  end
142
146
 
143
- def on_etag(name)
147
+ def on_etag(name, *a)
144
148
  last = @elem.pop
145
149
  if last == name then
146
- @visitor.on_etag name
150
+ @visitor.on_etag name, *a
147
151
  elsif last then
148
152
  wellformed_error "element type `#{name}' is not matched"
149
- @visitor.on_etag last
153
+ @visitor.on_etag last, *a
150
154
  else
151
155
  parse_error "end tag `#{name}' appears alone"
152
156
  end
@@ -0,0 +1,109 @@
1
+ # encoding: UTF-8
2
+ require 'xmlscan/parser'
3
+ require 'xmlscan/visitor'
4
+
5
+ module XMLScan
6
# Mixin that turns the including class into an XMLScan visitor which
# collects element content. The actual tag handling comes from a second
# module (+mod+) that is included into the class at construction time.
module ElementProcessor
  include XMLScan::Visitor

  # Visitor callbacks that never get a generated pass-through definition
  # in #initialize; they are left to +mod+ or to XMLScan::Visitor.
  SKIP = [:on_chardata, :on_stag, :on_etag, :on_attribute, :on_attr_entityref,
          :on_attr_value, :on_start_document, :on_end_document, :on_attribute_end,
          :on_stag_end, :on_stag_end_empty, :on_attr_charref, :on_attr_charref_hex]

  # Every other visitor callback: candidates for a generated pass-through.
  MY_METHODS = XMLScan::Visitor.instance_methods.to_a - SKIP

  # opts:: Hash with :element (tag name to collect), :key (attribute used
  #        as the element's name) and optional :extras (further attribute
  #        names to record).
  # mod::  module providing the tag callbacks; it is included into the
  #        receiver's class.
  #
  # Raises RuntimeError when +mod+, :element or :key is missing.
  def initialize(opts={}, mod=nil)
    raise "No module" unless mod
    # Validate the options before touching the class, so a bad call does
    # not leave the class half-modified.
    element = opts[:element] || raise("need an element")
    key     = opts[:key]     || raise("need a key")
    extras  = opts[:extras]
    STDERR << "init Element Processor #{mod}\n" if $DEBUG

    # For every callback that +mod+ does not implement, define a default
    # on the class that appends its first argument (when truthy) to the
    # current output buffer.
    (MY_METHODS - mod.instance_methods).each do |callback|
      self.class.send(:define_method, callback) { |d, *a| d && (@out << d) }
    end
    self.class.send :include, mod

    # The callbacks compare against name.to_sym, so normalise here; this
    # lets callers pass the element name as a String as well.
    @element = element.to_sym
    @key = key
    @extras = extras ? extras.map(&:to_sym) : []

    @pairs = []    # output [name, content, value] * 1 or more
    @context = ''  # current key(name) of the element (card)
    @stack = []    # stack of containing context cards
    @out = []      # current output for name(card)
    @parser = XMLScan::XMLParser.new(self)
  end

end
36
+
37
# Concrete visitor class: ElementProcessor supplies the state and parser,
# the processing module passed to ::process supplies the tag callbacks.
class XMLProcessor
  include ElementProcessor

  # Parses +io+ and returns the collected pairs.
  #
  # io::   a String (treated as a file path), an IO, or a StringIO.
  # opts:: options forwarded to ElementProcessor#initialize.
  # mod::  module with the tag callbacks; defaults to ElementProcessing.
  #
  # Raises RuntimeError for any other kind of +io+.
  def self.process(io, opts={}, mod=nil)
    mod ||= ElementProcessing
    STDERR << "process #{io.inspect}, #{opts.inspect}\n" if $DEBUG

    opened = nil
    input =
      if io.is_a?(String)
        # File.open rather than Kernel#open: the latter runs a command
        # when the string starts with "|".
        opened = File.open(io)
      elsif io.is_a?(IO) || (defined?(::StringIO) && io.is_a?(::StringIO))
        # StringIO is only checked when it is loaded; a caller holding a
        # StringIO has necessarily loaded it.
        io
      else
        raise "bad type file input #{io.inspect}"
      end

    visitor = new(opts, mod)
    visitor.parser.parse(input)
    visitor.pairs
  ensure
    # Only close what this method opened; caller-supplied streams stay open.
    opened.close if opened && !opened.closed?
  end
end
54
+
55
+
56
# Visitor callbacks that collect the content of every +@element+ tag.
#
# The including object must provide @element, @key, @extras, @pairs,
# @context, @stack and @out. Each finished element is appended to @pairs
# as [name, content, enclosing name, *extras].
#
# The +s+ argument is whatever the parser passes as the tag's text; it is
# appended to the output untouched for tags that are not being collected.
# +h+ is presumably the tag's attribute hash; it is read with String keys.
module ElementProcessing
  # A ":word" placeholder inside a substitution template.
  PLACEHOLDER = /:\w[\w\d]*/

  attr_reader :pairs, :parser

  def on_chardata(s)
    @out << s
  end

  # Start tag. For a collected element, writes a substitute string into
  # the enclosing output, then opens a fresh context for the element.
  def on_stag_end(name, s, h, *a)
    if name.to_sym == @element
      h ||= {}
      # starting a new context, first output our substitute string
      key = element_key(h)
      @tmpl = ":transclude|{{:name}}" # def: "{{:key}}"
      STDERR << "templ #{@tmpl.inspect}\n" if $DEBUG
      @out << expand_template(@tmpl, h, key)
      # then push the current context and initialize this one
      @stack.push([@context, @out, *@ex])
      @context = key
      @out = []
      @ex = element_extras(h)
    else
      @out << s # pass through tags we aren't processing
    end
  end

  # End tag. For a collected element, emits its pair and restores the
  # enclosing context.
  def on_etag(name, s=nil)
    if name.to_sym == @element
      parent = @stack.last
      # output a card (name, content, type)
      @pairs << [@context, @out, parent && parent[0], *@ex]
      # restore previous context from stack, keeping ALL saved extras;
      # an unbalanced end tag falls back to the initial empty context
      @context, @out, *@ex = (@stack.pop || ['', []])
    else
      @out << s
    end
  end

  # Empty-element tag. A collected element is emitted immediately with
  # empty content; nothing is pushed on the context stack.
  def on_stag_end_empty(name, s=nil, h={}, *a)
    if name.to_sym == @element
      h ||= {}
      @pairs << [element_key(h), [], @context, *element_extras(h)]
    else
      @out << s
    end
  end

  # Former (misspelled) name, kept so existing direct callers still work.
  alias_method :on_stag_empty_end, :on_stag_end_empty

  private

  # Name of the element: the value of the @key attribute, if any.
  def element_key(h)
    h[@key.to_s] || '*no-name*'
  end

  # Values of the extra attributes. @extras holds Symbols while the other
  # lookups on +h+ use Strings, so try the String form first.
  def element_extras(h)
    @extras.map { |e| h[e.to_s] || h[e] }
  end

  # Picks the first '|'-separated alternative of +tmpl+ that either has no
  # placeholder or whose first placeholder names an attribute present in
  # +h+, and fills its placeholders from +h+. When no alternative applies,
  # falls back to "{{key}}".
  def expand_template(tmpl, h, key)
    chosen = tmpl.split('|').find do |alt|
      m = PLACEHOLDER.match(alt)
      m.nil? || h[m[0][1..-1]]
    end
    return "{{#{key}}}" unless chosen

    chosen.gsub(PLACEHOLDER) do |ph|
      value = h[ph[1..-1]]
      STDERR << "templ sub match #{ph.inspect}, #{value}\n" if $DEBUG
      value.to_s
    end
  end
end
100
+
101
+ end
102
# Command-line driver: processes each file named on the command line and
# prints one "context::name[type]=>content" line per collected element.
# Guarded so that merely requiring this file as a library does not try to
# parse the host program's arguments.
if __FILE__ == $0
  ARGV.each do |path|
    pairs = XMLScan::XMLProcessor.process(path, {:key=>:name, :element=>:card, :extras=>[:type]}, XMLScan::ElementProcessing)
    lines = pairs.map do |name, content, context, type|
      prefix = (context && context.size > 0) ? "#{context}::" : ''
      suffix = type ? "[#{type}]" : ''
      "#{prefix}#{name}#{suffix}=>#{content * ''}"
    end
    STDOUT << "Result\n"
    STDOUT << lines.join("\n")
    STDOUT << "\nDone\n"
  end
end