xmlscan 0.2.3 → 0.3.0preb

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -3,6 +3,9 @@
3
3
 
4
4
  require 'rubygems'
5
5
  require 'bundler'
6
+ require 'xmlscan/version'
7
+
8
+ VERSION = XMLScan::VERSION # File.exist?('VERSION') ? File.read('VERSION') : ""
6
9
 
7
10
  begin
8
11
  Bundler.setup(:default, :development)
@@ -15,10 +18,11 @@ end
15
18
  require 'rake'
16
19
 
17
20
  begin
21
+ include XMLScan
18
22
  require 'jeweler'
19
23
  Jeweler::Tasks.new do |gem|
20
- gem.name = "xmlscan"
21
- gem.version = '0.2.3'
24
+ gem.name = 'xmlscan'
25
+ gem.version = XMLScan::VERSION
22
26
  gem.license = "MIT"
23
27
  gem.summary = "The fastest XML parser written in 100% pure Ruby."
24
28
  gem.email = "gerryg@inbox.com"
@@ -56,10 +60,9 @@ task :default => :spec
56
60
 
57
61
  require 'rdoc/task'
58
62
  Rake::RDocTask.new do |rdoc|
59
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
60
63
 
61
64
  rdoc.rdoc_dir = 'rdoc'
62
- rdoc.title = "xmlscan #{version}"
65
+ rdoc.title = "xmlscan #{VERSION}"
63
66
  rdoc.rdoc_files.include('README*')
64
67
  rdoc.rdoc_files.include('lib/**/*.rb')
65
68
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.3
1
+ 0.3.0preb
@@ -47,7 +47,7 @@ module XMLScan
47
47
  raise "[BUG] this method must be never called"
48
48
  end
49
49
 
50
- def on_stag_end_empty(name)
50
+ def on_stag_end_empty(name, *a)
51
51
  raise "[BUG] this method must be never called"
52
52
  end
53
53
 
@@ -127,7 +127,7 @@ module XMLScan
127
127
  return found_empty_stag
128
128
  else
129
129
  parse_error "parse error at `<'"
130
- return on_chardata('<')
130
+ return on_chardata '<'
131
131
  end
132
132
  end
133
133
  on_stag name
@@ -142,7 +142,7 @@ module XMLScan
142
142
  if @src.close_tag then
143
143
  s << '>'
144
144
  end
145
- return on_chardata('<' << s)
145
+ return on_chardata '<'+s
146
146
  end
147
147
  on_stag name
148
148
  begin
@@ -156,9 +156,9 @@ module XMLScan
156
156
  qmark = val.slice!(0,1)
157
157
  if val[-1] == qmark[0] then
158
158
  val.chop!
159
- scan_attvalue val unless val.empty?
159
+ scan_attr_value val unless val.empty?
160
160
  else
161
- scan_attvalue val unless val.empty?
161
+ scan_attr_value val unless val.empty?
162
162
  begin
163
163
  s = @src.get
164
164
  unless s then
@@ -167,8 +167,8 @@ module XMLScan
167
167
  end
168
168
  c = s[0]
169
169
  val, s = s.split(qmark, 2)
170
- scan_attvalue '>' unless c == ?< or c == ?>
171
- scan_attvalue val if c
170
+ scan_attr_value '>' unless c == ?< or c == ?>
171
+ scan_attr_value val if c
172
172
  end until s
173
173
  continue = s
174
174
  end
@@ -54,16 +54,16 @@ module XMLScan
54
54
  # on_stag_end_empty_ns ('foo:bar', { 'foo' => '', ... })
55
55
  #
56
56
 
57
- def on_stag_ns(qname, prefix, localpart)
57
+ def on_stag_ns(qname, prefix, localpart, *a)
58
58
  end
59
59
 
60
- def on_attribute_ns(qname, prefix, localpart)
60
+ def on_attribute_ns(qname, prefix, localpart, *a)
61
61
  end
62
62
 
63
- def on_stag_end_ns(qname, namespaces)
63
+ def on_stag_end_ns(qname, namespaces, *a)
64
64
  end
65
65
 
66
- def on_stag_end_empty_ns(qname, namespaces)
66
+ def on_stag_end_empty_ns(qname, namespaces, *a)
67
67
  end
68
68
 
69
69
  end
@@ -99,7 +99,7 @@ module XMLScan
99
99
  end
100
100
 
101
101
 
102
- def on_start_document
102
+ def on_start_document(*a)
103
103
  @namespace = {} #PredefinedNamespace.dup
104
104
  @ns_hist = []
105
105
  @ns_undeclared = {} # for checking undeclared namespace prefixes.
@@ -107,14 +107,14 @@ module XMLScan
107
107
  @dont_same = [] # ditto.
108
108
  @xmlns = NamespaceDeclaration.new(self)
109
109
  @orig_visitor = @visitor
110
- @visitor.on_start_document
110
+ @visitor.on_start_document *a
111
111
  end
112
112
 
113
113
 
114
- def on_stag(name)
114
+ def on_stag(name, *a)
115
115
  @ns_hist.push nil
116
116
  unless /:/n =~ name then
117
- @visitor.on_stag_ns name, '', name
117
+ @visitor.on_stag_ns name, '', name, *a
118
118
  else
119
119
  prefix, localpart = $`, $'
120
120
  if localpart.include? ?: then
@@ -131,12 +131,12 @@ module XMLScan
131
131
  @ns_undeclared[prefix] = true
132
132
  end
133
133
  end
134
- @visitor.on_stag_ns name, prefix, localpart
134
+ @visitor.on_stag_ns name, prefix, localpart, *a
135
135
  end
136
136
  end
137
137
 
138
138
 
139
- def on_attribute(name)
139
+ def on_attribute(name, *a)
140
140
  if /:/n =~ name then
141
141
  prefix, localpart = $`, $'
142
142
  if localpart.include? ?: then
@@ -157,13 +157,13 @@ module XMLScan
157
157
  @dont_same.push [ prev, prefix, localpart ]
158
158
  end
159
159
  @prev_prefix[localpart] = prefix
160
- @visitor.on_attribute_ns name, prefix, localpart
160
+ @visitor.on_attribute_ns name, prefix, localpart, *a
161
161
  end
162
162
  elsif name == 'xmlns' then
163
163
  @visitor = @xmlns
164
164
  @xmlns.on_xmlns_start ''
165
165
  else
166
- @visitor.on_attribute_ns name, nil, name
166
+ @visitor.on_attribute_ns name, nil, name, *a
167
167
  end
168
168
  end
169
169
 
@@ -176,36 +176,36 @@ module XMLScan
176
176
  @parent = parent
177
177
  end
178
178
 
179
- def on_xmlns_start(prefix)
179
+ def on_xmlns_start(prefix, *a)
180
180
  @prefix = prefix
181
181
  @nsdecl = ''
182
182
  end
183
183
 
184
- def on_attr_value(str)
184
+ def on_attr_value(str, *a)
185
185
  @nsdecl << str
186
186
  end
187
187
 
188
- def on_attr_entityref(ref)
188
+ def on_attr_entityref(ref, *a)
189
189
  @parent.ns_wellformed_error \
190
190
  "xmlns includes undeclared entity reference"
191
191
  end
192
192
 
193
- def on_attr_charref(code)
193
+ def on_attr_charref(code, *a)
194
194
  @nsdecl << [code].pack('U')
195
195
  end
196
196
 
197
- def on_attr_charref_hex(code)
197
+ def on_attr_charref_hex(code, *a)
198
198
  @nsdecl << [code].pack('U')
199
199
  end
200
200
 
201
- def on_attribute_end(name)
201
+ def on_attribute_end(name, *a)
202
202
  @parent.on_xmlns_end @prefix, @nsdecl
203
203
  end
204
204
 
205
205
  end
206
206
 
207
207
 
208
- def on_xmlns_end(prefix, uri)
208
+ def on_xmlns_end(prefix, uri, *a)
209
209
  @visitor = @orig_visitor
210
210
  if PredefinedNamespace.key? prefix then
211
211
  if prefix == 'xmlns' then
@@ -254,54 +254,54 @@ module XMLScan
254
254
  end
255
255
 
256
256
 
257
- def on_stag_end(name)
257
+ def on_stag_end(name, *a)
258
258
  fix_namespace
259
- @visitor.on_stag_end_ns name, @namespace
259
+ @visitor.on_stag_end_ns name, @namespace, *a
260
260
  end
261
261
 
262
262
 
263
- def on_etag(name)
263
+ def on_etag(name, *a)
264
264
  h = @ns_hist.pop and @namespace.update h
265
- @visitor.on_etag name
265
+ @visitor.on_etag name, *a
266
266
  end
267
267
 
268
268
 
269
- def on_stag_end_empty(name)
269
+ def on_stag_end_empty(name, *a)
270
270
  fix_namespace
271
- @visitor.on_stag_end_empty_ns name, @namespace
271
+ @visitor.on_stag_end_empty_ns name, @namespace, *a
272
272
  h = @ns_hist.pop and @namespace.update h
273
273
  end
274
274
 
275
275
 
276
- def on_doctype(root, pubid, sysid)
276
+ def on_doctype(root, pubid, sysid, *a)
277
277
  if root.count(':') > 1 then
278
278
  ns_parse_error "qualified name `#{root}' includes `:'"
279
279
  end
280
- @visitor.on_doctype root, pubid, sysid
280
+ @visitor.on_doctype root, pubid, sysid, *a
281
281
  end
282
282
 
283
283
 
284
- def on_pi(target, pi)
284
+ def on_pi(target, pi, *a)
285
285
  if target.include? ?: then
286
286
  ns_parse_error "PI target `#{target}' includes `:'"
287
287
  end
288
- @visitor.on_pi target, pi
288
+ @visitor.on_pi target, pi, *a
289
289
  end
290
290
 
291
291
 
292
- def on_entityref(ref)
292
+ def on_entityref(ref, *a)
293
293
  if ref.include? ?: then
294
294
  ns_parse_error "entity reference `#{ref}' includes `:'"
295
295
  end
296
- @visitor.on_entityref ref
296
+ @visitor.on_entityref ref, *a
297
297
  end
298
298
 
299
299
 
300
- def on_attr_entityref(ref)
300
+ def on_attr_entityref(ref, *a)
301
301
  if ref.include? ?: then
302
302
  ns_parse_error "entity reference `#{ref}' includes `:'"
303
303
  end
304
- @visitor.on_attr_entityref ref
304
+ @visitor.on_attr_entityref ref, *a
305
305
  end
306
306
 
307
307
  end
@@ -43,7 +43,7 @@ module XMLScan
43
43
 
44
44
  private
45
45
 
46
- def on_xmldecl_version(str)
46
+ def on_xmldecl_version(str, *a)
47
47
  unless str == '1.0' then
48
48
  warning "unsupported XML version `#{str}'"
49
49
  end
@@ -51,7 +51,7 @@ module XMLScan
51
51
  end
52
52
 
53
53
 
54
- def on_xmldecl_standalone(str)
54
+ def on_xmldecl_standalone(str, *a)
55
55
  if str == 'yes' then
56
56
  @standalone = true
57
57
  elsif str == 'no' then
@@ -63,7 +63,7 @@ module XMLScan
63
63
  end
64
64
 
65
65
 
66
- def on_doctype(name, pubid, sysid)
66
+ def on_doctype(name, pubid, sysid, *a)
67
67
  if pubid and not sysid then
68
68
  parse_error "public external ID must have both public ID and system ID"
69
69
  end
@@ -71,12 +71,12 @@ module XMLScan
71
71
  end
72
72
 
73
73
 
74
- def on_prolog_space(s)
74
+ def on_prolog_space(s, *a)
75
75
  # just ignore it.
76
76
  end
77
77
 
78
78
 
79
- def on_pi(target, pi)
79
+ def on_pi(target, pi, *a)
80
80
  if target.downcase == 'xml' then
81
81
  parse_error "reserved PI target `#{target}'"
82
82
  end
@@ -114,39 +114,43 @@ module XMLScan
114
114
  #end
115
115
 
116
116
 
117
- def on_stag(name)
117
+ def on_stag(name, *a)
118
118
  @elem.push name
119
119
  @visitor.on_stag name
120
120
  @attr.clear
121
121
  end
122
122
 
123
- def on_attribute(name)
123
+ def on_attribute(name, *a)
124
124
  unless @attr.check_unique name then
125
125
  wellformed_error "doubled attribute `#{name}'"
126
126
  end
127
127
  @visitor.on_attribute name
128
128
  end
129
129
 
130
- def on_attr_value(str)
130
+ def on_attr_value(str, *a)
131
131
  str.tr! "\t\r\n", ' ' # normalize
132
132
  @visitor.on_attr_value str
133
133
  end
134
134
 
135
- def on_stag_end_empty(name)
135
+ def on_stag_end(name, *a)
136
+ @visitor.on_stag_end name, *a
137
+ end
138
+
139
+ def on_stag_end_empty(name, *a)
136
140
  # @visitor.on_stag_end name
137
141
  # @elem.pop
138
142
  # @visitor.on_etag name
139
- @visitor.on_stag_end_empty name
143
+ @visitor.on_stag_end_empty name, *a
140
144
  @elem.pop
141
145
  end
142
146
 
143
- def on_etag(name)
147
+ def on_etag(name, *a)
144
148
  last = @elem.pop
145
149
  if last == name then
146
- @visitor.on_etag name
150
+ @visitor.on_etag name, *a
147
151
  elsif last then
148
152
  wellformed_error "element type `#{name}' is not matched"
149
- @visitor.on_etag last
153
+ @visitor.on_etag last, *a
150
154
  else
151
155
  parse_error "end tag `#{name}' appears alone"
152
156
  end
@@ -0,0 +1,109 @@
1
+ # encoding: UTF-8
2
+ require 'xmlscan/parser'
3
+ require 'xmlscan/visitor'
4
+
5
+ module XMLScan
6
+ module ElementProcessor
7
+ include XMLScan::Visitor
8
+
9
+ SKIP = [:on_chardata, :on_stag, :on_etag, :on_attribute, :on_attr_entityref,
10
+ :on_attr_value, :on_start_document, :on_end_document, :on_attribute_end,
11
+ :on_stag_end, :on_stag_end_empty, :on_attr_charref, :on_attr_charref_hex]
12
+
13
+ MY_METHODS = XMLScan::Visitor.instance_methods.to_a - SKIP
14
+
15
+ def initialize(opts={}, mod=nil)
16
+ raise "No module" unless mod
17
+ STDERR << "init Element Processer #{mod}\n"
18
+ (MY_METHODS - mod.instance_methods).each do |i|
19
+ self.class.class_eval %{def #{i}(d, *a) d&&(@out << d) end}, __FILE__, __LINE__
20
+ end
21
+ self.class.send :include, mod
22
+
23
+ @element = opts[:element] || raise("need an element")
24
+ @key = opts[:key] || raise("need a key")
25
+ @extras = (ex = opts[:extras]) ? ex.map(&:to_sym) : []
26
+
27
+ @pairs = [] # output [name, content, value] * 1 or more
28
+ @context = '' # current key(name) of the element (card)
29
+ @stack = [] # stack of containing context cards
30
+ @out = [] # current output for name(card)
31
+ @parser = XMLScan::XMLParser.new(self)
32
+ self
33
+ end
34
+
35
+ end
36
+
37
+ class XMLProcessor
38
+ include ElementProcessor
39
+
40
+ def self.process(io, opts={}, mod=nil)
41
+ mod ||= ElementProcessing
42
+ STDERR << "process #{io.inspect}, #{opts.inspect}\n"
43
+ io = case io
44
+ when String; open(io)
45
+ when IO, StringIO; io
46
+ else raise "bad type file input #{io.inspect}"
47
+ end
48
+
49
+ visitor = new(opts, mod)
50
+ visitor.parser.parse(io)
51
+ visitor.pairs
52
+ end
53
+ end
54
+
55
+
56
+ module ElementProcessing
57
+ def on_chardata(s) @out << s end
58
+ def on_stag_end(name, s, h, *a)
59
+ if name.to_sym == @element
60
+ # starting a new context, first output our substitute string
61
+ key= h&&h[@key.to_s]||'*no-name*'
62
+ @tmpl = ":transclude|{{:name}}" # def: "{{:key}}"
63
+ STDERR << "templ #{@tmpl.inspect}\n"
64
+ #STDERR << "x> #{x.inspect}, #{h.inspect}, #{(!(/:\w[\w\d]*/ =~ x)) || h[$&[1..-1].to_s] }\n"
65
+ sub =
66
+ @tmpl.split('|').find {|x| !(/:\w[\w\d]*/ =~ x) ||
67
+ h[$&[1..-1].to_s] }.gsub(/:\w[\w\d]*/) {|m|
68
+ STDERR << "templ sub match #{m.inspect}, #{h[m[1..-1]]}\n"
69
+ h[m[1..-1]] }
70
+ #sub = h['transclude'] || "{{#{key}}}"
71
+ @out << sub
72
+ # then push the current context and initialize this one
73
+ @stack.push([@context, @out, *@ex])
74
+ @context = key; @out = []
75
+ @ex = @extras.map {|e| h[e]}
76
+ else @out << s end # pass through tags we aren't processing
77
+ end
78
+
79
+ def on_etag(name, s=nil)
80
+ if name.to_sym == @element
81
+ # output a card (name, content, type)
82
+ @pairs << [@context, @out, @stack[-1][0], *@ex]
83
+ # restore previous context from stack
84
+ last = @stack.pop
85
+ @context, @out, @ex = last.shift, last.shift, *last
86
+ else @out << s end
87
+ end
88
+
89
+ def on_stag_empty_end(name, s=nil, h={}, *a)
90
+ if name.to_sym == @element
91
+
92
+ key= h&&h[@key.to_s]||'*no-name*'
93
+ ex = @extras.map {|e| h[e]}
94
+ @pairs << [key, [], @context, *ex]
95
+ else @out << s end
96
+ end
97
+
98
+ attr_reader :pairs, :parser
99
+ end
100
+
101
+ end
102
+ ARGV.each do |a|
103
+ pairs = XMLScan::XMLProcessor.process(a, {:key=>:name, :element=>:card, :extras=>[:type]}, XMLScan::ElementProcessing)
104
+ STDOUT << "Result\n"
105
+ STDOUT << pairs.map do |p| n,o,c,t = p
106
+ "#{c&&c.size>0&&"#{c}::"||''}#{n}#{t&&"[#{t}]"}=>#{o*''}"
107
+ end * "\n"
108
+ STDOUT << "\nDone\n"
109
+ end